sp-client 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sp_client-1.0.0/PKG-INFO +6 -0
- sp_client-1.0.0/README.md +35 -0
- sp_client-1.0.0/pyproject.toml +16 -0
- sp_client-1.0.0/setup.cfg +4 -0
- sp_client-1.0.0/sp_client/__init__.py +1 -0
- sp_client-1.0.0/sp_client/scraping_pros_client.py +42 -0
- sp_client-1.0.0/sp_client.egg-info/PKG-INFO +6 -0
- sp_client-1.0.0/sp_client.egg-info/SOURCES.txt +9 -0
- sp_client-1.0.0/sp_client.egg-info/dependency_links.txt +1 -0
- sp_client-1.0.0/sp_client.egg-info/requires.txt +1 -0
- sp_client-1.0.0/sp_client.egg-info/top_level.txt +1 -0
sp_client-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# SP Client
|
|
2
|
+
|
|
3
|
+
Python library for the Scraping Pros API.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
First, go to the PyPI page: https://test.pypi.org/project/sp-client/0.1.0/ and then install the
|
|
7
|
+
library as explained on that page.
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
```python
|
|
11
|
+
from sp_client import ScrapingPros
|
|
12
|
+
|
|
13
|
+
# Initialize the client
|
|
14
|
+
client = ScrapingPros(
|
|
15
|
+
token='your-api-token-here'
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Scrape a website
|
|
19
|
+
result = client.scrape_site(data)
|
|
20
|
+
print(result)
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### scrape_site(data)
|
|
24
|
+
Scrapes a website with the instructions set on the data.
|
|
25
|
+
|
|
26
|
+
**Parameters:**
|
|
27
|
+
- `data`: Dictionary with instructions on how to perform the scraping.
|
|
28
|
+
|
|
29
|
+
**Example:**
|
|
30
|
+
```python
|
|
31
|
+
data = client.scrape_site({
|
|
32
|
+
"url": 'https://example.com',
|
|
33
|
+
"browser": True
|
|
34
|
+
})
|
|
35
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sp-client"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Python library for scraping pros API"
|
|
9
|
+
dependencies = [
|
|
10
|
+
"requests>=2.31.0",
|
|
11
|
+
]
|
|
12
|
+
requires-python = ">=3.8"
|
|
13
|
+
|
|
14
|
+
[tool.setuptools.packages.find]
|
|
15
|
+
where = ["."]
|
|
16
|
+
include = ["sp_client*"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .scraping_pros_client import ScrapingPros
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import base64
|
|
3
|
+
|
|
4
|
+
class ScrapingPros:
    """Client for the Scraping Pros API.

    Args:
        token: Authentication token, sent as a Bearer credential in the
            ``Authorization`` header of every request.
        base_url: Optional root URL of the API (without the endpoint path).
            When omitted or empty, ``DEFAULT_BASE_URL`` is used.

    Example:
        >>> client = ScrapingPros('token123')
    """

    # Root URL used when the caller does not pass ``base_url``. This is a
    # local development server; pass the production URL to the constructor.
    DEFAULT_BASE_URL = "http://localhost:8000"

    # Path of the synchronous scrape endpoint, appended to the base URL.
    SCRAPE_PATH = "/v1/sync/scrape"

    # Where a returned screenshot is written, relative to the current working
    # directory. NOTE(review): the file name is fixed, so every screenshot
    # overwrites the previous one -- confirm whether a per-request name was
    # intended.
    SCREENSHOT_PATH = "screenshots/.jpg"

    def __init__(self, token, base_url=None):
        self.token = token
        # Drop a trailing slash so joining with SCRAPE_PATH never yields "//".
        self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip("/")

    def scrape_site(self, data):
        """Scrape a site through the synchronous scrape endpoint.

        Args:
            data: A dictionary with the instructions to scrape the website.
                It is sent as the JSON body of the request. Documentation in:
                https://gitlab.com/7Puentes/scraping-pros-api/-/blob/master/docs/sync/scrape.md?ref_type=heads

        Returns:
            The JSON body of the API response, parsed into Python objects.
            The HTTP status code is not checked here.

        Screenshots:
            If the response carries a non-empty "screenshot" entry, it is
            base64-decoded and written to ``SCREENSHOT_PATH``. The containing
            folder is created when it does not exist yet.
        """
        response = requests.post(
            f"{self.base_url}{self.SCRAPE_PATH}",
            json=data,
            headers={"Authorization": f"Bearer {self.token}"},
        )
        parsed_response = response.json()

        screenshot = parsed_response.get("screenshot")
        if screenshot:
            self._save_screenshot(screenshot)

        return parsed_response

    def _save_screenshot(self, encoded):
        """Decode a base64 string and write the bytes to ``SCREENSHOT_PATH``."""
        import os  # local import: keeps this block independent of file-level imports

        folder = os.path.dirname(self.SCREENSHOT_PATH)
        if folder:
            # Create the folder on demand so a missing directory does not
            # discard a response that was already fetched.
            os.makedirs(folder, exist_ok=True)

        image_bytes = base64.decodebytes(encoded.encode("utf-8"))
        with open(self.SCREENSHOT_PATH, "wb") as f:
            f.write(image_bytes)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
requests>=2.31.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sp_client
|