thordata-sdk 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +1 -1
- thordata/async_client.py +12 -7
- thordata/client.py +12 -7
- thordata/enums.py +2 -2
- thordata/exceptions.py +70 -19
- thordata/models.py +1 -1
- thordata/retry.py +1 -1
- thordata/tools/__init__.py +11 -1
- thordata/tools/code.py +17 -4
- thordata/tools/ecommerce.py +194 -10
- thordata/tools/professional.py +155 -0
- thordata/tools/search.py +47 -5
- thordata/tools/social.py +225 -41
- thordata/tools/travel.py +100 -0
- thordata/tools/video.py +80 -7
- thordata/types/task.py +16 -4
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/METADATA +63 -7
- thordata_sdk-1.6.0.dist-info/RECORD +35 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/WHEEL +1 -1
- thordata/_example_utils.py +0 -77
- thordata/demo.py +0 -138
- thordata_sdk-1.5.0.dist-info/RECORD +0 -35
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {thordata_sdk-1.5.0.dist-info → thordata_sdk-1.6.0.dist-info}/top_level.txt +0 -0
thordata/__init__.py
CHANGED

@@ -5,7 +5,7 @@ Official Python client for Thordata's Proxy Network, SERP API,
 Universal Scraping API (Web Unlocker), and Web Scraper API.
 """

-__version__ = "1.5.0"
+__version__ = "1.6.0"
 __author__ = "Thordata Developer Team/Kael Odin"
 __email__ = "support@thordata.com"

thordata/async_client.py
CHANGED

@@ -124,10 +124,10 @@ class AsyncThordataClient:
         ).rstrip("/")

         self._gateway_base_url = os.getenv(
-            "THORDATA_GATEWAY_BASE_URL", "https://
+            "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
         )
         self._child_base_url = os.getenv(
-            "THORDATA_CHILD_BASE_URL", "https://
+            "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
         )

         # URL Construction
@@ -145,7 +145,7 @@
         self._proxy_users_url = f"{shared_api_base}/proxy-users"

         whitelist_base = os.getenv(
-            "THORDATA_WHITELIST_BASE_URL", "https://
+            "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
         )
         self._whitelist_url = f"{whitelist_base}/whitelisted-ips"

@@ -352,7 +352,7 @@
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
     ) -> str:
         config = ScraperTaskConfig(
@@ -434,7 +434,7 @@
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         common_settings: CommonSettings,
     ) -> str:
         config = VideoTaskConfig(
@@ -550,7 +550,7 @@
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
         *,
         max_wait: float = 600.0,
@@ -971,7 +971,12 @@
         if port:
             params["port"] = str(port)

-
+        if product == "unlimited":
+            username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
+                "THORDATA_RESIDENTIAL_USERNAME"
+            )
+        else:
+            username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
         if username:
             params["td-customer"] = username

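The widened `parameters` annotation above means one task submission can now carry a batch of inputs. A minimal sketch of what that enables; the method name `create_scraper_task`, the import path, and the ASIN values are assumptions for illustration (only the parameter names come from the signatures in this diff), and credentials are assumed to come from environment variables:

import asyncio
from thordata import AsyncThordataClient  # import path assumed

async def main() -> None:
    client = AsyncThordataClient()
    # New in 1.6.0: `parameters` may be a single dict or a list of dicts.
    task_id = await client.create_scraper_task(  # hypothetical method name
        file_name="asins",
        spider_id="amazon_product_by-asin",
        spider_name="amazon.com",
        parameters=[
            {"asin": "B000000001", "domain": "amazon.com"},
            {"asin": "B000000002", "domain": "amazon.com"},
        ],
    )
    print(task_id)

asyncio.run(main())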
thordata/client.py
CHANGED

@@ -159,10 +159,10 @@ class ThordataClient:
         ).rstrip("/")

         self._gateway_base_url = os.getenv(
-            "THORDATA_GATEWAY_BASE_URL", "https://
+            "THORDATA_GATEWAY_BASE_URL", "https://openapi.thordata.com/api/gateway"
         )
         self._child_base_url = os.getenv(
-            "THORDATA_CHILD_BASE_URL", "https://
+            "THORDATA_CHILD_BASE_URL", "https://openapi.thordata.com/api/child"
         )

         # URL Construction
@@ -183,7 +183,7 @@
         self._proxy_users_url = f"{shared_api_base}/proxy-users"

         whitelist_base = os.getenv(
-            "THORDATA_WHITELIST_BASE_URL", "https://
+            "THORDATA_WHITELIST_BASE_URL", "https://openapi.thordata.com/api"
         )
         self._whitelist_url = f"{whitelist_base}/whitelisted-ips"

@@ -405,7 +405,7 @@
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
     ) -> str:
         config = ScraperTaskConfig(
@@ -490,7 +490,7 @@
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         common_settings: CommonSettings,
     ) -> str:
         config = VideoTaskConfig(
@@ -639,7 +639,7 @@
         file_name: str,
         spider_id: str,
         spider_name: str,
-        parameters: dict[str, Any],
+        parameters: dict[str, Any] | list[dict[str, Any]],
         universal_params: dict[str, Any] | None = None,
         *,
         max_wait: float = 600.0,
@@ -862,7 +862,12 @@
         if port:
             params["port"] = str(port)

-
+        if product == "unlimited":
+            username = os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
+                "THORDATA_RESIDENTIAL_USERNAME"
+            )
+        else:
+            username = os.getenv("THORDATA_RESIDENTIAL_USERNAME")
         if username:
            params["td-customer"] = username

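The new branch above gives "unlimited" proxies their own credential while falling back to the residential one. A standalone sketch of that lookup order, mirroring the added lines rather than calling the SDK (the usernames are illustrative values):

import os

os.environ["THORDATA_RESIDENTIAL_USERNAME"] = "res-user"       # illustrative
os.environ["THORDATA_UNLIMITED_USERNAME"] = "unlimited-user"   # illustrative

def pick_username(product: str) -> str | None:
    # Same precedence as the added branch: the dedicated unlimited
    # username wins when set, otherwise the residential one is reused.
    if product == "unlimited":
        return os.getenv("THORDATA_UNLIMITED_USERNAME") or os.getenv(
            "THORDATA_RESIDENTIAL_USERNAME"
        )
    return os.getenv("THORDATA_RESIDENTIAL_USERNAME")

print(pick_username("unlimited"))    # unlimited-user
print(pick_username("residential"))  # res-user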
thordata/enums.py
CHANGED

@@ -1,6 +1,6 @@
 """
 Enumerations for the Thordata Python SDK.
-Moved to thordata.types in v1.
+Moved to thordata.types in v1.6.0.
 This file is kept for backward compatibility.
 """

@@ -21,7 +21,7 @@ from .types import (
     SessionType,
     TaskStatus,
     TimeRange,
-    normalize_enum_value,
+    normalize_enum_value,
 )

 __all__ = [
thordata/exceptions.py
CHANGED

@@ -15,6 +15,7 @@ Exception Hierarchy:

 from __future__ import annotations

+from collections.abc import Mapping
 from typing import Any

 # =============================================================================
@@ -235,6 +236,46 @@
 # =============================================================================


+def _extract_request_id(payload: Any) -> str | None:
+    if isinstance(payload, Mapping):
+        for key in ("request_id", "requestId", "x_request_id", "x-request-id"):
+            val = payload.get(key)
+            if val is not None:
+                return str(val)
+    return None
+
+
+def _extract_retry_after(payload: Any) -> int | None:
+    if isinstance(payload, Mapping):
+        for key in ("retry_after", "retryAfter", "retry-after"):
+            val = payload.get(key)
+            if isinstance(val, int):
+                return val
+            if isinstance(val, str) and val.isdigit():
+                return int(val)
+    return None
+
+
+def _build_error_message(
+    message: str,
+    *,
+    status_code: int | None,
+    code: int | None,
+    request_id: str | None,
+) -> str:
+    parts: list[str] = [message]
+    meta: list[str] = []
+    if status_code is not None:
+        meta.append(f"http={status_code}")
+    if code is not None and code != status_code:
+        meta.append(f"code={code}")
+    if request_id:
+        meta.append(f"request_id={request_id}")
+    if meta:
+        parts.append("(" + ", ".join(meta) + ")")
+    return " ".join(parts)
+
+
 def raise_for_code(
     message: str,
     *,
@@ -266,49 +307,59 @@
     # Determine the effective error code.
     # Prefer payload `code` when present and not success (200),
     # otherwise fall back to HTTP status when it indicates an error.
+    # Determine the effective error code for routing.
     effective_code: int | None = None
-
     if code is not None and code != 200:
         effective_code = code
-    elif status_code is not None and status_code
+    elif status_code is not None and status_code >= 400:
         effective_code = status_code
     else:
         effective_code = code if code is not None else status_code

+    # Extract additional context from payload
+    final_request_id = request_id or _extract_request_id(payload)
+
+    # Build a consistent, informative error message
+    final_message = _build_error_message(
+        message,
+        status_code=status_code,
+        code=code,
+        request_id=final_request_id,
+    )
+
+    # Prepare common arguments for exception constructors
     kwargs = {
         "status_code": status_code,
         "code": code,
         "payload": payload,
-        "request_id":
+        "request_id": final_request_id,
     }

+    # --- Route to the correct exception class ---
+
     # Not collected (API payload code 300, often retryable, not billed)
-    # Check this FIRST since 300 is in API_CODES, not HTTP_STATUS_CODES
     if effective_code in ThordataNotCollectedError.API_CODES:
-        raise ThordataNotCollectedError(
+        raise ThordataNotCollectedError(final_message, **kwargs)

-    # Auth errors
+    # Auth errors (401, 403)
     if effective_code in ThordataAuthError.HTTP_STATUS_CODES:
-        raise ThordataAuthError(
+        raise ThordataAuthError(final_message, **kwargs)

-    # Rate limit errors
+    # Rate limit errors (429, 402)
     if effective_code in ThordataRateLimitError.HTTP_STATUS_CODES:
-
-        retry_after
-        if isinstance(payload, dict):
-            retry_after = payload.get("retry_after")
-        raise ThordataRateLimitError(message, retry_after=retry_after, **kwargs)
+        retry_after = _extract_retry_after(payload)
+        raise ThordataRateLimitError(final_message, retry_after=retry_after, **kwargs)

-    # Server errors
+    # Server errors (5xx)
     if effective_code is not None and 500 <= effective_code < 600:
-        raise ThordataServerError(
+        raise ThordataServerError(final_message, **kwargs)

-    # Validation errors
+    # Validation errors (400, 422)
     if effective_code in ThordataValidationError.HTTP_STATUS_CODES:
-        raise ThordataValidationError(
+        raise ThordataValidationError(final_message, **kwargs)

-    #
-    raise ThordataAPIError(
+    # Fallback to generic API error if no specific match
+    raise ThordataAPIError(final_message, **kwargs)


 # =============================================================================
thordata/models.py
CHANGED
thordata/retry.py
CHANGED

@@ -186,7 +186,7 @@ def with_retry(
         if isinstance(e, ThordataRateLimitError) and e.retry_after:
             delay = max(delay, e.retry_after)

-        logger.
+        logger.info(
             f"Retry attempt {attempt + 1}/{config.max_retries} "
             f"after {delay:.2f}s due to: {e}"
         )
thordata/tools/__init__.py
CHANGED

@@ -5,15 +5,19 @@ High-level abstractions for specific scraping targets.

 from .base import ToolRequest, VideoToolRequest
 from .code import GitHub
-from .ecommerce import Amazon
+from .ecommerce import Amazon, Walmart, eBay
+from .professional import Crunchbase, Glassdoor, Indeed
 from .search import GoogleMaps, GooglePlay, GoogleShopping
 from .social import Facebook, Instagram, LinkedIn, Reddit, TikTok, Twitter
+from .travel import Airbnb, Booking, Zillow
 from .video import YouTube

 __all__ = [
     "ToolRequest",
     "VideoToolRequest",
     "Amazon",
+    "eBay",
+    "Walmart",
     "GoogleMaps",
     "GoogleShopping",
     "GooglePlay",
@@ -25,4 +29,10 @@ __all__ = [
     "Reddit",
     "YouTube",
     "GitHub",
+    "Indeed",
+    "Glassdoor",
+    "Crunchbase",
+    "Booking",
+    "Zillow",
+    "Airbnb",
 ]
thordata/tools/code.py
CHANGED

@@ -14,13 +14,26 @@ class GitHub:

     @dataclass
     class Repository(ToolRequest):
-        """Github Repository Scraper"""
+        """Github Repository Scraper by Repo URL"""

         SPIDER_ID = "github_repository_by-repo-url"
         SPIDER_NAME = "github.com"
-
         repo_url: str
-
-
+
+    @dataclass
+    class RepositoryBySearchUrl(ToolRequest):
+        """Github Repository Scraper by Search URL"""
+
+        SPIDER_ID = "github_repository_by-search-url"
+        SPIDER_NAME = "github.com"
+        search_url: str
         page_turning: int | None = None
         max_num: int | None = None
+
+    @dataclass
+    class RepositoryByUrl(ToolRequest):
+        """Github Repository Scraper by URL"""
+
+        SPIDER_ID = "github_repository_by-url"
+        SPIDER_NAME = "github.com"
+        url: str
thordata/tools/ecommerce.py
CHANGED

@@ -12,9 +12,10 @@ from .base import ToolRequest
 class Amazon:
     """Namespace for Amazon tools."""

+    # --- Product Details (5 methods) ---
     @dataclass
-    class Product(ToolRequest):
-        """Amazon Product Details Scraper"""
+    class ProductByAsin(ToolRequest):
+        """Amazon Product Details Scraper by ASIN."""

         SPIDER_ID = "amazon_product_by-asin"
         SPIDER_NAME = "amazon.com"
@@ -22,16 +23,112 @@ class Amazon:
         asin: str
         domain: str = "amazon.com"

+    # Backward compatible alias
+    Product = ProductByAsin
+
     @dataclass
-    class
-        """Amazon
+    class ProductByUrl(ToolRequest):
+        """Amazon Product Details Scraper by URL."""

-        SPIDER_ID = "
+        SPIDER_ID = "amazon_product_by-url"
         SPIDER_NAME = "amazon.com"

         url: str
         zip_code: str | None = None

+    @dataclass
+    class ProductByKeywords(ToolRequest):
+        """Amazon Product Details Scraper by Keywords."""
+
+        SPIDER_ID = "amazon_product_by-keywords"
+        SPIDER_NAME = "amazon.com"
+
+        keyword: str
+        page_turning: int | None = None
+        lowest_price: float | None = None
+        highest_price: float | None = None
+
+    @dataclass
+    class ProductByCategoryUrl(ToolRequest):
+        """Amazon Product Details Scraper by Category URL."""
+
+        SPIDER_ID = "amazon_product_by-category-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        sort_by: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class ProductByBestSellers(ToolRequest):
+        """Amazon Product Details Scraper by Best Sellers URL."""
+
+        SPIDER_ID = "amazon_product_by-best-sellers"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        page_turning: int | None = None
+
+    # --- Other Amazon Tools ---
+
+    @dataclass
+    class GlobalProductByUrl(ToolRequest):
+        """Amazon Global Product Details Scraper by URL"""
+
+        SPIDER_ID = "amazon_global-product_by-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+
+    # Backward compatible alias
+    GlobalProduct = GlobalProductByUrl
+
+    @dataclass
+    class GlobalProductByCategoryUrl(ToolRequest):
+        """Amazon Global Product Details Scraper by Category URL"""
+
+        SPIDER_ID = "amazon_global-product_by-category-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        sort_by: str | None = None
+        get_sponsored: str | None = None
+        maximum: int | None = None
+
+    @dataclass
+    class GlobalProductBySellerUrl(ToolRequest):
+        """Amazon Global Product Details Scraper by Seller URL"""
+
+        SPIDER_ID = "amazon_global-product_by-seller-url"
+        SPIDER_NAME = "amazon.com"
+
+        url: str
+        maximum: int | None = None
+
+    @dataclass
+    class GlobalProductByKeywords(ToolRequest):
+        """Amazon Global Product Details Scraper by Keywords"""
+
+        SPIDER_ID = "amazon_global-product_by-keywords"
+        SPIDER_NAME = "amazon.com"
+
+        keyword: str
+        domain: str = "https://www.amazon.com"
+        lowest_price: str | None = None
+        highest_price: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class GlobalProductByKeywordsBrand(ToolRequest):
+        """Amazon Global Product Details Scraper by Keywords and Brand"""
+
+        SPIDER_ID = "amazon_global-product_by-keywords-brand"
+        SPIDER_NAME = "amazon.com"
+
+        keyword: str
+        brands: str
+        page_turning: int | None = None
+
     @dataclass
     class Review(ToolRequest):
         """Amazon Product Review Scraper"""
@@ -59,9 +156,96 @@ class Amazon:
         SPIDER_NAME = "amazon.com"

         keyword: str
-        domain: str = "amazon.com"
+        domain: str = "https://www.amazon.com/"
         page_turning: int = 1
-
-
-
-
+
+
+class eBay:
+    """Namespace for eBay tools."""
+
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """eBay Information Scraper by URL"""
+
+        SPIDER_ID = "ebay_ebay_by-url"
+        SPIDER_NAME = "ebay.com"
+        url: str
+
+    @dataclass
+    class ProductByCategoryUrl(ToolRequest):
+        """eBay Information Scraper by Category URL"""
+
+        SPIDER_ID = "ebay_ebay_by-category-url"
+        SPIDER_NAME = "ebay.com"
+        url: str
+        count: str | None = None
+
+    @dataclass
+    class ProductByKeywords(ToolRequest):
+        """eBay Information Scraper by Keywords"""
+
+        SPIDER_ID = "ebay_ebay_by-keywords"
+        SPIDER_NAME = "ebay.com"
+        keywords: str
+        count: str | None = None
+
+    @dataclass
+    class ProductByListUrl(ToolRequest):
+        """eBay Information Scraper by List URL"""
+
+        SPIDER_ID = "ebay_ebay_by-listurl"
+        SPIDER_NAME = "ebay.com"
+        url: str
+        count: str | None = None
+
+
+class Walmart:
+    """Namespace for Walmart tools."""
+
+    @dataclass
+    class ProductByUrl(ToolRequest):
+        """Walmart Product Information Scraper by URL"""
+
+        SPIDER_ID = "walmart_product_by-url"
+        SPIDER_NAME = "walmart.com"
+        url: str
+        all_variations: str | None = None
+
+    @dataclass
+    class ProductByCategoryUrl(ToolRequest):
+        """Walmart Product Information Scraper by Category URL"""
+
+        SPIDER_ID = "walmart_product_by-category-url"
+        SPIDER_NAME = "walmart.com"
+        category_url: str
+        all_variations: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class ProductBySku(ToolRequest):
+        """Walmart Product Information Scraper by SKU"""
+
+        SPIDER_ID = "walmart_product_by-sku"
+        SPIDER_NAME = "walmart.com"
+        sku: str
+        all_variations: str | None = None
+
+    @dataclass
+    class ProductByKeywords(ToolRequest):
+        """Walmart Product Information Scraper by Keywords"""
+
+        SPIDER_ID = "walmart_product_by-keywords"
+        SPIDER_NAME = "walmart.com"
+        keyword: str
+        domain: str = "https://www.walmart.com/"
+        all_variations: str | None = None
+        page_turning: int | None = None
+
+    @dataclass
+    class ProductByZipcodes(ToolRequest):
+        """Walmart Product Information Scraper by Zipcodes"""
+
+        SPIDER_ID = "walmart_product_by-zipcodes"
+        SPIDER_NAME = "walmart.com"
+        url: str
+        zip_code: str | None = None