thordata-sdk 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/PKG-INFO +228 -10
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/README.md +223 -6
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/pyproject.toml +26 -29
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/__init__.py +33 -36
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/_utils.py +21 -21
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/async_client.py +230 -192
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/client.py +281 -222
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/enums.py +32 -6
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/exceptions.py +60 -31
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/models.py +173 -146
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/parameters.py +7 -6
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata/retry.py +109 -111
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata_sdk.egg-info/PKG-INFO +228 -10
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata_sdk.egg-info/SOURCES.txt +1 -0
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata_sdk.egg-info/requires.txt +3 -1
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/tests/test_async_client.py +3 -2
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/tests/test_async_client_errors.py +8 -4
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/tests/test_client.py +8 -7
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/tests/test_client_errors.py +2 -2
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/tests/test_enums.py +4 -3
- thordata_sdk-0.5.0/tests/test_examples_demo_serp_api.py +50 -0
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/tests/test_exceptions.py +7 -6
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/tests/test_models.py +11 -13
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/LICENSE +0 -0
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/setup.cfg +0 -0
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata_sdk.egg-info/dependency_links.txt +0 -0
- {thordata_sdk-0.4.0 → thordata_sdk-0.5.0}/src/thordata_sdk.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thordata-sdk
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: The Official Python SDK for Thordata - AI Data Infrastructure & Proxy Network.
|
|
5
5
|
Author-email: Thordata Developer Team <support@thordata.com>
|
|
6
6
|
License: MIT
|
|
@@ -16,7 +16,6 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
16
16
|
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
17
|
Classifier: Topic :: Internet :: Proxy Servers
|
|
18
18
|
Classifier: Programming Language :: Python :: 3
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.9
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.10
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.11
|
|
@@ -24,15 +23,17 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
24
23
|
Classifier: License :: OSI Approved :: MIT License
|
|
25
24
|
Classifier: Operating System :: OS Independent
|
|
26
25
|
Classifier: Typing :: Typed
|
|
27
|
-
Requires-Python: >=3.8
|
|
26
|
+
Requires-Python: >=3.9
|
|
28
27
|
Description-Content-Type: text/markdown
|
|
29
28
|
License-File: LICENSE
|
|
30
29
|
Requires-Dist: requests>=2.25.0
|
|
31
|
-
Requires-Dist: aiohttp>=3.
|
|
30
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
32
31
|
Provides-Extra: dev
|
|
33
32
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
34
33
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
35
34
|
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-httpserver>=1.0.0; extra == "dev"
|
|
36
|
+
Requires-Dist: python-dotenv>=1.0.0; extra == "dev"
|
|
36
37
|
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
37
38
|
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
38
39
|
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
@@ -249,6 +250,28 @@ for result in results.get("organic", []):
|
|
|
249
250
|
print(f"{result['title']}: {result['link']}")
|
|
250
251
|
```
|
|
251
252
|
|
|
253
|
+
#### General Calling Method
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
from thordata import ThordataClient, Engine
|
|
257
|
+
|
|
258
|
+
client = ThordataClient(scraper_token="YOUR_SCRAPER_TOKEN")
|
|
259
|
+
|
|
260
|
+
results = client.serp_search(
|
|
261
|
+
query="pizza",
|
|
262
|
+
engine=Engine.GOOGLE, # or "google"
|
|
263
|
+
num=10,
|
|
264
|
+
country="us",
|
|
265
|
+
language="en",
|
|
266
|
+
search_type="news", # corresponds to tbm=nws
|
|
267
|
+
# Other parameters are passed in via kwargs
|
|
268
|
+
ibp="some_ibp_value",
|
|
269
|
+
lsig="some_lsig_value",
|
|
270
|
+
)
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
**Note**: All parameters above will be assembled into Thordata SERP API request parameters.
|
|
274
|
+
|
|
252
275
|
#### Advanced Search Options
|
|
253
276
|
|
|
254
277
|
```python
|
|
@@ -292,7 +315,202 @@ yandex_results = client.serp_search("AI news", engine=Engine.YANDEX)
|
|
|
292
315
|
ddg_results = client.serp_search("AI news", engine=Engine.DUCKDUCKGO)
|
|
293
316
|
```
|
|
294
317
|
|
|
295
|
-
|
|
318
|
+
---
|
|
319
|
+
|
|
320
|
+
## 🔧 SERP API Parameter Mapping
|
|
321
|
+
|
|
322
|
+
Thordata's SERP API supports multiple search engines and sub-features (Google Search/Shopping/News, etc.).
|
|
323
|
+
This SDK wraps common parameters through `ThordataClient.serp_search` and `SerpRequest`, while other parameters can be passed directly through `**kwargs`.
|
|
324
|
+
|
|
325
|
+
### Google Search Parameter Mapping
|
|
326
|
+
|
|
327
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
328
|
+
|-------------------|-----------------|-------------|
|
|
329
|
+
| q | query | Search keyword |
|
|
330
|
+
| engine | engine | Engine.GOOGLE / "google" |
|
|
331
|
+
| google_domain | google_domain | e.g., "google.co.uk" |
|
|
332
|
+
| gl | country | Country/region, e.g., "us" |
|
|
333
|
+
| hl | language | Language, e.g., "en", "zh-CN" |
|
|
334
|
+
| cr | countries_filter | Multi-country filter, e.g., "countryFR\|countryDE" |
|
|
335
|
+
| lr | languages_filter | Multi-language filter, e.g., "lang_en\|lang_fr" |
|
|
336
|
+
| location | location | Exact location, e.g., "India" |
|
|
337
|
+
| uule | uule | Base64 encoded location string |
|
|
338
|
+
| tbm | search_type | "images"→tbm=isch, "shopping"→tbm=shop, "news"→tbm=nws, "videos"→tbm=vid, other values passed through as-is |
|
|
339
|
+
| start | start | Result offset for pagination |
|
|
340
|
+
| num | num | Number of results per page |
|
|
341
|
+
| ludocid | ludocid | Google Place ID |
|
|
342
|
+
| kgmid | kgmid | Google Knowledge Graph ID |
|
|
343
|
+
| ibp | ibp="..." (kwargs) | Passed through **kwargs |
|
|
344
|
+
| lsig | lsig="..." (kwargs) | Same as above |
|
|
345
|
+
| si | si="..." (kwargs) | Same as above |
|
|
346
|
+
| uds | uds="ADV" (kwargs) | Same as above |
|
|
347
|
+
| tbs | time_filter or tbs="..." | time_filter="week" generates tbs=qdr:w, can also pass complete tbs directly |
|
|
348
|
+
| safe | safe_search | True → safe=active, False → safe=off |
|
|
349
|
+
| nfpr | no_autocorrect | True → nfpr=1 |
|
|
350
|
+
| filter | filter_duplicates | True → filter=1, False → filter=0 |
|
|
351
|
+
|
|
352
|
+
**Example: Google Search Basic Usage**
|
|
353
|
+
|
|
354
|
+
```python
|
|
355
|
+
results = client.serp_search(
|
|
356
|
+
query="python web scraping best practices",
|
|
357
|
+
engine=Engine.GOOGLE,
|
|
358
|
+
country="us",
|
|
359
|
+
language="en",
|
|
360
|
+
num=10,
|
|
361
|
+
time_filter="week", # Last week
|
|
362
|
+
safe_search=True, # Adult content filter
|
|
363
|
+
)
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
### Google Shopping Parameter Mapping
|
|
367
|
+
|
|
368
|
+
Shopping mode still uses engine="google"; set search_type="shopping" to select it:
|
|
369
|
+
|
|
370
|
+
```python
|
|
371
|
+
results = client.serp_search(
|
|
372
|
+
query="iPhone 15",
|
|
373
|
+
engine=Engine.GOOGLE,
|
|
374
|
+
search_type="shopping", # tbm=shop
|
|
375
|
+
country="us",
|
|
376
|
+
language="en",
|
|
377
|
+
num=20,
|
|
378
|
+
min_price=500, # Parameters below passed through kwargs
|
|
379
|
+
max_price=1500,
|
|
380
|
+
sort_by=1, # 1=price low to high, 2=high to low
|
|
381
|
+
free_shipping=True,
|
|
382
|
+
on_sale=True,
|
|
383
|
+
small_business=True,
|
|
384
|
+
direct_link=True,
|
|
385
|
+
shoprs="FILTER_ID_HERE",
|
|
386
|
+
)
|
|
387
|
+
shopping_items = results.get("shopping_results", [])
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
391
|
+
|-------------------|-----------------|-------------|
|
|
392
|
+
| q | query | Search keyword |
|
|
393
|
+
| google_domain | google_domain | Same as above |
|
|
394
|
+
| gl | country | Same as above |
|
|
395
|
+
| hl | language | Same as above |
|
|
396
|
+
| location | location | Same as above |
|
|
397
|
+
| uule | uule | Same as above |
|
|
398
|
+
| start | start | Offset |
|
|
399
|
+
| num | num | Quantity |
|
|
400
|
+
| tbs | time_filter or tbs="..." | Same as above |
|
|
401
|
+
| shoprs | shoprs="..." (kwargs) | Filter ID |
|
|
402
|
+
| min_price | min_price=... (kwargs) | Minimum price |
|
|
403
|
+
| max_price | max_price=... (kwargs) | Maximum price |
|
|
404
|
+
| sort_by | sort_by=1/2 (kwargs) | Sort order |
|
|
405
|
+
| free_shipping | free_shipping=True/False (kwargs) | Free shipping |
|
|
406
|
+
| on_sale | on_sale=True/False (kwargs) | On sale |
|
|
407
|
+
| small_business | small_business=True/False (kwargs) | Small business |
|
|
408
|
+
| direct_link | direct_link=True/False (kwargs) | Include direct links |
|
|
409
|
+
|
|
410
|
+
### Google Local Parameter Mapping
|
|
411
|
+
|
|
412
|
+
Google Local is mainly about location-based local searches.
|
|
413
|
+
In the SDK, you can use search_type="local" to mark Local mode (tbm passed through as "local"), combined with location + uule.
|
|
414
|
+
|
|
415
|
+
```python
|
|
416
|
+
results = client.serp_search(
|
|
417
|
+
query="pizza near me",
|
|
418
|
+
engine=Engine.GOOGLE,
|
|
419
|
+
search_type="local",
|
|
420
|
+
google_domain="google.com",
|
|
421
|
+
country="us",
|
|
422
|
+
language="en",
|
|
423
|
+
location="San Francisco",
|
|
424
|
+
uule="w+CAIQICIFU2FuIEZyYW5jaXNjbw", # Example value
|
|
425
|
+
start=0, # Local only accepts 0, 20, 40...
|
|
426
|
+
)
|
|
427
|
+
local_results = results.get("local_results", results.get("organic", []))
|
|
428
|
+
```
|
|
429
|
+
|
|
430
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
431
|
+
|-------------------|-----------------|-------------|
|
|
432
|
+
| q | query | Search term |
|
|
433
|
+
| google_domain | google_domain | Domain |
|
|
434
|
+
| gl | country | Country |
|
|
435
|
+
| hl | language | Language |
|
|
436
|
+
| location | location | Local location |
|
|
437
|
+
| uule | uule | Encoded location |
|
|
438
|
+
| start | start | Offset (must be 0,20,40...) |
|
|
439
|
+
| ludocid | ludocid | Place ID (commonly used in Local results) |
|
|
440
|
+
| tbs | time_filter or tbs="..." | Advanced filtering |
|
|
441
|
+
|
|
442
|
+
### Google Videos Parameter Mapping
|
|
443
|
+
|
|
444
|
+
```python
|
|
445
|
+
results = client.serp_search(
|
|
446
|
+
query="python async tutorial",
|
|
447
|
+
engine=Engine.GOOGLE,
|
|
448
|
+
search_type="videos", # tbm=vid
|
|
449
|
+
country="us",
|
|
450
|
+
language="en",
|
|
451
|
+
languages_filter="lang_en|lang_fr",
|
|
452
|
+
location="United States",
|
|
453
|
+
uule="ENCODED_LOCATION_HERE",
|
|
454
|
+
num=10,
|
|
455
|
+
time_filter="month",
|
|
456
|
+
safe_search=True,
|
|
457
|
+
filter_duplicates=True,
|
|
458
|
+
)
|
|
459
|
+
video_results = results.get("video_results", results.get("organic", []))
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
463
|
+
|-------------------|-----------------|-------------|
|
|
464
|
+
| q | query | Search term |
|
|
465
|
+
| google_domain | google_domain | Domain |
|
|
466
|
+
| gl | country | Country |
|
|
467
|
+
| hl | language | Language |
|
|
468
|
+
| lr | languages_filter | Multi-language filter |
|
|
469
|
+
| location | location | Geographic location |
|
|
470
|
+
| uule | uule | Encoded location |
|
|
471
|
+
| start | start | Offset |
|
|
472
|
+
| num | num | Quantity |
|
|
473
|
+
| tbs | time_filter or tbs="..." | Time and advanced filtering |
|
|
474
|
+
| safe | safe_search | Adult content filter |
|
|
475
|
+
| nfpr | no_autocorrect | Disable auto-correction |
|
|
476
|
+
| filter | filter_duplicates | Remove duplicates |
|
|
477
|
+
|
|
478
|
+
### Google News Parameter Mapping
|
|
479
|
+
|
|
480
|
+
Google News has a set of exclusive token parameters for precise control of "topics/media/sections/stories".
|
|
481
|
+
|
|
482
|
+
```python
|
|
483
|
+
results = client.serp_search(
|
|
484
|
+
query="AI regulation",
|
|
485
|
+
engine=Engine.GOOGLE,
|
|
486
|
+
search_type="news", # tbm=nws
|
|
487
|
+
country="us",
|
|
488
|
+
language="en",
|
|
489
|
+
topic_token="YOUR_TOPIC_TOKEN", # Optional
|
|
490
|
+
publication_token="YOUR_PUBLICATION_TOKEN", # Optional
|
|
491
|
+
section_token="YOUR_SECTION_TOKEN", # Optional
|
|
492
|
+
story_token="YOUR_STORY_TOKEN", # Optional
|
|
493
|
+
so=1, # 0=relevance, 1=time
|
|
494
|
+
)
|
|
495
|
+
news_results = results.get("news_results", results.get("organic", []))
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
499
|
+
|-------------------|-----------------|-------------|
|
|
500
|
+
| q | query | Search term |
|
|
501
|
+
| gl | country | Country |
|
|
502
|
+
| hl | language | Language |
|
|
503
|
+
| topic_token | topic_token="..." (kwargs) | Topic token |
|
|
504
|
+
| publication_token | publication_token="..." (kwargs) | Media token |
|
|
505
|
+
| section_token | section_token="..." (kwargs) | Section token |
|
|
506
|
+
| story_token | story_token="..." (kwargs) | Story token |
|
|
507
|
+
| so | so=0/1 (kwargs) | Sort: 0=relevance, 1=time |
|
|
508
|
+
|
|
509
|
+
---
|
|
510
|
+
|
|
511
|
+
👉 For more SERP modes and parameter mappings, see docs/serp_reference.md.
|
|
512
|
+
|
|
513
|
+
## 🔓 Web Unlocker (Universal Scraping API)
|
|
296
514
|
|
|
297
515
|
Automatically bypass anti-bot protections:
|
|
298
516
|
|
|
@@ -357,7 +575,7 @@ with open("screenshot.png", "wb") as f:
|
|
|
357
575
|
f.write(png_bytes)
|
|
358
576
|
```
|
|
359
577
|
|
|
360
|
-
###
|
|
578
|
+
### Web Scraper API (Async Tasks)
|
|
361
579
|
|
|
362
580
|
For complex scraping jobs that run asynchronously:
|
|
363
581
|
|
|
@@ -392,7 +610,7 @@ if status in ("ready", "success"):
|
|
|
392
610
|
print(f"Download: {download_url}")
|
|
393
611
|
```
|
|
394
612
|
|
|
395
|
-
###
|
|
613
|
+
### Async Client (High Concurrency)
|
|
396
614
|
|
|
397
615
|
For maximum performance with concurrent requests:
|
|
398
616
|
|
|
@@ -447,7 +665,7 @@ async def search_multiple():
|
|
|
447
665
|
asyncio.run(search_multiple())
|
|
448
666
|
```
|
|
449
667
|
|
|
450
|
-
###
|
|
668
|
+
### Location APIs
|
|
451
669
|
|
|
452
670
|
Discover available geo-targeting options:
|
|
453
671
|
|
|
@@ -479,7 +697,7 @@ for asn in asns[:5]:
|
|
|
479
697
|
print(f" {asn['asn_code']}: {asn['asn_name']}")
|
|
480
698
|
```
|
|
481
699
|
|
|
482
|
-
###
|
|
700
|
+
### Error Handling
|
|
483
701
|
|
|
484
702
|
```python
|
|
485
703
|
from thordata import (
|
|
@@ -510,7 +728,7 @@ except ThordataError as e:
|
|
|
510
728
|
print(f"General error: {e}")
|
|
511
729
|
```
|
|
512
730
|
|
|
513
|
-
###
|
|
731
|
+
### Retry Configuration
|
|
514
732
|
|
|
515
733
|
Customize automatic retry behavior:
|
|
516
734
|
|
|
@@ -208,6 +208,28 @@ for result in results.get("organic", []):
|
|
|
208
208
|
print(f"{result['title']}: {result['link']}")
|
|
209
209
|
```
|
|
210
210
|
|
|
211
|
+
#### General Calling Method
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from thordata import ThordataClient, Engine
|
|
215
|
+
|
|
216
|
+
client = ThordataClient(scraper_token="YOUR_SCRAPER_TOKEN")
|
|
217
|
+
|
|
218
|
+
results = client.serp_search(
|
|
219
|
+
query="pizza",
|
|
220
|
+
engine=Engine.GOOGLE, # or "google"
|
|
221
|
+
num=10,
|
|
222
|
+
country="us",
|
|
223
|
+
language="en",
|
|
224
|
+
search_type="news", # corresponds to tbm=nws
|
|
225
|
+
# Other parameters are passed in via kwargs
|
|
226
|
+
ibp="some_ibp_value",
|
|
227
|
+
lsig="some_lsig_value",
|
|
228
|
+
)
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
**Note**: All parameters above will be assembled into Thordata SERP API request parameters.
|
|
232
|
+
|
|
211
233
|
#### Advanced Search Options
|
|
212
234
|
|
|
213
235
|
```python
|
|
@@ -251,7 +273,202 @@ yandex_results = client.serp_search("AI news", engine=Engine.YANDEX)
|
|
|
251
273
|
ddg_results = client.serp_search("AI news", engine=Engine.DUCKDUCKGO)
|
|
252
274
|
```
|
|
253
275
|
|
|
254
|
-
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## 🔧 SERP API Parameter Mapping
|
|
279
|
+
|
|
280
|
+
Thordata's SERP API supports multiple search engines and sub-features (Google Search/Shopping/News, etc.).
|
|
281
|
+
This SDK wraps common parameters through `ThordataClient.serp_search` and `SerpRequest`, while other parameters can be passed directly through `**kwargs`.
|
|
282
|
+
|
|
283
|
+
### Google Search Parameter Mapping
|
|
284
|
+
|
|
285
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
286
|
+
|-------------------|-----------------|-------------|
|
|
287
|
+
| q | query | Search keyword |
|
|
288
|
+
| engine | engine | Engine.GOOGLE / "google" |
|
|
289
|
+
| google_domain | google_domain | e.g., "google.co.uk" |
|
|
290
|
+
| gl | country | Country/region, e.g., "us" |
|
|
291
|
+
| hl | language | Language, e.g., "en", "zh-CN" |
|
|
292
|
+
| cr | countries_filter | Multi-country filter, e.g., "countryFR\|countryDE" |
|
|
293
|
+
| lr | languages_filter | Multi-language filter, e.g., "lang_en\|lang_fr" |
|
|
294
|
+
| location | location | Exact location, e.g., "India" |
|
|
295
|
+
| uule | uule | Base64 encoded location string |
|
|
296
|
+
| tbm | search_type | "images"→tbm=isch, "shopping"→tbm=shop, "news"→tbm=nws, "videos"→tbm=vid, other values passed through as-is |
|
|
297
|
+
| start | start | Result offset for pagination |
|
|
298
|
+
| num | num | Number of results per page |
|
|
299
|
+
| ludocid | ludocid | Google Place ID |
|
|
300
|
+
| kgmid | kgmid | Google Knowledge Graph ID |
|
|
301
|
+
| ibp | ibp="..." (kwargs) | Passed through **kwargs |
|
|
302
|
+
| lsig | lsig="..." (kwargs) | Same as above |
|
|
303
|
+
| si | si="..." (kwargs) | Same as above |
|
|
304
|
+
| uds | uds="ADV" (kwargs) | Same as above |
|
|
305
|
+
| tbs | time_filter or tbs="..." | time_filter="week" generates tbs=qdr:w, can also pass complete tbs directly |
|
|
306
|
+
| safe | safe_search | True → safe=active, False → safe=off |
|
|
307
|
+
| nfpr | no_autocorrect | True → nfpr=1 |
|
|
308
|
+
| filter | filter_duplicates | True → filter=1, False → filter=0 |
|
|
309
|
+
|
|
310
|
+
**Example: Google Search Basic Usage**
|
|
311
|
+
|
|
312
|
+
```python
|
|
313
|
+
results = client.serp_search(
|
|
314
|
+
query="python web scraping best practices",
|
|
315
|
+
engine=Engine.GOOGLE,
|
|
316
|
+
country="us",
|
|
317
|
+
language="en",
|
|
318
|
+
num=10,
|
|
319
|
+
time_filter="week", # Last week
|
|
320
|
+
safe_search=True, # Adult content filter
|
|
321
|
+
)
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Google Shopping Parameter Mapping
|
|
325
|
+
|
|
326
|
+
Shopping mode still uses engine="google"; set search_type="shopping" to select it:
|
|
327
|
+
|
|
328
|
+
```python
|
|
329
|
+
results = client.serp_search(
|
|
330
|
+
query="iPhone 15",
|
|
331
|
+
engine=Engine.GOOGLE,
|
|
332
|
+
search_type="shopping", # tbm=shop
|
|
333
|
+
country="us",
|
|
334
|
+
language="en",
|
|
335
|
+
num=20,
|
|
336
|
+
min_price=500, # Parameters below passed through kwargs
|
|
337
|
+
max_price=1500,
|
|
338
|
+
sort_by=1, # 1=price low to high, 2=high to low
|
|
339
|
+
free_shipping=True,
|
|
340
|
+
on_sale=True,
|
|
341
|
+
small_business=True,
|
|
342
|
+
direct_link=True,
|
|
343
|
+
shoprs="FILTER_ID_HERE",
|
|
344
|
+
)
|
|
345
|
+
shopping_items = results.get("shopping_results", [])
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
349
|
+
|-------------------|-----------------|-------------|
|
|
350
|
+
| q | query | Search keyword |
|
|
351
|
+
| google_domain | google_domain | Same as above |
|
|
352
|
+
| gl | country | Same as above |
|
|
353
|
+
| hl | language | Same as above |
|
|
354
|
+
| location | location | Same as above |
|
|
355
|
+
| uule | uule | Same as above |
|
|
356
|
+
| start | start | Offset |
|
|
357
|
+
| num | num | Quantity |
|
|
358
|
+
| tbs | time_filter or tbs="..." | Same as above |
|
|
359
|
+
| shoprs | shoprs="..." (kwargs) | Filter ID |
|
|
360
|
+
| min_price | min_price=... (kwargs) | Minimum price |
|
|
361
|
+
| max_price | max_price=... (kwargs) | Maximum price |
|
|
362
|
+
| sort_by | sort_by=1/2 (kwargs) | Sort order |
|
|
363
|
+
| free_shipping | free_shipping=True/False (kwargs) | Free shipping |
|
|
364
|
+
| on_sale | on_sale=True/False (kwargs) | On sale |
|
|
365
|
+
| small_business | small_business=True/False (kwargs) | Small business |
|
|
366
|
+
| direct_link | direct_link=True/False (kwargs) | Include direct links |
|
|
367
|
+
|
|
368
|
+
### Google Local Parameter Mapping
|
|
369
|
+
|
|
370
|
+
Google Local is mainly about location-based local searches.
|
|
371
|
+
In the SDK, you can use search_type="local" to mark Local mode (tbm passed through as "local"), combined with location + uule.
|
|
372
|
+
|
|
373
|
+
```python
|
|
374
|
+
results = client.serp_search(
|
|
375
|
+
query="pizza near me",
|
|
376
|
+
engine=Engine.GOOGLE,
|
|
377
|
+
search_type="local",
|
|
378
|
+
google_domain="google.com",
|
|
379
|
+
country="us",
|
|
380
|
+
language="en",
|
|
381
|
+
location="San Francisco",
|
|
382
|
+
uule="w+CAIQICIFU2FuIEZyYW5jaXNjbw", # Example value
|
|
383
|
+
start=0, # Local only accepts 0, 20, 40...
|
|
384
|
+
)
|
|
385
|
+
local_results = results.get("local_results", results.get("organic", []))
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
389
|
+
|-------------------|-----------------|-------------|
|
|
390
|
+
| q | query | Search term |
|
|
391
|
+
| google_domain | google_domain | Domain |
|
|
392
|
+
| gl | country | Country |
|
|
393
|
+
| hl | language | Language |
|
|
394
|
+
| location | location | Local location |
|
|
395
|
+
| uule | uule | Encoded location |
|
|
396
|
+
| start | start | Offset (must be 0,20,40...) |
|
|
397
|
+
| ludocid | ludocid | Place ID (commonly used in Local results) |
|
|
398
|
+
| tbs | time_filter or tbs="..." | Advanced filtering |
|
|
399
|
+
|
|
400
|
+
### Google Videos Parameter Mapping
|
|
401
|
+
|
|
402
|
+
```python
|
|
403
|
+
results = client.serp_search(
|
|
404
|
+
query="python async tutorial",
|
|
405
|
+
engine=Engine.GOOGLE,
|
|
406
|
+
search_type="videos", # tbm=vid
|
|
407
|
+
country="us",
|
|
408
|
+
language="en",
|
|
409
|
+
languages_filter="lang_en|lang_fr",
|
|
410
|
+
location="United States",
|
|
411
|
+
uule="ENCODED_LOCATION_HERE",
|
|
412
|
+
num=10,
|
|
413
|
+
time_filter="month",
|
|
414
|
+
safe_search=True,
|
|
415
|
+
filter_duplicates=True,
|
|
416
|
+
)
|
|
417
|
+
video_results = results.get("video_results", results.get("organic", []))
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
421
|
+
|-------------------|-----------------|-------------|
|
|
422
|
+
| q | query | Search term |
|
|
423
|
+
| google_domain | google_domain | Domain |
|
|
424
|
+
| gl | country | Country |
|
|
425
|
+
| hl | language | Language |
|
|
426
|
+
| lr | languages_filter | Multi-language filter |
|
|
427
|
+
| location | location | Geographic location |
|
|
428
|
+
| uule | uule | Encoded location |
|
|
429
|
+
| start | start | Offset |
|
|
430
|
+
| num | num | Quantity |
|
|
431
|
+
| tbs | time_filter or tbs="..." | Time and advanced filtering |
|
|
432
|
+
| safe | safe_search | Adult content filter |
|
|
433
|
+
| nfpr | no_autocorrect | Disable auto-correction |
|
|
434
|
+
| filter | filter_duplicates | Remove duplicates |
|
|
435
|
+
|
|
436
|
+
### Google News Parameter Mapping
|
|
437
|
+
|
|
438
|
+
Google News has a set of exclusive token parameters for precise control of "topics/media/sections/stories".
|
|
439
|
+
|
|
440
|
+
```python
|
|
441
|
+
results = client.serp_search(
|
|
442
|
+
query="AI regulation",
|
|
443
|
+
engine=Engine.GOOGLE,
|
|
444
|
+
search_type="news", # tbm=nws
|
|
445
|
+
country="us",
|
|
446
|
+
language="en",
|
|
447
|
+
topic_token="YOUR_TOPIC_TOKEN", # Optional
|
|
448
|
+
publication_token="YOUR_PUBLICATION_TOKEN", # Optional
|
|
449
|
+
section_token="YOUR_SECTION_TOKEN", # Optional
|
|
450
|
+
story_token="YOUR_STORY_TOKEN", # Optional
|
|
451
|
+
so=1, # 0=relevance, 1=time
|
|
452
|
+
)
|
|
453
|
+
news_results = results.get("news_results", results.get("organic", []))
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
| Document Parameter | SDK Field/Usage | Description |
|
|
457
|
+
|-------------------|-----------------|-------------|
|
|
458
|
+
| q | query | Search term |
|
|
459
|
+
| gl | country | Country |
|
|
460
|
+
| hl | language | Language |
|
|
461
|
+
| topic_token | topic_token="..." (kwargs) | Topic token |
|
|
462
|
+
| publication_token | publication_token="..." (kwargs) | Media token |
|
|
463
|
+
| section_token | section_token="..." (kwargs) | Section token |
|
|
464
|
+
| story_token | story_token="..." (kwargs) | Story token |
|
|
465
|
+
| so | so=0/1 (kwargs) | Sort: 0=relevance, 1=time |
|
|
466
|
+
|
|
467
|
+
---
|
|
468
|
+
|
|
469
|
+
👉 For more SERP modes and parameter mappings, see docs/serp_reference.md.
|
|
470
|
+
|
|
471
|
+
## 🔓 Web Unlocker (Universal Scraping API)
|
|
255
472
|
|
|
256
473
|
Automatically bypass anti-bot protections:
|
|
257
474
|
|
|
@@ -316,7 +533,7 @@ with open("screenshot.png", "wb") as f:
|
|
|
316
533
|
f.write(png_bytes)
|
|
317
534
|
```
|
|
318
535
|
|
|
319
|
-
###
|
|
536
|
+
### Web Scraper API (Async Tasks)
|
|
320
537
|
|
|
321
538
|
For complex scraping jobs that run asynchronously:
|
|
322
539
|
|
|
@@ -351,7 +568,7 @@ if status in ("ready", "success"):
|
|
|
351
568
|
print(f"Download: {download_url}")
|
|
352
569
|
```
|
|
353
570
|
|
|
354
|
-
###
|
|
571
|
+
### Async Client (High Concurrency)
|
|
355
572
|
|
|
356
573
|
For maximum performance with concurrent requests:
|
|
357
574
|
|
|
@@ -406,7 +623,7 @@ async def search_multiple():
|
|
|
406
623
|
asyncio.run(search_multiple())
|
|
407
624
|
```
|
|
408
625
|
|
|
409
|
-
###
|
|
626
|
+
### Location APIs
|
|
410
627
|
|
|
411
628
|
Discover available geo-targeting options:
|
|
412
629
|
|
|
@@ -438,7 +655,7 @@ for asn in asns[:5]:
|
|
|
438
655
|
print(f" {asn['asn_code']}: {asn['asn_name']}")
|
|
439
656
|
```
|
|
440
657
|
|
|
441
|
-
###
|
|
658
|
+
### Error Handling
|
|
442
659
|
|
|
443
660
|
```python
|
|
444
661
|
from thordata import (
|
|
@@ -469,7 +686,7 @@ except ThordataError as e:
|
|
|
469
686
|
print(f"General error: {e}")
|
|
470
687
|
```
|
|
471
688
|
|
|
472
|
-
###
|
|
689
|
+
### Retry Configuration
|
|
473
690
|
|
|
474
691
|
Customize automatic retry behavior:
|
|
475
692
|
|