thordata_sdk-0.5.0-py3-none-any.whl → thordata_sdk-0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thordata/__init__.py +139 -135
- thordata/_utils.py +144 -126
- thordata/async_client.py +815 -768
- thordata/client.py +1040 -995
- thordata/demo.py +140 -0
- thordata/enums.py +384 -315
- thordata/exceptions.py +344 -344
- thordata/models.py +840 -725
- thordata/parameters.py +53 -53
- thordata/retry.py +380 -380
- {thordata_sdk-0.5.0.dist-info → thordata_sdk-0.7.0.dist-info}/METADATA +1053 -896
- thordata_sdk-0.7.0.dist-info/RECORD +15 -0
- {thordata_sdk-0.5.0.dist-info → thordata_sdk-0.7.0.dist-info}/licenses/LICENSE +21 -21
- thordata_sdk-0.5.0.dist-info/RECORD +0 -14
- {thordata_sdk-0.5.0.dist-info → thordata_sdk-0.7.0.dist-info}/WHEEL +0 -0
- {thordata_sdk-0.5.0.dist-info → thordata_sdk-0.7.0.dist-info}/top_level.txt +0 -0
thordata/async_client.py
CHANGED
@@ -1,768 +1,815 @@
-"""
-Asynchronous client for the Thordata API.
-
-This module provides the AsyncThordataClient for high-concurrency workloads,
-built on aiohttp.
-
-Example:
-    >>> import asyncio
-    >>> from thordata import AsyncThordataClient
-    >>>
-    >>> async def main():
-    ...     async with AsyncThordataClient(
-    ...         scraper_token="your_token",
-    ...         public_token="your_public_token",
-    ...         public_key="your_public_key"
-    ...     ) as client:
-    ...         response = await client.get("https://httpbin.org/ip")
-    ...         print(await response.json())
-    >>>
-    >>> asyncio.run(main())
-"""
-
-from __future__ import annotations
-
-import asyncio
-import logging
-
-
-
-import aiohttp
-
-from .
-… (removed lines 33-768 of the 0.5.0 module are not legible in this rendering)
+"""
+Asynchronous client for the Thordata API.
+
+This module provides the AsyncThordataClient for high-concurrency workloads,
+built on aiohttp.
+
+Example:
+    >>> import asyncio
+    >>> from thordata import AsyncThordataClient
+    >>>
+    >>> async def main():
+    ...     async with AsyncThordataClient(
+    ...         scraper_token="your_token",
+    ...         public_token="your_public_token",
+    ...         public_key="your_public_key"
+    ...     ) as client:
+    ...         response = await client.get("https://httpbin.org/ip")
+    ...         print(await response.json())
+    >>>
+    >>> asyncio.run(main())
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+from typing import Any, Dict, List, Optional, Union
+
+import aiohttp
+
+from . import __version__ as _sdk_version
+from ._utils import (
+    build_auth_headers,
+    build_public_api_headers,
+    build_user_agent,
+    decode_base64_image,
+    extract_error_message,
+    parse_json_response,
+)
+from .enums import Engine, ProxyType
+from .exceptions import (
+    ThordataConfigError,
+    ThordataNetworkError,
+    ThordataTimeoutError,
+    raise_for_code,
+)
+from .models import ProxyConfig, ScraperTaskConfig, SerpRequest, UniversalScrapeRequest
+from .retry import RetryConfig
+
+logger = logging.getLogger(__name__)
+
+
+class AsyncThordataClient:
+    """
+    The official asynchronous Python client for Thordata.
+
+    Designed for high-concurrency AI agents and data pipelines.
+
+    Args:
+        scraper_token: The API token from your Dashboard.
+        public_token: The public API token.
+        public_key: The public API key.
+        proxy_host: Custom proxy gateway host.
+        proxy_port: Custom proxy gateway port.
+        timeout: Default request timeout in seconds.
+        retry_config: Configuration for automatic retries.
+
+    Example:
+        >>> async with AsyncThordataClient(
+        ...     scraper_token="token",
+        ...     public_token="pub_token",
+        ...     public_key="pub_key"
+        ... ) as client:
+        ...     results = await client.serp_search("python")
+    """
+
+    # API Endpoints (same as sync client)
+    BASE_URL = "https://scraperapi.thordata.com"
+    UNIVERSAL_URL = "https://universalapi.thordata.com"
+    API_URL = "https://api.thordata.com/api/web-scraper-api"
+    LOCATIONS_URL = "https://api.thordata.com/api/locations"
+
+    def __init__(
+        self,
+        scraper_token: str,
+        public_token: Optional[str] = None,
+        public_key: Optional[str] = None,
+        proxy_host: str = "pr.thordata.net",
+        proxy_port: int = 9999,
+        timeout: int = 30,
+        retry_config: Optional[RetryConfig] = None,
+        scraperapi_base_url: Optional[str] = None,
+        universalapi_base_url: Optional[str] = None,
+        web_scraper_api_base_url: Optional[str] = None,
+        locations_base_url: Optional[str] = None,
+    ) -> None:
+        """Initialize the Async Thordata Client."""
+        if not scraper_token:
+            raise ThordataConfigError("scraper_token is required")
+
+        self.scraper_token = scraper_token
+        self.public_token = public_token
+        self.public_key = public_key
+
+        # Proxy configuration
+        self._proxy_host = proxy_host
+        self._proxy_port = proxy_port
+        self._default_timeout = aiohttp.ClientTimeout(total=timeout)
+
+        # Retry configuration
+        self._retry_config = retry_config or RetryConfig()
+
+        # Pre-calculate proxy auth
+        self._proxy_url = f"http://{proxy_host}:{proxy_port}"
+        self._proxy_auth = aiohttp.BasicAuth(
+            login=f"td-customer-{scraper_token}", password=""
+        )
+
+        # Base URLs (allow override via args or env vars for testing and custom routing)
+        scraperapi_base = (
+            scraperapi_base_url
+            or os.getenv("THORDATA_SCRAPERAPI_BASE_URL")
+            or self.BASE_URL
+        ).rstrip("/")
+
+        universalapi_base = (
+            universalapi_base_url
+            or os.getenv("THORDATA_UNIVERSALAPI_BASE_URL")
+            or self.UNIVERSAL_URL
+        ).rstrip("/")
+
+        web_scraper_api_base = (
+            web_scraper_api_base_url
+            or os.getenv("THORDATA_WEB_SCRAPER_API_BASE_URL")
+            or self.API_URL
+        ).rstrip("/")
+
+        locations_base = (
+            locations_base_url
+            or os.getenv("THORDATA_LOCATIONS_BASE_URL")
+            or self.LOCATIONS_URL
+        ).rstrip("/")
+
+        self._serp_url = f"{scraperapi_base}/request"
+        self._builder_url = f"{scraperapi_base}/builder"
+        self._universal_url = f"{universalapi_base}/request"
+        self._status_url = f"{web_scraper_api_base}/tasks-status"
+        self._download_url = f"{web_scraper_api_base}/tasks-download"
+        self._locations_base_url = locations_base
+
+        # Session initialized lazily
+        self._session: Optional[aiohttp.ClientSession] = None
+
+    async def __aenter__(self) -> AsyncThordataClient:
+        """Async context manager entry."""
+        if self._session is None or self._session.closed:
+            self._session = aiohttp.ClientSession(
+                timeout=self._default_timeout,
+                trust_env=True,
+                headers={"User-Agent": build_user_agent(_sdk_version, "aiohttp")},
+            )
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Async context manager exit."""
+        await self.close()
+
+    async def close(self) -> None:
+        """Close the underlying aiohttp session."""
+        if self._session and not self._session.closed:
+            await self._session.close()
+        self._session = None
+
+    def _get_session(self) -> aiohttp.ClientSession:
+        """Get the session, raising if not initialized."""
+        if self._session is None or self._session.closed:
+            raise RuntimeError(
+                "Client session not initialized. "
+                "Use 'async with AsyncThordataClient(...) as client:'"
+            )
+        return self._session
+
+    # =========================================================================
+    # Proxy Network Methods
+    # =========================================================================
+
+    async def get(
+        self,
+        url: str,
+        *,
+        proxy_config: Optional[ProxyConfig] = None,
+        **kwargs: Any,
+    ) -> aiohttp.ClientResponse:
+        """
+        Send an async GET request through the Proxy Network.
+
+        Args:
+            url: The target URL.
+            proxy_config: Custom proxy configuration.
+            **kwargs: Additional aiohttp arguments.
+
+        Returns:
+            The aiohttp response object.
+        """
+        session = self._get_session()
+
+        logger.debug(f"Async Proxy GET: {url}")
+
+        if proxy_config:
+            proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
+        else:
+            proxy_url = self._proxy_url
+            proxy_auth = self._proxy_auth
+
+        try:
+            return await session.get(
+                url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
+            )
+        except asyncio.TimeoutError as e:
+            raise ThordataTimeoutError(
+                f"Async request timed out: {e}", original_error=e
+            ) from e
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"Async request failed: {e}", original_error=e
+            ) from e
+
+    async def post(
+        self,
+        url: str,
+        *,
+        proxy_config: Optional[ProxyConfig] = None,
+        **kwargs: Any,
+    ) -> aiohttp.ClientResponse:
+        """
+        Send an async POST request through the Proxy Network.
+
+        Args:
+            url: The target URL.
+            proxy_config: Custom proxy configuration.
+            **kwargs: Additional aiohttp arguments.
+
+        Returns:
+            The aiohttp response object.
+        """
+        session = self._get_session()
+
+        logger.debug(f"Async Proxy POST: {url}")
+
+        if proxy_config:
+            proxy_url, proxy_auth = proxy_config.to_aiohttp_config()
+        else:
+            proxy_url = self._proxy_url
+            proxy_auth = self._proxy_auth
+
+        try:
+            return await session.post(
+                url, proxy=proxy_url, proxy_auth=proxy_auth, **kwargs
+            )
+        except asyncio.TimeoutError as e:
+            raise ThordataTimeoutError(
+                f"Async request timed out: {e}", original_error=e
+            ) from e
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"Async request failed: {e}", original_error=e
+            ) from e
+
+    # =========================================================================
+    # SERP API Methods
+    # =========================================================================
+
+    async def serp_search(
+        self,
+        query: str,
+        *,
+        engine: Union[Engine, str] = Engine.GOOGLE,
+        num: int = 10,
+        country: Optional[str] = None,
+        language: Optional[str] = None,
+        search_type: Optional[str] = None,
+        device: Optional[str] = None,
+        render_js: Optional[bool] = None,
+        no_cache: Optional[bool] = None,
+        output_format: str = "json",
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """
+        Execute an async SERP search.
+
+        Args:
+            query: Search keywords.
+            engine: Search engine.
+            num: Number of results.
+            country: Country code for localization.
+            language: Language code.
+            search_type: Type of search.
+            device: Device type ('desktop', 'mobile', 'tablet').
+            render_js: Enable JavaScript rendering in SERP.
+            no_cache: Disable internal caching.
+            output_format: 'json' or 'html'.
+            **kwargs: Additional parameters.
+
+        Returns:
+            Parsed JSON results or dict with 'html' key.
+        """
+        session = self._get_session()
+
+        engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
+
+        request = SerpRequest(
+            query=query,
+            engine=engine_str,
+            num=num,
+            country=country,
+            language=language,
+            search_type=search_type,
+            device=device,
+            render_js=render_js,
+            no_cache=no_cache,
+            output_format=output_format,
+            extra_params=kwargs,
+        )
+
+        payload = request.to_payload()
+        headers = build_auth_headers(self.scraper_token)
+
+        logger.info(f"Async SERP Search: {engine_str} - {query}")
+
+        try:
+            async with session.post(
+                self._serp_url,
+                data=payload,
+                headers=headers,
+            ) as response:
+                response.raise_for_status()
+
+                if output_format.lower() == "json":
+                    data = await response.json()
+
+                    if isinstance(data, dict):
+                        code = data.get("code")
+                        if code is not None and code != 200:
+                            msg = extract_error_message(data)
+                            raise_for_code(
+                                f"SERP API Error: {msg}",
+                                code=code,
+                                payload=data,
+                            )
+
+                    return parse_json_response(data)
+
+                text = await response.text()
+                return {"html": text}
+
+        except asyncio.TimeoutError as e:
+            raise ThordataTimeoutError(
+                f"SERP request timed out: {e}",
+                original_error=e,
+            ) from e
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"SERP request failed: {e}",
+                original_error=e,
+            ) from e
+
+    async def serp_search_advanced(self, request: SerpRequest) -> Dict[str, Any]:
+        """
+        Execute an async SERP search using a SerpRequest object.
+        """
+        session = self._get_session()
+
+        payload = request.to_payload()
+        headers = build_auth_headers(self.scraper_token)
+
+        logger.info(f"Async SERP Advanced: {request.engine} - {request.query}")
+
+        try:
+            async with session.post(
+                self._serp_url,
+                data=payload,
+                headers=headers,
+            ) as response:
+                response.raise_for_status()
+
+                if request.output_format.lower() == "json":
+                    data = await response.json()
+
+                    if isinstance(data, dict):
+                        code = data.get("code")
+                        if code is not None and code != 200:
+                            msg = extract_error_message(data)
+                            raise_for_code(
+                                f"SERP API Error: {msg}",
+                                code=code,
+                                payload=data,
+                            )
+
+                    return parse_json_response(data)
+
+                text = await response.text()
+                return {"html": text}
+
+        except asyncio.TimeoutError as e:
+            raise ThordataTimeoutError(
+                f"SERP request timed out: {e}",
+                original_error=e,
+            ) from e
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"SERP request failed: {e}",
+                original_error=e,
+            ) from e
+
+    # =========================================================================
+    # Universal Scraping API Methods
+    # =========================================================================
+
+    async def universal_scrape(
+        self,
+        url: str,
+        *,
+        js_render: bool = False,
+        output_format: str = "html",
+        country: Optional[str] = None,
+        block_resources: Optional[str] = None,
+        wait: Optional[int] = None,
+        wait_for: Optional[str] = None,
+        **kwargs: Any,
+    ) -> Union[str, bytes]:
+        """
+        Async scrape using Universal API (Web Unlocker).
+
+        Args:
+            url: Target URL.
+            js_render: Enable JavaScript rendering.
+            output_format: "html" or "png".
+            country: Geo-targeting country.
+            block_resources: Resources to block.
+            wait: Wait time in ms.
+            wait_for: CSS selector to wait for.
+
+        Returns:
+            HTML string or PNG bytes.
+        """
+        request = UniversalScrapeRequest(
+            url=url,
+            js_render=js_render,
+            output_format=output_format,
+            country=country,
+            block_resources=block_resources,
+            wait=wait,
+            wait_for=wait_for,
+            extra_params=kwargs,
+        )
+
+        return await self.universal_scrape_advanced(request)
+
+    async def universal_scrape_advanced(
+        self, request: UniversalScrapeRequest
+    ) -> Union[str, bytes]:
+        """
+        Async scrape using a UniversalScrapeRequest object.
+        """
+        session = self._get_session()
+
+        payload = request.to_payload()
+        headers = build_auth_headers(self.scraper_token)
+
+        logger.info(f"Async Universal Scrape: {request.url}")
+
+        try:
+            async with session.post(
+                self._universal_url, data=payload, headers=headers
+            ) as response:
+                response.raise_for_status()
+
+                try:
+                    resp_json = await response.json()
+                except ValueError:
+                    if request.output_format.lower() == "png":
+                        return await response.read()
+                    return await response.text()
+
+                # Check for API errors
+                if isinstance(resp_json, dict):
+                    code = resp_json.get("code")
+                    if code is not None and code != 200:
+                        msg = extract_error_message(resp_json)
+                        raise_for_code(
+                            f"Universal API Error: {msg}", code=code, payload=resp_json
+                        )
+
+                    if "html" in resp_json:
+                        return resp_json["html"]
+
+                    if "png" in resp_json:
+                        return decode_base64_image(resp_json["png"])
+
+                return str(resp_json)
+
+        except asyncio.TimeoutError as e:
+            raise ThordataTimeoutError(
+                f"Universal scrape timed out: {e}", original_error=e
+            ) from e
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"Universal scrape failed: {e}", original_error=e
+            ) from e
+
+    # =========================================================================
+    # Web Scraper API Methods
+    # =========================================================================
+
+    async def create_scraper_task(
+        self,
+        file_name: str,
+        spider_id: str,
+        spider_name: str,
+        parameters: Dict[str, Any],
+        universal_params: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """
+        Create an async Web Scraper task.
+        """
+        config = ScraperTaskConfig(
+            file_name=file_name,
+            spider_id=spider_id,
+            spider_name=spider_name,
+            parameters=parameters,
+            universal_params=universal_params,
+        )
+
+        return await self.create_scraper_task_advanced(config)
+
+    async def create_scraper_task_advanced(self, config: ScraperTaskConfig) -> str:
+        """
+        Create a task using ScraperTaskConfig.
+        """
+        session = self._get_session()
+
+        payload = config.to_payload()
+        headers = build_auth_headers(self.scraper_token)
+
+        logger.info(f"Async Task Creation: {config.spider_name}")
+
+        try:
+            async with session.post(
+                self._builder_url, data=payload, headers=headers
+            ) as response:
+                response.raise_for_status()
+                data = await response.json()
+
+                code = data.get("code")
+                if code != 200:
+                    msg = extract_error_message(data)
+                    raise_for_code(
+                        f"Task creation failed: {msg}", code=code, payload=data
+                    )
+
+                return data["data"]["task_id"]
+
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"Task creation failed: {e}", original_error=e
+            ) from e
+
+    async def get_task_status(self, task_id: str) -> str:
+        """
+        Check async task status.
+
+        Raises:
+            ThordataConfigError: If public credentials are missing.
+            ThordataAPIError: If API returns a non-200 code in JSON payload.
+            ThordataNetworkError: If network/HTTP request fails.
+        """
+        self._require_public_credentials()
+        session = self._get_session()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+        payload = {"tasks_ids": task_id}
+
+        try:
+            async with session.post(
+                self._status_url, data=payload, headers=headers
+            ) as response:
+                response.raise_for_status()
+                data = await response.json()
+
+                if isinstance(data, dict):
+                    code = data.get("code")
+                    if code is not None and code != 200:
+                        msg = extract_error_message(data)
+                        raise_for_code(
+                            f"Task status API Error: {msg}",
+                            code=code,
+                            payload=data,
+                        )
+
+                    items = data.get("data") or []
+                    for item in items:
+                        if str(item.get("task_id")) == str(task_id):
+                            return item.get("status", "unknown")
+
+                    return "unknown"
+
+                raise ThordataNetworkError(
+                    f"Unexpected task status response type: {type(data).__name__}",
+                    original_error=None,
+                )
+
+        except asyncio.TimeoutError as e:
+            raise ThordataTimeoutError(
+                f"Async status check timed out: {e}", original_error=e
+            ) from e
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"Async status check failed: {e}", original_error=e
+            ) from e
+
+    async def safe_get_task_status(self, task_id: str) -> str:
+        """
+        Backward-compatible status check.
+
+        Returns:
+            Status string, or "error" on any exception.
+        """
+        try:
+            return await self.get_task_status(task_id)
+        except Exception:
+            return "error"
+
+    async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
+        """
+        Get download URL for completed task.
+        """
+        self._require_public_credentials()
+        session = self._get_session()
+
+        headers = build_public_api_headers(
+            self.public_token or "", self.public_key or ""
+        )
+        payload = {"tasks_id": task_id, "type": file_type}
+
+        logger.info(f"Async getting result for Task: {task_id}")
+
+        try:
+            async with session.post(
+                self._download_url, data=payload, headers=headers
+            ) as response:
+                data = await response.json()
+                code = data.get("code")
+
+                if code == 200 and data.get("data"):
+                    return data["data"]["download"]
+
+                msg = extract_error_message(data)
+                raise_for_code(f"Get result failed: {msg}", code=code, payload=data)
+                # This line won't be reached, but satisfies mypy
+                raise RuntimeError("Unexpected state")
+
+        except aiohttp.ClientError as e:
+            raise ThordataNetworkError(
+                f"Get result failed: {e}", original_error=e
+            ) from e
+
+    async def wait_for_task(
+        self,
+        task_id: str,
+        *,
+        poll_interval: float = 5.0,
+        max_wait: float = 600.0,
+    ) -> str:
+        """
+        Wait for a task to complete.
+        """
+
+        import time
+
+        start = time.monotonic()
+
+        while (time.monotonic() - start) < max_wait:
+            status = await self.get_task_status(task_id)
+
+            logger.debug(f"Task {task_id} status: {status}")
+
+            terminal_statuses = {
+                "ready",
+                "success",
+                "finished",
+                "failed",
+                "error",
+                "cancelled",
+            }
+
+            if status.lower() in terminal_statuses:
+                return status
+
+            await asyncio.sleep(poll_interval)
+
+        raise TimeoutError(f"Task {task_id} did not complete within {max_wait} seconds")
+
+    # =========================================================================
+    # Location API Methods
+    # =========================================================================
+
+    async def list_countries(
+        self, proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL
+    ) -> List[Dict[str, Any]]:
+        """List supported countries."""
+        return await self._get_locations(
+            "countries",
+            proxy_type=(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+        )
+
+    async def list_states(
+        self,
+        country_code: str,
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
+    ) -> List[Dict[str, Any]]:
+        """List supported states for a country."""
+        return await self._get_locations(
+            "states",
+            proxy_type=(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            country_code=country_code,
+        )
+
+    async def list_cities(
+        self,
+        country_code: str,
+        state_code: Optional[str] = None,
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
+    ) -> List[Dict[str, Any]]:
+        """List supported cities."""
+        kwargs = {
+            "proxy_type": (
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            "country_code": country_code,
+        }
+        if state_code:
+            kwargs["state_code"] = state_code
+
+        return await self._get_locations("cities", **kwargs)
+
+    async def list_asn(
+        self,
+        country_code: str,
+        proxy_type: Union[ProxyType, int] = ProxyType.RESIDENTIAL,
+    ) -> List[Dict[str, Any]]:
+        """List supported ASNs."""
+        return await self._get_locations(
+            "asn",
+            proxy_type=(
+                int(proxy_type) if isinstance(proxy_type, ProxyType) else proxy_type
+            ),
+            country_code=country_code,
+        )
+
+    async def _get_locations(
+        self, endpoint: str, **kwargs: Any
+    ) -> List[Dict[str, Any]]:
+        """Internal async locations API call."""
+        self._require_public_credentials()
+
+        params = {
+            "token": self.public_token,
+            "key": self.public_key,
+        }
+
+        for key, value in kwargs.items():
+            params[key] = str(value)
+
+        url = f"{self._locations_base_url}/{endpoint}"
+
+        logger.debug(f"Async Locations API: {url}")
+
+        # Create temporary session for this request (no proxy needed)
+        async with aiohttp.ClientSession(trust_env=True) as temp_session:
+            async with temp_session.get(url, params=params) as response:
+                response.raise_for_status()
+                data = await response.json()
+
+                if isinstance(data, dict):
+                    code = data.get("code")
+                    if code is not None and code != 200:
+                        msg = data.get("msg", "")
+                        raise RuntimeError(
+                            f"Locations API error ({endpoint}): code={code}, msg={msg}"
+                        )
+                    return data.get("data") or []
+
+                if isinstance(data, list):
+                    return data
+
+                return []
+
+    # =========================================================================
+    # Helper Methods
+    # =========================================================================
+
+    def _require_public_credentials(self) -> None:
+        """Ensure public API credentials are available."""
+        if not self.public_token or not self.public_key:
+            raise ThordataConfigError(
+                "public_token and public_key are required for this operation. "
+                "Please provide them when initializing AsyncThordataClient."
+            )