thordata-sdk 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thordata/models.py CHANGED
@@ -7,16 +7,16 @@ IDE autocomplete and reduces parameter errors.
7
7
 
8
8
  Example:
9
9
  >>> from thordata.models import ProxyConfig, SerpRequest
10
- >>>
10
+ >>>
11
11
  >>> # Build a proxy URL with geo-targeting
12
12
  >>> proxy = ProxyConfig(
13
13
  ... username="myuser",
14
- ... password="mypass",
14
+ ... password="mypass",
15
15
  ... country="us",
16
16
  ... city="seattle"
17
17
  ... )
18
18
  >>> print(proxy.build_proxy_url())
19
-
19
+
20
20
  >>> # Configure a SERP request
21
21
  >>> serp = SerpRequest(query="python tutorial", engine="google", num=20)
22
22
  >>> print(serp.to_payload())
@@ -24,29 +24,30 @@ Example:
24
24
 
25
25
  from __future__ import annotations
26
26
 
27
+ import json
27
28
  import re
28
29
  import uuid
29
- import json
30
30
  from dataclasses import dataclass, field
31
- from typing import Optional, Dict, Any, List, Union
32
31
  from enum import Enum
33
-
32
+ from typing import Any, Dict, List, Optional, Union
34
33
 
35
34
  # =============================================================================
36
35
  # Proxy Product Types
37
36
  # =============================================================================
38
37
 
38
+
39
39
  class ProxyProduct(str, Enum):
40
40
  """
41
41
  Thordata proxy product types with their default ports.
42
-
42
+
43
43
  Each product type has a specific port on the proxy gateway.
44
44
  """
45
+
45
46
  RESIDENTIAL = "residential"
46
47
  MOBILE = "mobile"
47
48
  DATACENTER = "datacenter"
48
49
  ISP = "isp"
49
-
50
+
50
51
  @property
51
52
  def default_port(self) -> int:
52
53
  """Get the default port for this proxy product."""
@@ -63,14 +64,15 @@ class ProxyProduct(str, Enum):
63
64
  # Proxy Configuration Models
64
65
  # =============================================================================
65
66
 
67
+
66
68
  @dataclass
67
69
  class ProxyConfig:
68
70
  """
69
71
  Configuration for building a Thordata proxy URL.
70
-
72
+
71
73
  This class handles the complex username format required by Thordata proxies,
72
74
  where geo-targeting and session parameters are embedded in the username.
73
-
75
+
74
76
  Args:
75
77
  username: Your Thordata account username (the part after 'td-customer-').
76
78
  password: Your Thordata account password.
@@ -78,18 +80,18 @@ class ProxyConfig:
78
80
  host: Proxy gateway host. If None, uses default based on product.
79
81
  port: Proxy gateway port. If None, uses default based on product.
80
82
  protocol: Proxy protocol - 'http' or 'https'.
81
-
83
+
82
84
  # Geo-targeting (all optional)
83
85
  continent: Target continent code (af/an/as/eu/na/oc/sa).
84
86
  country: Target country code in ISO 3166-1 alpha-2 format.
85
87
  state: Target state name in lowercase.
86
88
  city: Target city name in lowercase.
87
89
  asn: Target ASN code (e.g., 'AS12322'). Must be used with country.
88
-
90
+
89
91
  # Session control (optional)
90
92
  session_id: Session identifier for sticky sessions.
91
93
  session_duration: Session duration in minutes (1-90).
92
-
94
+
93
95
  Example:
94
96
  >>> config = ProxyConfig(
95
97
  ... username="GnrqUwwu3obt",
@@ -103,45 +105,45 @@ class ProxyConfig:
103
105
  >>> print(config.build_proxy_url())
104
106
  http://td-customer-GnrqUwwu3obt-country-us-state-california-sessid-mysession123-sesstime-10:PkCSzvt30iww@....pr.thordata.net:9999
105
107
  """
106
-
108
+
107
109
  username: str
108
110
  password: str
109
111
  product: Union[ProxyProduct, str] = ProxyProduct.RESIDENTIAL
110
112
  host: Optional[str] = None
111
113
  port: Optional[int] = None
112
114
  protocol: str = "http"
113
-
115
+
114
116
  # Geo-targeting
115
117
  continent: Optional[str] = None
116
118
  country: Optional[str] = None
117
119
  state: Optional[str] = None
118
120
  city: Optional[str] = None
119
121
  asn: Optional[str] = None
120
-
122
+
121
123
  # Session control
122
124
  session_id: Optional[str] = None
123
125
  session_duration: Optional[int] = None # minutes, 1-90
124
-
126
+
125
127
  # Valid continent codes
126
128
  VALID_CONTINENTS = {"af", "an", "as", "eu", "na", "oc", "sa"}
127
-
129
+
128
130
  def __post_init__(self) -> None:
129
131
  """Validate configuration after initialization."""
130
132
  # Normalize product to enum
131
133
  if isinstance(self.product, str):
132
134
  self.product = ProxyProduct(self.product.lower())
133
-
135
+
134
136
  # Set default host and port based on product
135
137
  if self.host is None:
136
138
  # Extract user prefix from username if available
137
139
  # Default to generic host
138
140
  self.host = "pr.thordata.net"
139
-
141
+
140
142
  if self.port is None:
141
143
  self.port = self.product.default_port
142
-
144
+
143
145
  self._validate()
144
-
146
+
145
147
  def _validate(self) -> None:
146
148
  """Validate the proxy configuration."""
147
149
  # Validate protocol
@@ -149,7 +151,7 @@ class ProxyConfig:
149
151
  raise ValueError(
150
152
  f"Invalid protocol: {self.protocol}. Must be 'http' or 'https'."
151
153
  )
152
-
154
+
153
155
  # Validate session duration
154
156
  if self.session_duration is not None:
155
157
  if not 1 <= self.session_duration <= 90:
@@ -159,96 +161,96 @@ class ProxyConfig:
159
161
  )
160
162
  if not self.session_id:
161
163
  raise ValueError("session_duration requires session_id to be set")
162
-
164
+
163
165
  # Validate ASN requires country
164
166
  if self.asn and not self.country:
165
167
  raise ValueError("ASN targeting requires country to be specified")
166
-
168
+
167
169
  # Validate continent code
168
170
  if self.continent and self.continent.lower() not in self.VALID_CONTINENTS:
169
171
  raise ValueError(
170
172
  f"Invalid continent code: {self.continent}. "
171
173
  f"Must be one of: {', '.join(sorted(self.VALID_CONTINENTS))}"
172
174
  )
173
-
175
+
174
176
  # Validate country code format (2 letters)
175
177
  if self.country and not re.match(r"^[a-zA-Z]{2}$", self.country):
176
178
  raise ValueError(
177
179
  f"Invalid country code: {self.country}. "
178
180
  "Must be a 2-letter ISO 3166-1 alpha-2 code."
179
181
  )
180
-
182
+
181
183
  def build_username(self) -> str:
182
184
  """
183
185
  Build the complete username string with embedded parameters.
184
-
186
+
185
187
  Returns:
186
188
  The formatted username string for proxy authentication.
187
189
  """
188
190
  parts = [f"td-customer-{self.username}"]
189
-
191
+
190
192
  # Add geo-targeting parameters (order matters)
191
193
  if self.continent:
192
194
  parts.append(f"continent-{self.continent.lower()}")
193
-
195
+
194
196
  if self.country:
195
197
  parts.append(f"country-{self.country.lower()}")
196
-
198
+
197
199
  if self.state:
198
200
  parts.append(f"state-{self.state.lower()}")
199
-
201
+
200
202
  if self.city:
201
203
  parts.append(f"city-{self.city.lower()}")
202
-
204
+
203
205
  if self.asn:
204
206
  # Ensure ASN has correct format
205
207
  asn_value = self.asn.upper()
206
208
  if not asn_value.startswith("AS"):
207
209
  asn_value = f"AS{asn_value}"
208
210
  parts.append(f"asn-{asn_value}")
209
-
211
+
210
212
  # Add session parameters
211
213
  if self.session_id:
212
214
  parts.append(f"sessid-{self.session_id}")
213
-
215
+
214
216
  if self.session_duration:
215
217
  parts.append(f"sesstime-{self.session_duration}")
216
-
218
+
217
219
  return "-".join(parts)
218
-
220
+
219
221
  def build_proxy_url(self) -> str:
220
222
  """
221
223
  Build the complete proxy URL.
222
-
224
+
223
225
  Returns:
224
226
  The formatted proxy URL for use with requests/aiohttp.
225
227
  """
226
228
  username = self.build_username()
227
229
  return f"{self.protocol}://{username}:{self.password}@{self.host}:{self.port}"
228
-
230
+
229
231
  def to_proxies_dict(self) -> Dict[str, str]:
230
232
  """
231
233
  Build a proxies dict suitable for the requests library.
232
-
234
+
233
235
  Returns:
234
236
  Dict with 'http' and 'https' keys pointing to the proxy URL.
235
237
  """
236
238
  url = self.build_proxy_url()
237
239
  return {"http": url, "https": url}
238
-
240
+
239
241
  def to_aiohttp_config(self) -> tuple:
240
242
  """
241
243
  Get proxy configuration for aiohttp.
242
-
244
+
243
245
  Returns:
244
246
  Tuple of (proxy_url, proxy_auth) for aiohttp.
245
247
  """
246
248
  try:
247
249
  import aiohttp
250
+
248
251
  proxy_url = f"{self.protocol}://{self.host}:{self.port}"
249
252
  proxy_auth = aiohttp.BasicAuth(
250
- login=self.build_username(),
251
- password=self.password
253
+ login=self.build_username(), password=self.password
252
254
  )
253
255
  return proxy_url, proxy_auth
254
256
  except ImportError:
@@ -259,14 +261,14 @@ class ProxyConfig:
259
261
  class StickySession(ProxyConfig):
260
262
  """
261
263
  Convenience class for creating sticky session proxy configurations.
262
-
264
+
263
265
  A sticky session keeps the same IP address for a specified duration,
264
266
  useful for multi-step operations that require IP consistency.
265
-
267
+
266
268
  Args:
267
269
  duration_minutes: How long to keep the same IP (1-90 minutes).
268
270
  auto_session_id: If True, automatically generates a unique session ID.
269
-
271
+
270
272
  Example:
271
273
  >>> session = StickySession(
272
274
  ... username="myuser",
@@ -277,18 +279,18 @@ class StickySession(ProxyConfig):
277
279
  >>> # Each call to build_proxy_url() uses the same session
278
280
  >>> url = session.build_proxy_url()
279
281
  """
280
-
282
+
281
283
  duration_minutes: int = 10
282
284
  auto_session_id: bool = True
283
-
285
+
284
286
  def __post_init__(self) -> None:
285
287
  # Auto-generate session ID if requested and not provided
286
288
  if self.auto_session_id and not self.session_id:
287
289
  self.session_id = uuid.uuid4().hex[:12]
288
-
290
+
289
291
  # Set session_duration from duration_minutes
290
292
  self.session_duration = self.duration_minutes
291
-
293
+
292
294
  # Call parent post_init
293
295
  super().__post_init__()
294
296
 
@@ -297,86 +299,94 @@ class StickySession(ProxyConfig):
297
299
  # SERP API Models
298
300
  # =============================================================================
299
301
 
302
+
300
303
  @dataclass
301
304
  class SerpRequest:
302
305
  """
303
306
  Configuration for a SERP API request.
304
-
307
+
305
308
  Supports Google, Bing, Yandex, DuckDuckGo, and Baidu search engines.
306
-
309
+
307
310
  Args:
308
311
  query: The search query string (required).
309
312
  engine: Search engine to use (default: 'google').
310
313
  num: Number of results per page (default: 10).
311
314
  start: Result offset for pagination (default: 0).
312
-
315
+
313
316
  # Localization
314
317
  country: Country code for results (gl parameter for Google).
315
318
  language: Language code for interface (hl parameter for Google).
316
319
  google_domain: Google domain to use (e.g., 'google.co.uk').
317
-
320
+
318
321
  # Geo-targeting
319
322
  location: Location name for geo-targeting.
320
323
  uule: Encoded location parameter (use with location).
321
-
324
+
322
325
  # Search type
323
326
  search_type: Type of search (images, news, shopping, videos, etc.).
324
-
327
+
325
328
  # Filters
326
329
  safe_search: Enable safe search filtering.
327
330
  time_filter: Time range filter (hour, day, week, month, year).
328
-
331
+ no_autocorrect: Disable automatic spelling correction (nfpr).
332
+ filter_duplicates: Enable/disable duplicate filtering.
333
+
334
+ # Device & Rendering
335
+ device: Device type ('desktop', 'mobile', 'tablet').
336
+ render_js: Enable JavaScript rendering in SERP (render_js=True/False).
337
+ no_cache: Disable internal caching (no_cache=True/False).
338
+
339
+ # Output
340
+ output_format: 'json' (default) or 'html'.
341
+
329
342
  # Advanced
330
- device: Device type (desktop, mobile, tablet).
331
- extra_params: Additional parameters to pass through.
332
-
333
- Example:
334
- >>> req = SerpRequest(
335
- ... query="python programming",
336
- ... engine="google",
337
- ... num=20,
338
- ... country="us",
339
- ... language="en",
340
- ... search_type="news"
341
- ... )
342
- >>> payload = req.to_payload()
343
+ ludocid: Google Place ID.
344
+ kgmid: Google Knowledge Graph ID.
345
+
346
+ # Extra
347
+ extra_params: Additional parameters to pass through (ibp, lsig, si, uds, ...).
343
348
  """
344
-
349
+
345
350
  query: str
346
351
  engine: str = "google"
347
352
  num: int = 10
348
353
  start: int = 0
349
-
354
+
350
355
  # Localization
351
356
  country: Optional[str] = None # 'gl' for Google
352
357
  language: Optional[str] = None # 'hl' for Google
353
358
  google_domain: Optional[str] = None
354
359
  countries_filter: Optional[str] = None # 'cr' parameter
355
360
  languages_filter: Optional[str] = None # 'lr' parameter
356
-
361
+
357
362
  # Geo-targeting
358
363
  location: Optional[str] = None
359
364
  uule: Optional[str] = None # Encoded location
360
-
365
+
361
366
  # Search type
362
- search_type: Optional[str] = None # tbm parameter (isch, shop, nws, vid)
363
-
367
+ search_type: Optional[str] = None # tbm parameter (isch, shop, nws, vid, ...)
368
+
364
369
  # Filters
365
370
  safe_search: Optional[bool] = None
366
- time_filter: Optional[str] = None # tbs parameter
371
+ time_filter: Optional[str] = None # tbs parameter (time part)
367
372
  no_autocorrect: bool = False # nfpr parameter
368
- filter_duplicates: Optional[bool] = None
369
-
370
- # Advanced
371
- device: Optional[str] = None
372
-
373
+ filter_duplicates: Optional[bool] = None # filter parameter
374
+
375
+ # Device & Rendering
376
+ device: Optional[str] = None # 'desktop', 'mobile', 'tablet'
377
+ render_js: Optional[bool] = None # render_js parameter
378
+ no_cache: Optional[bool] = None # no_cache parameter
379
+
380
+ # Output format
381
+ output_format: str = "json" # 'json' or 'html'
382
+
373
383
  # Advanced Google parameters
374
384
  ludocid: Optional[str] = None # Google Place ID
375
385
  kgmid: Optional[str] = None # Knowledge Graph ID
376
-
386
+
377
387
  # Pass-through
378
388
  extra_params: Dict[str, Any] = field(default_factory=dict)
379
-
389
+
380
390
  # Search type mappings for tbm parameter
381
391
  SEARCH_TYPE_MAP = {
382
392
  "images": "isch",
@@ -389,7 +399,7 @@ class SerpRequest:
389
399
  "nws": "nws",
390
400
  "vid": "vid",
391
401
  }
392
-
402
+
393
403
  # Time filter mappings for tbs parameter
394
404
  TIME_FILTER_MAP = {
395
405
  "hour": "qdr:h",
@@ -398,7 +408,7 @@ class SerpRequest:
398
408
  "month": "qdr:m",
399
409
  "year": "qdr:y",
400
410
  }
401
-
411
+
402
412
  # Engine URL defaults
403
413
  ENGINE_URLS = {
404
414
  "google": "google.com",
@@ -407,93 +417,103 @@ class SerpRequest:
407
417
  "duckduckgo": "duckduckgo.com",
408
418
  "baidu": "baidu.com",
409
419
  }
410
-
420
+
411
421
  def to_payload(self) -> Dict[str, Any]:
412
422
  """
413
423
  Convert to API request payload.
414
-
424
+
415
425
  Returns:
416
426
  Dictionary ready to be sent to the SERP API.
417
427
  """
418
428
  engine = self.engine.lower()
419
-
429
+
420
430
  payload: Dict[str, Any] = {
421
431
  "engine": engine,
422
432
  "num": str(self.num),
423
- "json": "1",
433
+ # output_format: json=1 for JSON, json=0 for raw HTML
434
+ "json": "1" if self.output_format.lower() == "json" else "0",
424
435
  }
425
-
436
+
426
437
  # Handle query parameter (Yandex uses 'text', others use 'q')
427
438
  if engine == "yandex":
428
439
  payload["text"] = self.query
429
440
  else:
430
441
  payload["q"] = self.query
431
-
432
- # Set URL based on google_domain or engine default
442
+
443
+ # Set URL / domain based on google_domain or engine default
433
444
  if self.google_domain:
445
+ # Explicitly set the google_domain parameter and also set url
446
+ payload["google_domain"] = self.google_domain
434
447
  payload["url"] = self.google_domain
435
448
  elif engine in self.ENGINE_URLS:
436
449
  payload["url"] = self.ENGINE_URLS[engine]
437
-
450
+
438
451
  # Pagination
439
452
  if self.start > 0:
440
453
  payload["start"] = str(self.start)
441
-
454
+
442
455
  # Localization
443
456
  if self.country:
444
457
  payload["gl"] = self.country.lower()
445
-
458
+
446
459
  if self.language:
447
460
  payload["hl"] = self.language.lower()
448
-
461
+
449
462
  if self.countries_filter:
450
463
  payload["cr"] = self.countries_filter
451
-
464
+
452
465
  if self.languages_filter:
453
466
  payload["lr"] = self.languages_filter
454
-
467
+
455
468
  # Geo-targeting
456
469
  if self.location:
457
470
  payload["location"] = self.location
458
-
471
+
459
472
  if self.uule:
460
473
  payload["uule"] = self.uule
461
-
462
- # Search type
474
+
475
+ # Search type (tbm)
463
476
  if self.search_type:
464
477
  search_type_lower = self.search_type.lower()
465
478
  tbm_value = self.SEARCH_TYPE_MAP.get(search_type_lower, search_type_lower)
466
479
  payload["tbm"] = tbm_value
467
-
480
+
468
481
  # Filters
469
482
  if self.safe_search is not None:
470
483
  payload["safe"] = "active" if self.safe_search else "off"
471
-
484
+
472
485
  if self.time_filter:
473
486
  time_lower = self.time_filter.lower()
474
487
  tbs_value = self.TIME_FILTER_MAP.get(time_lower, time_lower)
475
488
  payload["tbs"] = tbs_value
476
-
489
+
477
490
  if self.no_autocorrect:
478
491
  payload["nfpr"] = "1"
479
-
492
+
480
493
  if self.filter_duplicates is not None:
481
494
  payload["filter"] = "1" if self.filter_duplicates else "0"
482
-
495
+
483
496
  # Device
484
497
  if self.device:
485
498
  payload["device"] = self.device.lower()
486
-
499
+
500
+ # Rendering & cache control
501
+ if self.render_js is not None:
502
+ payload["render_js"] = "True" if self.render_js else "False"
503
+
504
+ if self.no_cache is not None:
505
+ payload["no_cache"] = "True" if self.no_cache else "False"
506
+
487
507
  # Advanced Google parameters
488
508
  if self.ludocid:
489
509
  payload["ludocid"] = self.ludocid
490
-
510
+
491
511
  if self.kgmid:
492
512
  payload["kgmid"] = self.kgmid
493
-
494
- # Extra parameters
513
+
514
+ # Extra parameters (ibp, lsig, si, uds, etc.)
495
515
  payload.update(self.extra_params)
496
-
516
+
497
517
  return payload
498
518
 
499
519
 
@@ -501,13 +521,14 @@ class SerpRequest:
501
521
  # Universal Scraper (Web Unlocker) Models
502
522
  # =============================================================================
503
523
 
524
+
504
525
  @dataclass
505
526
  class UniversalScrapeRequest:
506
527
  """
507
528
  Configuration for a Universal Scraping API (Web Unlocker) request.
508
-
529
+
509
530
  This API bypasses anti-bot protections like Cloudflare, CAPTCHAs, etc.
510
-
531
+
511
532
  Args:
512
533
  url: Target URL to scrape (required).
513
534
  js_render: Enable JavaScript rendering with headless browser.
@@ -520,7 +541,7 @@ class UniversalScrapeRequest:
520
541
  headers: Custom request headers as list of {name, value} dicts.
521
542
  cookies: Custom cookies as list of {name, value} dicts.
522
543
  extra_params: Additional parameters to pass through.
523
-
544
+
524
545
  Example:
525
546
  >>> req = UniversalScrapeRequest(
526
547
  ... url="https://example.com",
@@ -532,7 +553,7 @@ class UniversalScrapeRequest:
532
553
  ... )
533
554
  >>> payload = req.to_payload()
534
555
  """
535
-
556
+
536
557
  url: str
537
558
  js_render: bool = False
538
559
  output_format: str = "html" # 'html' or 'png'
@@ -544,7 +565,7 @@ class UniversalScrapeRequest:
544
565
  headers: Optional[List[Dict[str, str]]] = None # [{"name": "...", "value": "..."}]
545
566
  cookies: Optional[List[Dict[str, str]]] = None # [{"name": "...", "value": "..."}]
546
567
  extra_params: Dict[str, Any] = field(default_factory=dict) # must use field() for a mutable default
547
-
568
+
548
569
  def __post_init__(self) -> None:
549
570
  """Validate configuration."""
550
571
  valid_formats = {"html", "png"}
@@ -553,16 +574,16 @@ class UniversalScrapeRequest:
553
574
  f"Invalid output_format: {self.output_format}. "
554
575
  f"Must be one of: {', '.join(valid_formats)}"
555
576
  )
556
-
577
+
557
578
  if self.wait is not None and (self.wait < 0 or self.wait > 100000):
558
579
  raise ValueError(
559
580
  f"wait must be between 0 and 100000 milliseconds, got {self.wait}"
560
581
  )
561
-
582
+
562
583
  def to_payload(self) -> Dict[str, Any]:
563
584
  """
564
585
  Convert to API request payload.
565
-
586
+
566
587
  Returns:
567
588
  Dictionary ready to be sent to the Universal API.
568
589
  """
@@ -571,30 +592,30 @@ class UniversalScrapeRequest:
571
592
  "js_render": "True" if self.js_render else "False",
572
593
  "type": self.output_format.lower(),
573
594
  }
574
-
595
+
575
596
  if self.country:
576
597
  payload["country"] = self.country.lower()
577
-
598
+
578
599
  if self.block_resources:
579
600
  payload["block_resources"] = self.block_resources
580
-
601
+
581
602
  if self.clean_content:
582
603
  payload["clean_content"] = self.clean_content
583
-
604
+
584
605
  if self.wait is not None:
585
606
  payload["wait"] = str(self.wait)
586
-
607
+
587
608
  if self.wait_for:
588
609
  payload["wait_for"] = self.wait_for
589
-
610
+
590
611
  if self.headers:
591
612
  payload["headers"] = json.dumps(self.headers)
592
-
613
+
593
614
  if self.cookies:
594
615
  payload["cookies"] = json.dumps(self.cookies)
595
-
616
+
596
617
  payload.update(self.extra_params)
597
-
618
+
598
619
  return payload
599
620
 
600
621
 
@@ -602,13 +623,14 @@ class UniversalScrapeRequest:
602
623
  # Web Scraper Task Models
603
624
  # =============================================================================
604
625
 
626
+
605
627
  @dataclass
606
628
  class ScraperTaskConfig:
607
629
  """
608
630
  Configuration for creating a Web Scraper API task.
609
-
631
+
610
632
  Note: You must get spider_id and spider_name from the Thordata Dashboard.
611
-
633
+
612
634
  Args:
613
635
  file_name: Name for the output file.
614
636
  spider_id: Spider identifier from Dashboard.
@@ -616,7 +638,7 @@ class ScraperTaskConfig:
616
638
  parameters: Spider-specific parameters.
617
639
  universal_params: Global spider settings.
618
640
  include_errors: Include error details in output.
619
-
641
+
620
642
  Example:
621
643
  >>> config = ScraperTaskConfig(
622
644
  ... file_name="youtube_data",
@@ -629,18 +651,18 @@ class ScraperTaskConfig:
629
651
  ... )
630
652
  >>> payload = config.to_payload()
631
653
  """
632
-
654
+
633
655
  file_name: str
634
656
  spider_id: str
635
657
  spider_name: str
636
658
  parameters: Dict[str, Any]
637
659
  universal_params: Optional[Dict[str, Any]] = None
638
660
  include_errors: bool = True
639
-
661
+
640
662
  def to_payload(self) -> Dict[str, Any]:
641
663
  """
642
664
  Convert to API request payload.
643
-
665
+
644
666
  Returns:
645
667
  Dictionary ready to be sent to the Web Scraper API.
646
668
  """
@@ -651,10 +673,10 @@ class ScraperTaskConfig:
651
673
  "spider_parameters": json.dumps([self.parameters]),
652
674
  "spider_errors": "true" if self.include_errors else "false",
653
675
  }
654
-
676
+
655
677
  if self.universal_params:
656
678
  payload["spider_universal"] = json.dumps(self.universal_params)
657
-
679
+
658
680
  return payload
659
681
 
660
682
 
@@ -662,37 +684,42 @@ class ScraperTaskConfig:
662
684
  # Response Models
663
685
  # =============================================================================
664
686
 
687
+
665
688
  @dataclass
666
689
  class TaskStatusResponse:
667
690
  """
668
691
  Response from task status check.
669
-
692
+
670
693
  Attributes:
671
694
  task_id: The task identifier.
672
695
  status: Current task status.
673
696
  progress: Optional progress percentage.
674
697
  message: Optional status message.
675
698
  """
676
-
699
+
677
700
  task_id: str
678
701
  status: str
679
702
  progress: Optional[int] = None
680
703
  message: Optional[str] = None
681
-
704
+
682
705
  def is_complete(self) -> bool:
683
706
  """Check if the task has completed (success or failure)."""
684
707
  terminal_statuses = {
685
- "ready", "success", "finished",
686
- "failed", "error", "cancelled"
708
+ "ready",
709
+ "success",
710
+ "finished",
711
+ "failed",
712
+ "error",
713
+ "cancelled",
687
714
  }
688
715
  return self.status.lower() in terminal_statuses
689
-
716
+
690
717
  def is_success(self) -> bool:
691
718
  """Check if the task completed successfully."""
692
719
  success_statuses = {"ready", "success", "finished"}
693
720
  return self.status.lower() in success_statuses
694
-
721
+
695
722
  def is_failed(self) -> bool:
696
723
  """Check if the task failed."""
697
724
  failure_statuses = {"failed", "error"}
698
- return self.status.lower() in failure_statuses
725
+ return self.status.lower() in failure_statuses