thordata-sdk 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
- Metadata-Version: 2.4
+ Metadata-Version: 2.1
  Name: thordata_sdk
- Version: 0.2.2
+ Version: 0.2.4
  Summary: The official Python SDK for Thordata Proxy & Scraper Infrastructure.
  Home-page: https://github.com/Thordata/thordata-python-sdk
  Author: Thordata Developer Team
@@ -22,20 +22,6 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: requests>=2.25.0
- Requires-Dist: aiohttp>=3.8.0
- Dynamic: author
- Dynamic: author-email
- Dynamic: classifier
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: license
- Dynamic: license-file
- Dynamic: project-url
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary

  # Thordata Python SDK

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

  setup(
      name='thordata_sdk',
-     version='0.2.2',  # Bump version due to breaking auth changes
+     version='0.2.4',  # Bump version due to breaking auth changes
      packages=find_packages(include=['thordata_sdk', 'thordata_sdk.*']),
      install_requires=[
          'requests>=2.25.0',  # Standard synchronous HTTP
@@ -1,8 +1,9 @@
  # Expose main clients
  from .client import ThordataClient
  from .async_client import AsyncThordataClient
+ from .enums import Engine, GoogleSearchType

  # Version of the thordata-sdk package
- __version__ = "0.2.2"
+ __version__ = "0.2.4"

  __all__ = ["ThordataClient", "AsyncThordataClient"]
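For reference, a minimal import sketch for 0.2.4 based on the line added above; note that __all__ still lists only the two clients, so the enums are reached by importing them explicitly:

    # Import sketch for 0.2.4; names come straight from the diff above
    from thordata_sdk import ThordataClient, AsyncThordataClient, Engine, GoogleSearchType

    print(Engine.GOOGLE.value)          # "google"
    print(GoogleSearchType.MAPS.value)  # "maps"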
@@ -1,7 +1,12 @@
  import aiohttp
  import logging
  import json
- from typing import Optional, Dict, Any
+ import base64
+ from typing import Optional, Dict, Any, Union
+
+ # Reuse the shared parameter logic and enums
+ from .enums import Engine
+ from .parameters import normalize_serp_params

  logger = logging.getLogger(__name__)

@@ -9,11 +14,6 @@ logger = logging.getLogger(__name__)
  class AsyncThordataClient:
      """
      Thordata Asynchronous Client (built on aiohttp).
-     Designed for high-concurrency and low-latency data collection tasks.
-
-     Usage:
-         async with AsyncThordataClient(...) as client:
-             await client.get("http://example.com")
      """

      def __init__(
@@ -24,22 +24,19 @@ class AsyncThordataClient:
          proxy_host: str = "gate.thordata.com",
          proxy_port: int = 22225
      ):
-         """
-         Initialize the asynchronous client.
-         """
          self.scraper_token = scraper_token
          self.public_token = public_token
          self.public_key = public_key

-         # Proxy Authentication
          self.proxy_auth = aiohttp.BasicAuth(login=scraper_token, password='')
          self.proxy_url = f"http://{proxy_host}:{proxy_port}"

-         # API Endpoints
          self.base_url = "https://scraperapi.thordata.com"
+         self.universal_url = "https://universalapi.thordata.com"
          self.api_url = "https://api.thordata.com/api/web-scraper-api"

          self.SERP_API_URL = f"{self.base_url}/request"
+         self.UNIVERSAL_API_URL = f"{self.universal_url}/request"
          self.SCRAPER_BUILDER_URL = f"{self.base_url}/builder"
          self.SCRAPER_STATUS_URL = f"{self.api_url}/tasks-status"
          self.SCRAPER_DOWNLOAD_URL = f"{self.api_url}/tasks-download"
@@ -55,21 +52,14 @@ class AsyncThordataClient:
          await self.close()

      async def close(self):
-         """Close the underlying aiohttp session."""
          if self._session and not self._session.closed:
              await self._session.close()
          self._session = None

-     # --- Proxy Usage ---
-
+     # --- Proxy (Unchanged) ---
      async def get(self, url: str, **kwargs) -> aiohttp.ClientResponse:
-         """
-         Send an asynchronous GET request through the Thordata Proxy.
-         """
          if self._session is None:
              raise RuntimeError("Client session not initialized.")
-
-         logger.debug(f"Async Proxy Request: {url}")
          try:
              return await self._session.get(
                  url,
@@ -81,43 +71,37 @@ class AsyncThordataClient:
              logger.error(f"Async Request failed: {e}")
              raise

-     # --- SERP API ---
-
+     # --- SERP (Optimized) ---
      async def serp_search(
-         self, query: str, engine: str = "google", num: int = 10, **kwargs
+         self,
+         query: str,
+         engine: Union[Engine, str] = Engine.GOOGLE,
+         num: int = 10,
+         **kwargs
      ) -> Dict[str, Any]:
-         """Async SERP search."""
+         """
+         Execute a real-time SERP search (Async).
+         """
          if self._session is None:
              raise RuntimeError("Client session not initialized.")

-         payload = {
-             "q": query,
-             "num": str(num),
-             "json": "1",
-             "engine": engine.lower(),
-             **kwargs
-         }
-         if engine.lower() == 'yandex':
-             payload['text'] = payload.pop('q')
-             if 'url' not in payload:
-                 payload['url'] = "yandex.com"
-         elif 'url' not in payload:
-             if engine == 'google':
-                 payload['url'] = "google.com"
-             elif engine == 'bing':
-                 payload['url'] = "bing.com"
+         # 1. Convert the enum to its string value
+         engine_str = engine.value if isinstance(engine, Engine) else engine.lower()
+
+         # 2. Reuse the shared logic in parameters.py (Don't Repeat Yourself!)
+         payload = normalize_serp_params(engine_str, query, num=num, **kwargs)

          headers = {
              "Authorization": f"Bearer {self.scraper_token}",
              "Content-Type": "application/x-www-form-urlencoded"
          }

+         # 3. Send the request
          async with self._session.post(
              self.SERP_API_URL, data=payload, headers=headers
          ) as response:
              response.raise_for_status()
              data = await response.json()
-             # Handle double-encoding
              if isinstance(data, str):
                  try:
                      data = json.loads(data)
@@ -125,17 +109,80 @@ class AsyncThordataClient:
                      pass
              return data

-     # --- Web Scraper API ---
+     # --- Universal (Unchanged) ---
+     async def universal_scrape(
+         self,
+         url: str,
+         js_render: bool = False,
+         output_format: str = "HTML",
+         country: str = None,
+         block_resources: bool = False
+     ) -> Union[str, bytes]:
+         if self._session is None:
+             raise RuntimeError("Client session not initialized.")
+
+         headers = {
+             "Authorization": f"Bearer {self.scraper_token}",
+             "Content-Type": "application/x-www-form-urlencoded"
+         }
+
+         payload = {
+             "url": url,
+             "js_render": "True" if js_render else "False",
+             "type": output_format.lower(),
+             "block_resources": "True" if block_resources else "False"
+         }
+         if country:
+             payload["country"] = country
+
+         async with self._session.post(
+             self.UNIVERSAL_API_URL, data=payload, headers=headers
+         ) as response:
+             response.raise_for_status()

+             try:
+                 resp_json = await response.json()
+             except Exception:
+                 if output_format.upper() == "PNG":
+                     return await response.read()
+                 return await response.text()
+
+             if isinstance(resp_json, dict) and resp_json.get("code") \
+                     and resp_json.get("code") != 200:
+                 raise Exception(f"Universal API Error: {resp_json}")
+
+             if "html" in resp_json:
+                 return resp_json["html"]
+
+             if "png" in resp_json:
+                 png_str = resp_json["png"]
+                 if not png_str:
+                     raise Exception("API returned empty PNG data")
+
+                 # 🛠️ FIX: strip the Data URI scheme prefix
+                 if "," in png_str:
+                     png_str = png_str.split(",", 1)[1]
+
+                 png_str = png_str.replace("\n", "").replace("\r", "")
+                 missing_padding = len(png_str) % 4
+                 if missing_padding:
+                     png_str += '=' * (4 - missing_padding)
+                 return base64.b64decode(png_str)
+
+             return str(resp_json)
+
+     # --- Web Scraper (Optimized) ---
      async def create_scraper_task(
          self,
          file_name: str,
          spider_id: str,
+         spider_name: str,
          individual_params: Dict[str, Any],
-         spider_name: str = "youtube.com",
          universal_params: Dict[str, Any] = None
      ) -> str:
-         """Create an async scraping task."""
+         """
+         Create an Asynchronous Web Scraper Task.
+         """
          if self._session is None:
              raise RuntimeError("Client session not initialized.")

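A hedged usage sketch for the async client after these additions, based only on the signatures visible in this diff; the credential strings are placeholders:

    import asyncio
    from thordata_sdk import AsyncThordataClient, Engine

    async def main():
        # Placeholder credentials; real values come from the Thordata dashboard
        async with AsyncThordataClient(
            scraper_token="SCRAPER_TOKEN",
            public_token="PUBLIC_TOKEN",
            public_key="PUBLIC_KEY",
        ) as client:
            # Real-time SERP search; the Engine enum is normalized to "google" internally
            results = await client.serp_search("thordata", engine=Engine.GOOGLE, num=10)

            # Universal Scraping API: returns str for HTML output, bytes for PNG
            html = await client.universal_scrape("https://example.com", js_render=True)
            print(type(results), len(html))

    asyncio.run(main())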
@@ -144,6 +191,7 @@ class AsyncThordataClient:
              "Content-Type": "application/x-www-form-urlencoded"
          }

+         # Simplified payload construction; unnecessary checks removed
          payload = {
              "file_name": file_name,
              "spider_id": spider_id,
@@ -163,8 +211,8 @@ class AsyncThordataClient:
                  raise Exception(f"Creation failed: {data}")
              return data["data"]["task_id"]

+     # --- Status & Result (Unchanged) ---
      async def get_task_status(self, task_id: str) -> str:
-         """Check task status."""
          headers = {
              "token": self.public_token,
              "key": self.public_key,
@@ -183,7 +231,6 @@ class AsyncThordataClient:
          return "Unknown"

      async def get_task_result(self, task_id: str, file_type: str = "json") -> str:
-         """Get download link."""
          headers = {
              "token": self.public_token,
              "key": self.public_key,
@@ -1,7 +1,11 @@
  import requests
  import logging
  import json
- from typing import Dict, Any
+ import base64
+ from typing import Dict, Any, Union, Optional
+
+ from .enums import Engine
+ from .parameters import normalize_serp_params

  # Configure a library-specific logger
  logger = logging.getLogger(__name__)
@@ -14,7 +18,8 @@ class ThordataClient:
      Handles authentication for:
      1. Proxy Network (HTTP/HTTPS)
      2. SERP API (Real-time Search)
-     3. Web Scraper API (Async Task Management)
+     3. Universal Scraping API (Single Page)
+     4. Web Scraper API (Async Task Management)
      """

      def __init__(
@@ -39,16 +44,18 @@ class ThordataClient:
          self.public_token = public_token
          self.public_key = public_key

-         # Proxy Configuration (User: Scraper Token, Pass: Empty)
+         # Proxy Configuration
          self.proxy_url = (
              f"http://{self.scraper_token}:@{proxy_host}:{proxy_port}"
          )

          # API Endpoints
          self.base_url = "https://scraperapi.thordata.com"
+         self.universal_url = "https://universalapi.thordata.com"
          self.api_url = "https://api.thordata.com/api/web-scraper-api"

          self.SERP_API_URL = f"{self.base_url}/request"
+         self.UNIVERSAL_API_URL = f"{self.universal_url}/request"
          self.SCRAPER_BUILDER_URL = f"{self.base_url}/builder"
          self.SCRAPER_STATUS_URL = f"{self.api_url}/tasks-status"
          self.SCRAPER_DOWNLOAD_URL = f"{self.api_url}/tasks-download"
@@ -62,51 +69,39 @@ class ThordataClient:
      def get(self, url: str, **kwargs) -> requests.Response:
          """
          Send a GET request through the Thordata Proxy Network.
-
-         Args:
-             url (str): The target URL.
-             **kwargs: Additional arguments passed to requests.get().
-
-         Returns:
-             requests.Response: The HTTP response.
          """
          logger.debug(f"Proxy Request: {url}")
          kwargs.setdefault("timeout", 30)
          return self.session.get(url, **kwargs)

      def serp_search(
-         self, query: str, engine: str = "google", num: int = 10, **kwargs
+         self,
+         query: str,
+         engine: Union[Engine, str] = Engine.GOOGLE,  # accepts an Engine enum or a plain string
+         num: int = 10,
+         **kwargs  # receives any extra parameters (e.g. type="maps")
      ) -> Dict[str, Any]:
          """
          Execute a real-time SERP search.
+
+         Args:
+             query: Keywords
+             engine: 'google', 'bing', 'yandex' etc.
+             num: Number of results (default 10)
+             **kwargs: Extra parameters (e.g., type="shopping", location="London")
          """
-         payload = {
-             "q": query,
-             "num": str(num),
-             "json": "1",
-             "engine": engine.lower(),
-             **kwargs
-         }
+         # Compatibility: if an Engine enum is passed, take its value; if a string, lowercase it
+         engine_str = engine.value if isinstance(engine, Engine) else engine.lower()

-         # Engine-specific parameter adjustments
-         if engine.lower() == 'yandex':
-             payload['text'] = payload.pop('q')
-             if 'url' not in payload:
-                 payload['url'] = "yandex.com"
-         elif 'url' not in payload:
-             if engine == 'google':
-                 payload['url'] = "google.com"
-             elif engine == 'bing':
-                 payload['url'] = "bing.com"
-             elif engine == 'duckduckgo':
-                 payload['url'] = "duckduckgo.com"
+         # Delegate to the shared logic in parameters.py
+         payload = normalize_serp_params(engine_str, query, num=num, **kwargs)

          headers = {
              "Authorization": f"Bearer {self.scraper_token}",
              "Content-Type": "application/x-www-form-urlencoded"
          }

-         logger.info(f"SERP Search: {engine} - {query}")
+         logger.info(f"SERP Search: {engine_str} - {query}")
          try:
              response = self.session.post(
                  self.SERP_API_URL,
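To make the new calling convention concrete, a short sketch of the synchronous serp_search after this change; both enum and string engines are accepted, and extra keyword arguments are forwarded untouched (credentials are placeholders):

    from thordata_sdk import ThordataClient, Engine, GoogleSearchType

    client = ThordataClient(
        scraper_token="SCRAPER_TOKEN",   # placeholder
        public_token="PUBLIC_TOKEN",     # placeholder
        public_key="PUBLIC_KEY",         # placeholder
    )

    # Enum engine plus a pass-through search type
    news = client.serp_search("thordata", engine=Engine.GOOGLE, type=GoogleSearchType.NEWS.value)

    # Plain string engine still works
    bing = client.serp_search("thordata", engine="bing", num=20)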
@@ -115,35 +110,114 @@ class ThordataClient:
                  timeout=60
              )
              response.raise_for_status()
+
              data = response.json()
-
-             # Handle potential double-encoded JSON strings
              if isinstance(data, str):
-                 try:
-                     data = json.loads(data)
-                 except json.JSONDecodeError:
-                     pass
+                 try: data = json.loads(data)
+                 except: pass
              return data
          except Exception as e:
              logger.error(f"SERP Request Failed: {e}")
              raise

+
+     def universal_scrape(
+         self,
+         url: str,
+         js_render: bool = False,
+         output_format: str = "HTML",
+         country: str = None,
+         block_resources: bool = False
+     ) -> Union[str, bytes]:
+         """
+         Unlock target pages via the Universal Scraping API.
+         """
+         headers = {
+             "Authorization": f"Bearer {self.scraper_token}",
+             "Content-Type": "application/x-www-form-urlencoded"
+         }
+
+         payload = {
+             "url": url,
+             "js_render": "True" if js_render else "False",
+             "type": output_format.lower(),
+             "block_resources": "True" if block_resources else "False"
+         }
+         if country:
+             payload["country"] = country
+
+         logger.info(f"Universal Scrape: {url}")
+
+         try:
+             response = self.session.post(
+                 self.UNIVERSAL_API_URL,
+                 data=payload,
+                 headers=headers,
+                 timeout=60
+             )
+             response.raise_for_status()
+
+             # Parse JSON wrapper
+             try:
+                 resp_json = response.json()
+             except json.JSONDecodeError:
+                 # Fallback for raw response
+                 if output_format.upper() == "PNG":
+                     return response.content
+                 return response.text
+
+             # Check API errors
+             if isinstance(resp_json, dict) and resp_json.get("code") \
+                     and resp_json.get("code") != 200:
+                 raise Exception(f"Universal API Error: {resp_json}")
+
+             # Extract HTML
+             if "html" in resp_json:
+                 return resp_json["html"]
+
+             # Extract PNG
+             if "png" in resp_json:
+                 png_str = resp_json["png"]
+                 if not png_str:
+                     raise Exception("API returned empty PNG data")
+
+                 # 🛠️ FIX: strip the Data URI scheme prefix (data:image/png;base64,)
+                 if "," in png_str:
+                     png_str = png_str.split(",", 1)[1]
+
+                 # Base64 decode (handle padding)
+                 png_str = png_str.replace("\n", "").replace("\r", "")
+                 missing_padding = len(png_str) % 4
+                 if missing_padding:
+                     png_str += '=' * (4 - missing_padding)
+
+                 return base64.b64decode(png_str)
+
+             return str(resp_json)
+
+         except Exception as e:
+             logger.error(f"Universal Scrape Failed: {e}")
+             raise
+
      def create_scraper_task(
          self,
          file_name: str,
-         spider_id: str,
-         individual_params: Dict[str, Any],
-         spider_name: str = "youtube.com",
+         spider_id: str,  # required; obtained from the dashboard
+         spider_name: str,  # required, e.g. "youtube.com"
+         individual_params: Dict[str, Any],  # spider-specific parameters packed into this dict
          universal_params: Dict[str, Any] = None
      ) -> str:
          """
-         Create an Asynchronous Web Scraper Task.
+         Create a generic Web Scraper Task.
+
+         Note: Check the Thordata Dashboard to get the correct 'spider_id' and 'spider_name'.
          """
          headers = {
              "Authorization": f"Bearer {self.scraper_token}",
              "Content-Type": "application/x-www-form-urlencoded"
          }

+         # Pack and send as-is; skip heavy client-side validation to stay compatible
          payload = {
              "spider_name": spider_name,
              "spider_id": spider_id,
@@ -154,7 +228,7 @@ class ThordataClient:
          if universal_params:
              payload["spider_universal"] = json.dumps(universal_params)

-         logger.info(f"Creating Scraper Task: {spider_id}")
+         logger.info(f"Creating Scraper Task: {spider_name} (ID: {spider_id})")
          try:
              response = self.session.post(
                  self.SCRAPER_BUILDER_URL,
@@ -174,7 +248,6 @@ class ThordataClient:
      def get_task_status(self, task_id: str) -> str:
          """
          Check the status of a task.
-         Returns: 'Running', 'Ready', 'Failed', or 'Unknown'.
          """
          headers = {
              "token": self.public_token,
@@ -0,0 +1,20 @@
+ # thordata_sdk/enums.py
+ from enum import Enum
+
+ class Engine(str, Enum):
+     """Core search engines supported by the SERP API."""
+     GOOGLE = "google"
+     BING = "bing"
+     YANDEX = "yandex"
+     DUCKDUCKGO = "duckduckgo"
+     BAIDU = "baidu"
+
+ class GoogleSearchType(str, Enum):
+     """Common sub-types of Google search."""
+     SEARCH = "search"      # Default web search
+     MAPS = "maps"          # Maps
+     SHOPPING = "shopping"  # Shopping
+     NEWS = "news"          # News
+     IMAGES = "images"      # Images
+     VIDEOS = "videos"      # Videos
+     # Less common types are omitted; pass them through as plain strings
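Because both enums subclass str, their members compare equal to the literal values and can be dropped straight into request payloads; values not covered by the enums can still be passed as plain strings, as the comment above notes:

    from thordata_sdk.enums import Engine, GoogleSearchType

    assert Engine.BING == "bing"
    assert GoogleSearchType.SHOPPING.value == "shopping"

    # Anything not in the enum is simply passed through as a string by the clients
    custom_type = "patents"  # hypothetical sub-type, not defined in GoogleSearchType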
@@ -0,0 +1,41 @@
+ # thordata_sdk/parameters.py
+ from typing import Dict, Any
+
+ def normalize_serp_params(engine: str, query: str, **kwargs) -> Dict[str, Any]:
+     """
+     Normalize parameter differences across search engines.
+     """
+     # 1. Base parameters
+     payload = {
+         "num": str(kwargs.get("num", 10)),
+         "json": "1",
+         "engine": engine,
+     }
+
+     # 2. Handle the query keyword (Yandex uses 'text', everything else uses 'q')
+     if engine == "yandex":
+         payload["text"] = query
+         # Provide a default url if the user did not pass one
+         if "url" not in kwargs:
+             payload["url"] = "yandex.com"
+     else:
+         payload["q"] = query
+
+     # 3. Default URL handling (when the user did not pass one)
+     if "url" not in kwargs:
+         defaults = {
+             "google": "google.com",
+             "bing": "bing.com",
+             "duckduckgo": "duckduckgo.com",
+             "baidu": "baidu.com"
+         }
+         if engine in defaults:
+             payload["url"] = defaults[engine]
+
+     # 4. Pass every other user-supplied parameter through (e.g. type="shopping", google_domain="google.co.uk")
+     #    so dozens of engine-specific options don't have to be defined individually
+     for k, v in kwargs.items():
+         if k not in ["num", "engine", "q", "text"]:  # avoid overwriting core keys
+             payload[k] = v
+
+     return payload
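To make the normalization behaviour concrete, here is what the helper above returns for two representative calls (derived directly from the code):

    from thordata_sdk.parameters import normalize_serp_params

    # Google: the keyword goes into "q", a default url is filled in, extras pass through
    print(normalize_serp_params("google", "thordata", num=5, type="news"))
    # {'num': '5', 'json': '1', 'engine': 'google', 'q': 'thordata',
    #  'url': 'google.com', 'type': 'news'}

    # Yandex: the keyword goes into "text" instead of "q"
    print(normalize_serp_params("yandex", "thordata"))
    # {'num': '10', 'json': '1', 'engine': 'yandex', 'text': 'thordata',
    #  'url': 'yandex.com'}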
@@ -1,6 +1,6 @@
- Metadata-Version: 2.4
- Name: thordata_sdk
- Version: 0.2.2
+ Metadata-Version: 2.1
+ Name: thordata-sdk
+ Version: 0.2.4
  Summary: The official Python SDK for Thordata Proxy & Scraper Infrastructure.
  Home-page: https://github.com/Thordata/thordata-python-sdk
  Author: Thordata Developer Team
@@ -22,20 +22,6 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: requests>=2.25.0
- Requires-Dist: aiohttp>=3.8.0
- Dynamic: author
- Dynamic: author-email
- Dynamic: classifier
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: license
- Dynamic: license-file
- Dynamic: project-url
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary

  # Thordata Python SDK

@@ -1,11 +1,11 @@
  LICENSE
  README.md
  setup.py
- tests/test_async_client.py
- tests/test_client.py
  thordata_sdk/__init__.py
  thordata_sdk/async_client.py
  thordata_sdk/client.py
+ thordata_sdk/enums.py
+ thordata_sdk/parameters.py
  thordata_sdk.egg-info/PKG-INFO
  thordata_sdk.egg-info/SOURCES.txt
  thordata_sdk.egg-info/dependency_links.txt
@@ -1,59 +0,0 @@
- import pytest
- import aiohttp
- from aioresponses import aioresponses
- from thordata_sdk import AsyncThordataClient
-
- # Mark all tests in this module as async
- pytestmark = pytest.mark.asyncio
-
- # Mock Credentials
- TEST_SCRAPER = "async_scraper_token"
- TEST_PUB_TOKEN = "async_public_token"
- TEST_PUB_KEY = "async_key"
- TEST_HOST = "gate.thordata.com"
- TEST_PORT = 22225
-
- @pytest.fixture
- async def async_client():
-     """Fixture for AsyncThordataClient with context management."""
-     client = AsyncThordataClient(
-         scraper_token=TEST_SCRAPER,
-         public_token=TEST_PUB_TOKEN,
-         public_key=TEST_PUB_KEY,
-         proxy_host=TEST_HOST,
-         proxy_port=TEST_PORT
-     )
-     async with client:
-         yield client
-
- async def test_async_client_initialization(async_client):
-     """Test async client properties."""
-     expected_url = f"http://{TEST_HOST}:{TEST_PORT}"
-
-     assert async_client.proxy_url == expected_url
-     assert isinstance(async_client.proxy_auth, aiohttp.BasicAuth)
-     assert async_client.proxy_auth.login == TEST_SCRAPER
-
- async def test_async_successful_request(async_client):
-     """Test successful async proxy request."""
-     mock_url = "http://example.com/async_test"
-     mock_data = {"status": "async_ok"}
-
-     with aioresponses() as m:
-         m.get(mock_url, status=200, payload=mock_data)
-
-         response = await async_client.get(mock_url)
-
-         assert response.status == 200
-         data = await response.json()
-         assert data == mock_data
-
- async def test_async_http_error_handling(async_client):
-     """Test async HTTP error."""
-     error_url = "http://example.com/async_error"
-
-     with aioresponses() as m:
-         m.get(error_url, status=401)
-
-         response = await async_client.get(error_url)
-         assert response.status == 401
@@ -1,53 +0,0 @@
- import requests
- import requests_mock
- import pytest
- from thordata_sdk.client import ThordataClient
-
- # Mock Credentials
- TEST_SCRAPER = "mock_scraper_token"
- TEST_PUB_TOKEN = "mock_public_token"
- TEST_PUB_KEY = "mock_public_key"
- TEST_HOST = "gate.thordata.com"
- TEST_PORT = 22225
-
- @pytest.fixture
- def client():
-     """Fixture to create a ThordataClient instance."""
-     return ThordataClient(
-         scraper_token=TEST_SCRAPER,
-         public_token=TEST_PUB_TOKEN,
-         public_key=TEST_PUB_KEY,
-         proxy_host=TEST_HOST,
-         proxy_port=TEST_PORT
-     )
-
- def test_client_initialization(client):
-     """Test client initialization and proxy URL construction."""
-     expected_url = f"http://{TEST_SCRAPER}:@{TEST_HOST}:{TEST_PORT}"
-
-     # Verify proxy configuration in session
-     assert client.session.proxies["http"] == expected_url
-     assert client.session.proxies["https"] == expected_url
-
- def test_successful_request(client):
-     """Test a successful proxy request (200 OK)."""
-     mock_url = "http://example.com/test"
-     mock_data = {"status": "ok"}
-
-     with requests_mock.Mocker() as m:
-         m.get(mock_url, status_code=200, json=mock_data)
-
-         response = client.get(mock_url)
-
-         assert response.status_code == 200
-         assert response.json() == mock_data
-
- def test_http_error_handling(client):
-     """Test handling of HTTP errors (e.g., 403 Forbidden)."""
-     error_url = "http://example.com/error"
-
-     with requests_mock.Mocker() as m:
-         m.get(error_url, status_code=403)
-
-         response = client.get(error_url)
-         assert response.status_code == 403
File without changes
File without changes
File without changes