thordata-sdk 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ """
2
+ Core networking modules for Thordata SDK.
3
+ """
4
+
5
+ from .async_http_client import AsyncThordataHttpSession
6
+ from .http_client import ThordataHttpSession
7
+ from .tunnel import (
8
+ HAS_PYSOCKS,
9
+ UpstreamProxySocketFactory,
10
+ create_tls_in_tls,
11
+ parse_upstream_proxy,
12
+ socks5_handshake,
13
+ )
14
+
15
+ __all__ = [
16
+ "ThordataHttpSession",
17
+ "AsyncThordataHttpSession",
18
+ "parse_upstream_proxy",
19
+ "UpstreamProxySocketFactory",
20
+ "create_tls_in_tls",
21
+ "socks5_handshake",
22
+ "HAS_PYSOCKS",
23
+ ]
@@ -0,0 +1,91 @@
1
+ """
2
+ Core Async HTTP Session management for Thordata SDK.
3
+ Wraps aiohttp.ClientSession with retry logic and standard headers.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import logging
10
+ from typing import Any
11
+
12
+ import aiohttp
13
+
14
+ from .. import __version__ as _sdk_version
15
+ from .._utils import build_user_agent
16
+ from ..exceptions import ThordataNetworkError, ThordataTimeoutError
17
+ from ..retry import RetryConfig, with_retry
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class AsyncThordataHttpSession:
    """
    Async wrapper for HTTP requests with built-in retry logic.

    One ``aiohttp.ClientSession`` is created lazily on the first request and
    reused until :meth:`close` is called. The object may also be used as an
    async context manager so the underlying session is always released::

        async with AsyncThordataHttpSession() as http:
            resp = await http.request("GET", url)
    """

    def __init__(
        self,
        timeout: int = 30,
        retry_config: RetryConfig | None = None,
        trust_env: bool = True,
    ):
        """
        Args:
            timeout: Default total timeout (seconds) applied to every request.
            retry_config: Retry policy; a default ``RetryConfig()`` is used
                when omitted.
            trust_env: Forwarded to ``aiohttp.ClientSession``. Defaults to
                ``True`` (the previous hard-coded value) and mirrors the
                parameter of the synchronous ``ThordataHttpSession``.
        """
        self._timeout = aiohttp.ClientTimeout(total=timeout)
        self._retry_config = retry_config or RetryConfig()
        self._trust_env = trust_env
        self._session: aiohttp.ClientSession | None = None
        self._headers = {
            "User-Agent": build_user_agent(_sdk_version, "aiohttp"),
            "Accept-Encoding": "gzip, deflate",
        }

    async def __aenter__(self) -> AsyncThordataHttpSession:
        """Enter the async context; the session itself is still created lazily."""
        return self

    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        """Close the underlying session when leaving the async context."""
        await self.close()

    async def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the live session, creating a new one if missing or closed."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=self._timeout,
                headers=self._headers,
                trust_env=self._trust_env,
            )
        return self._session

    async def close(self) -> None:
        """Close the underlying session if it is open. Safe to call repeatedly."""
        if self._session and not self._session.closed:
            await self._session.close()
            self._session = None

    async def request(
        self,
        method: str,
        url: str,
        params: dict[str, Any] | None = None,
        data: Any = None,
        headers: dict[str, str] | None = None,
        timeout: int | None = None,
        proxy: str | None = None,
        proxy_auth: aiohttp.BasicAuth | None = None,
    ) -> aiohttp.ClientResponse:
        """
        Execute async HTTP request with automatic retry logic.

        Args:
            method: HTTP method name.
            url: Target URL.
            params: Optional query parameters.
            data: Optional request body, passed through to aiohttp.
            headers: Optional per-request headers.
            timeout: Per-request total timeout in seconds. ``None`` or ``0``
                falls back to the session default.
            proxy: Optional proxy URL passed through to aiohttp.
            proxy_auth: Optional proxy credentials passed through to aiohttp.

        Returns:
            The ``aiohttp.ClientResponse`` returned by the session.

        Raises:
            ThordataTimeoutError: The request raised ``asyncio.TimeoutError``.
            ThordataNetworkError: The request raised ``aiohttp.ClientError``.
        """
        session = await self._ensure_session()

        # Determine timeout
        req_timeout = aiohttp.ClientTimeout(total=timeout) if timeout else self._timeout

        @with_retry(self._retry_config)
        async def _do_request() -> aiohttp.ClientResponse:
            # Errors are translated inside the retried callable so the retry
            # decorator sees SDK exception types rather than raw aiohttp ones.
            try:
                return await session.request(
                    method=method,
                    url=url,
                    params=params,
                    data=data,
                    headers=headers,
                    timeout=req_timeout,
                    proxy=proxy,
                    proxy_auth=proxy_auth,
                )
            except asyncio.TimeoutError as e:
                # Map asyncio timeout to SDK timeout for retry handler
                raise ThordataTimeoutError(
                    f"Async request timed out: {e}", original_error=e
                ) from e
            except aiohttp.ClientError as e:
                # Map aiohttp errors to SDK network error
                raise ThordataNetworkError(
                    f"Async request failed: {e}", original_error=e
                ) from e

        return await _do_request()
@@ -0,0 +1,79 @@
1
+ """
2
+ Core HTTP Session management for Thordata SDK.
3
+ Handles authentication injection, retries, and session lifecycle.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+ import requests
11
+
12
+ from .. import __version__ as _sdk_version
13
+ from .._utils import build_user_agent
14
+ from ..exceptions import ThordataNetworkError, ThordataTimeoutError
15
+ from ..retry import RetryConfig, with_retry
16
+
17
+
18
class ThordataHttpSession:
    """
    Wrapper around requests.Session with built-in retry and Thordata headers.

    The object owns a ``requests.Session`` for its whole lifetime. It can be
    used as a context manager so the session is always closed::

        with ThordataHttpSession() as http:
            resp = http.request("GET", url)
    """

    def __init__(
        self,
        timeout: int = 30,
        retry_config: RetryConfig | None = None,
        trust_env: bool = True,
    ):
        """
        Args:
            timeout: Default timeout (seconds) used when a request does not
                supply its own.
            retry_config: Retry policy; a default ``RetryConfig()`` is used
                when omitted.
            trust_env: Assigned to ``requests.Session.trust_env``.
        """
        self._session = requests.Session()
        self._session.trust_env = trust_env
        self._timeout = timeout
        self._retry_config = retry_config or RetryConfig()

        # Default Headers
        self._session.headers.update(
            {
                "User-Agent": build_user_agent(_sdk_version, "requests"),
                "Accept-Encoding": "gzip, deflate",
            }
        )

    def __enter__(self) -> ThordataHttpSession:
        """Enter the context and return this session wrapper."""
        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        """Close the underlying ``requests.Session`` when leaving the context."""
        self.close()

    def close(self) -> None:
        """Close the underlying ``requests.Session``."""
        self._session.close()

    def request(
        self,
        method: str,
        url: str,
        params: dict[str, Any] | None = None,
        data: Any = None,
        headers: dict[str, str] | None = None,
        timeout: int | None = None,
        stream: bool = False,
    ) -> requests.Response:
        """
        Execute HTTP request with automatic retry logic.

        Args:
            method: HTTP method name.
            url: Target URL.
            params: Optional query parameters.
            data: Optional request body, passed through to requests.
            headers: Optional per-request headers.
            timeout: Per-request timeout in seconds; ``None`` uses the
                session default.
            stream: Passed through to ``requests.Session.request``.

        Returns:
            The ``requests.Response`` returned by the session.

        Raises:
            ThordataTimeoutError: The request raised ``requests.Timeout``.
            ThordataNetworkError: The request raised any other
                ``requests.RequestException``.
        """
        effective_timeout = timeout if timeout is not None else self._timeout

        @with_retry(self._retry_config)
        def _do_request() -> requests.Response:
            return self._session.request(
                method=method,
                url=url,
                params=params,
                data=data,
                headers=headers,
                timeout=effective_timeout,
                stream=stream,
            )

        # Translation to SDK exceptions happens after the retry wrapper has
        # given up, so the retried callable raises raw requests exceptions.
        try:
            return _do_request()
        except requests.Timeout as e:
            raise ThordataTimeoutError(
                f"Request timed out: {e}", original_error=e
            ) from e
        except requests.RequestException as e:
            raise ThordataNetworkError(f"Request failed: {e}", original_error=e) from e
@@ -0,0 +1,287 @@
1
+ """
2
+ Low-level networking tunnel utilities.
3
+ Handles Upstream Proxies, TLS-in-TLS wrapping, and SOCKS handshakes.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import base64
9
+ import contextlib
10
+ import logging
11
+ import socket
12
+ import ssl
13
+ import time
14
+ from typing import Any
15
+ from urllib.parse import urlparse
16
+
17
+ try:
18
+ import socks
19
+
20
+ HAS_PYSOCKS = True
21
+ except ImportError:
22
+ HAS_PYSOCKS = False
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def parse_upstream_proxy() -> dict[str, Any] | None:
    """
    Read the upstream proxy configuration from ``THORDATA_UPSTREAM_PROXY``.

    The variable is expected to hold a proxy URL such as
    ``http://user:pass@host:port`` or ``socks5://host:port``.

    Returns:
        ``None`` when the variable is unset/blank or the scheme is not one of
        ``socks5``, ``socks5h``, ``http`` or ``https``. Otherwise a dict with:

        * ``scheme``: ``"socks5"`` (for socks5/socks5h) or ``"http"``
          (for http/https).
        * ``host``: hostname from the URL, ``"127.0.0.1"`` if absent.
        * ``port``: port from the URL, ``7890`` if absent.
        * ``username`` / ``password``: credentials from the URL with
          percent-encoding decoded, or ``None`` when not present.

    Raises:
        ValueError: If the URL contains an invalid port (raised by
            ``urllib.parse`` when the port is read).
    """
    import os
    from urllib.parse import unquote

    upstream_url = os.environ.get("THORDATA_UPSTREAM_PROXY", "").strip()
    if not upstream_url:
        return None

    parsed = urlparse(upstream_url)
    scheme = (parsed.scheme or "").lower()

    # Normalize scheme
    if scheme in ("socks5", "socks5h"):
        scheme = "socks5"
    elif scheme in ("http", "https"):
        scheme = "http"
    else:
        return None

    # urlparse leaves userinfo percent-encoded; reserved characters such as
    # "@" or ":" in credentials must be written encoded in the URL, so decode
    # them here to recover the real username/password.
    username = parsed.username
    password = parsed.password
    if username is not None:
        username = unquote(username)
    if password is not None:
        password = unquote(password)

    return {
        "scheme": scheme,
        "host": parsed.hostname or "127.0.0.1",
        "port": parsed.port or 7890,
        "username": username,
        "password": password,
    }
52
+
53
+
54
class UpstreamProxySocketFactory:
    """
    Creates sockets routed through an upstream proxy.

    The configuration dict is read for the keys ``scheme``, ``host``,
    ``port`` and, optionally, ``username`` and ``password``. A ``scheme`` of
    ``"socks5"`` uses PySocks; any other value uses an HTTP ``CONNECT`` tunnel.
    """

    # Upper bound on the CONNECT response header block so a misbehaving proxy
    # cannot keep the byte-by-byte reader looping indefinitely.
    _MAX_CONNECT_RESPONSE_BYTES = 65536

    def __init__(self, upstream_config: dict[str, Any]):
        self.config = upstream_config

    def create_connection(
        self,
        address: tuple[str, int],
        timeout: float | None = None,
    ) -> socket.socket:
        """
        Open a connection to ``address`` through the upstream proxy.

        Args:
            address: ``(host, port)`` of the final destination.
            timeout: Socket timeout in seconds; ``None`` or a non-positive
                value falls back to 30 seconds.

        Returns:
            A connected socket whose traffic is relayed to ``address``.
        """
        scheme = self.config["scheme"]
        if timeout is None or timeout <= 0:
            timeout = 30.0

        if scheme == "socks5":
            return self._create_socks_connection(address, timeout)
        else:
            return self._create_http_tunnel(address, timeout)

    def _create_socks_connection(
        self, address: tuple[str, int], timeout: float
    ) -> socket.socket:
        """Connect to ``address`` via the SOCKS5 upstream using PySocks.

        Raises:
            ImportError: If PySocks is not installed.
        """
        if not HAS_PYSOCKS:
            raise ImportError("PySocks is required for upstream SOCKS proxy.")

        sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
        sock.set_proxy(
            socks.SOCKS5,
            self.config["host"],
            self.config["port"],
            rdns=True,
            username=self.config.get("username"),
            password=self.config.get("password"),
        )
        sock.settimeout(timeout)
        try:
            sock.connect(address)
        except Exception:
            # Do not leak the socket when the proxy/target is unreachable.
            sock.close()
            raise
        return sock

    def _create_http_tunnel(
        self, address: tuple[str, int], timeout: float
    ) -> socket.socket:
        """Connect to the HTTP upstream and establish a CONNECT tunnel.

        Raises:
            ConnectionError: If the proxy closes the connection, sends an
                oversized response header, or answers with a non-2xx status.
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(timeout)

        try:
            # 1. Connect to Upstream
            sock.connect((self.config["host"], self.config["port"]))

            # 2. Send CONNECT
            sock.sendall(self._build_connect_request(address))

            # 3. Read and validate the response status line
            status_line = self._read_connect_status(sock)
            if not self._is_success_status(status_line):
                status_str = status_line.decode("utf-8", errors="replace")
                raise ConnectionError(f"Upstream proxy CONNECT failed: {status_str}")

        except Exception:
            sock.close()
            raise

        return sock

    def _build_connect_request(self, address: tuple[str, int]) -> bytes:
        """Build the raw CONNECT request, including Basic proxy auth if set."""
        target_host, target_port = address
        # IPv6 literals must be bracketed in the authority form.
        if ":" in target_host and not target_host.startswith("["):
            target_host = f"[{target_host}]"
        authority = f"{target_host}:{target_port}"

        lines = [f"CONNECT {authority} HTTP/1.1", f"Host: {authority}"]

        username = self.config.get("username")
        if username:
            # The key may be present with value None (username-only URL), so
            # a .get() default is not enough; coerce a missing password to "".
            password = self.config.get("password") or ""
            b64_creds = base64.b64encode(f"{username}:{password}".encode()).decode()
            lines.append(f"Proxy-Authorization: Basic {b64_creds}")

        return ("\r\n".join(lines) + "\r\n\r\n").encode()

    def _read_connect_status(self, sock: socket.socket) -> bytes:
        """Read the proxy's response header block and return its status line.

        Reads one byte at a time so that no bytes belonging to the tunnelled
        stream are consumed past the terminating blank line.
        """
        header = bytearray()
        while not header.endswith(b"\r\n\r\n"):
            if len(header) >= self._MAX_CONNECT_RESPONSE_BYTES:
                raise ConnectionError("Upstream proxy CONNECT response too large")
            chunk = sock.recv(1)
            if not chunk:
                raise ConnectionError(
                    "Upstream proxy closed connection during CONNECT"
                )
            header += chunk
        return bytes(header.split(b"\r\n", 1)[0])

    @staticmethod
    def _is_success_status(status_line: bytes) -> bool:
        """Return True when the status line carries a 2xx status code.

        The status code token itself is inspected, so a "200" appearing
        elsewhere in the line (for example in the reason phrase of an error
        response) is not mistaken for success.
        """
        parts = status_line.split(None, 2)
        if len(parts) < 2 or not parts[0].startswith(b"HTTP/"):
            return False
        code = parts[1]
        return len(code) == 3 and code.isdigit() and code.startswith(b"2")
141
+
142
+
143
class TLSInTLSSocket:
    """
    A wrapper around SSLObject to make it behave like a socket for requests.

    Plaintext is encrypted/decrypted by ``ssl_obj`` through the two memory
    BIOs; the resulting TLS records are carried over ``outer``. Only
    ``settimeout``, ``sendall``, ``recv`` and ``close`` are provided.
    """

    def __init__(
        self,
        outer: ssl.SSLSocket,
        ssl_obj: ssl.SSLObject,
        incoming: ssl.MemoryBIO,
        outgoing: ssl.MemoryBIO,
    ):
        """
        Args:
            outer: Transport socket carrying the inner TLS records.
            ssl_obj: Inner TLS state machine bound to the two BIOs.
            incoming: BIO that receives bytes read from ``outer``.
            outgoing: BIO from which bytes destined for ``outer`` are read.
        """
        self._outer = outer
        self._ssl = ssl_obj
        self._incoming = incoming
        self._outgoing = outgoing
        self._timeout: float | None = None

    def settimeout(self, t: float | None) -> None:
        """Record the timeout and apply it to the outer socket."""
        self._timeout = t
        self._outer.settimeout(t)

    def _flush_outgoing(self) -> None:
        """Send any TLS records waiting in the outgoing BIO."""
        enc = self._outgoing.read()
        if enc:
            self._outer.sendall(enc)

    def _feed_incoming(self) -> bool:
        """Read from the outer socket into the incoming BIO.

        Returns:
            ``False`` when the outer socket reported end-of-stream.
        """
        data = self._outer.recv(8192)
        if not data:
            return False
        self._incoming.write(data)
        return True

    def sendall(self, data: bytes) -> None:
        """Encrypt ``data`` and send all of it over the outer socket.

        Raises:
            ConnectionError: If the outer connection closes while the inner
                TLS layer is waiting for peer data before it can write.
        """
        view = memoryview(data)
        while len(view):
            try:
                written = self._ssl.write(view)
            except ssl.SSLWantReadError:
                # The TLS layer needs peer data before it can accept the
                # payload. Nothing was consumed, so pump the transport and
                # retry instead of silently dropping the data.
                self._flush_outgoing()
                if not self._feed_incoming():
                    raise ConnectionError(
                        "Connection closed while sending TLS data"
                    ) from None
                continue
            except ssl.SSLWantWriteError:
                self._flush_outgoing()
                continue
            view = view[written:]
        self._flush_outgoing()

    def recv(self, bufsize: int) -> bytes:
        """Return up to ``bufsize`` decrypted bytes, or ``b""`` at end-of-stream.

        A timeout raised by the outer socket propagates to the caller.
        """
        while True:
            try:
                return self._ssl.read(bufsize)
            except ssl.SSLWantReadError:
                # Push out anything the TLS layer queued before blocking on
                # the peer, otherwise both sides could wait on each other.
                self._flush_outgoing()
                if not self._feed_incoming():
                    return b""
            except ssl.SSLWantWriteError:
                self._flush_outgoing()

    def close(self) -> None:
        """Close the outer socket, ignoring any error raised while closing."""
        with contextlib.suppress(Exception):
            self._outer.close()
194
+
195
+
196
def create_tls_in_tls(
    outer_sock: ssl.SSLSocket, hostname: str, timeout: float
) -> TLSInTLSSocket:
    """
    Perform a TLS client handshake on top of an existing socket.

    The inner TLS session is driven through memory BIOs: handshake records
    produced by the inner session are sent over ``outer_sock`` and bytes
    received from ``outer_sock`` are fed back into it.

    Args:
        outer_sock: Connected transport that carries the inner TLS records.
        hostname: Server name used for SNI and certificate verification
            (default ``ssl`` context settings).
        timeout: Overall handshake budget in seconds; also left as the
            timeout of ``outer_sock`` on return.

    Returns:
        A ``TLSInTLSSocket`` wrapping the established inner session.

    Raises:
        TimeoutError: The handshake did not complete within ``timeout``.
        ConnectionError: ``outer_sock`` reached end-of-stream mid-handshake.
    """
    ctx = ssl.create_default_context()
    incoming = ssl.MemoryBIO()
    outgoing = ssl.MemoryBIO()
    ssl_obj = ctx.wrap_bio(incoming, outgoing, server_hostname=hostname)

    def _flush_outgoing() -> None:
        data = outgoing.read()
        if data:
            outer_sock.sendall(data)

    outer_sock.settimeout(timeout)
    # Monotonic clock: immune to wall-clock adjustments during the handshake.
    deadline = time.monotonic() + timeout

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("TLS handshake timed out")
        try:
            ssl_obj.do_handshake()
            break
        except ssl.SSLWantReadError:
            _flush_outgoing()
            # Wait only for what is left of the budget, so several slow
            # reads cannot add up to more than ``timeout`` overall.
            outer_sock.settimeout(remaining)
            try:
                chunk = outer_sock.recv(4096)
            except socket.timeout:
                # Loop back to the deadline check above.
                continue
            if not chunk:
                raise ConnectionError("Connection closed")
            incoming.write(chunk)
        except ssl.SSLWantWriteError:
            _flush_outgoing()

    # The final handshake flight may still be sitting in the outgoing BIO
    # once do_handshake() returns; send it now so a caller that reads before
    # writing does not wait on a peer that never saw the handshake finish.
    _flush_outgoing()
    outer_sock.settimeout(timeout)

    return TLSInTLSSocket(outer_sock, ssl_obj, incoming, outgoing)
230
+
231
+
232
def _recv_exact(sock: socket.socket, count: int) -> bytes:
    """Read exactly ``count`` bytes, returning fewer only at end-of-stream."""
    buf = bytearray()
    while len(buf) < count:
        chunk = sock.recv(count - len(buf))
        if not chunk:
            break
        buf += chunk
    return bytes(buf)


def socks5_handshake(
    sock: socket.socket,
    target_host: str,
    target_port: int,
    user: str | None,
    pwd: str | None,
) -> socket.socket:
    """
    Negotiate a SOCKS5 CONNECT to ``target_host:target_port`` on ``sock``.

    The host is always sent as a domain name (address type ``0x03``), so name
    resolution is left to the proxy. Username/password authentication is
    offered only when both ``user`` and ``pwd`` are non-empty.

    Args:
        sock: Socket already connected to the SOCKS5 proxy.
        target_host: Destination host name.
        target_port: Destination port.
        user: Optional username.
        pwd: Optional password.

    Returns:
        The same ``sock``, positioned at the start of the relayed stream.

    Raises:
        ConnectionError: On a malformed, truncated or negative proxy reply.
        ValueError: If the host, username or password exceeds 255 bytes.
    """
    # Length fields are single bytes and count encoded bytes, not characters.
    host_bytes = target_host.encode()
    if len(host_bytes) > 255:
        raise ValueError("SOCKS5 host name must be at most 255 bytes")

    # 1. Auth Method
    if user and pwd:
        sock.sendall(b"\x05\x02\x00\x02")
    else:
        sock.sendall(b"\x05\x01\x00")

    # recv() may legally return fewer bytes than requested, so every reply
    # is read with an exact-length helper.
    resp = _recv_exact(sock, 2)
    if len(resp) < 2 or resp[0] != 0x05:
        resp_str = resp.decode("utf-8", errors="replace") if resp else "Empty"
        raise ConnectionError(f"Invalid SOCKS5 init response: {resp_str}")

    method = resp[1]
    if method == 0x02:  # User/Pass
        u_bytes = (user or "").encode()
        p_bytes = (pwd or "").encode()
        if len(u_bytes) > 255 or len(p_bytes) > 255:
            raise ValueError(
                "SOCKS5 username and password must each be at most 255 bytes"
            )
        auth_payload = (
            b"\x01" + bytes([len(u_bytes)]) + u_bytes + bytes([len(p_bytes)]) + p_bytes
        )
        sock.sendall(auth_payload)
        auth_resp = _recv_exact(sock, 2)
        if len(auth_resp) < 2 or auth_resp[1] != 0x00:
            raise ConnectionError("SOCKS5 Authentication failed")
    elif method == 0xFF:
        raise ConnectionError("No acceptable authentication methods")
    elif method != 0x00:
        # The proxy picked a method this client cannot perform.
        raise ConnectionError(f"Unsupported SOCKS5 authentication method: {method}")

    # 2. Connect
    req = (
        b"\x05\x01\x00\x03"
        + bytes([len(host_bytes)])
        + host_bytes
        + target_port.to_bytes(2, "big")
    )
    sock.sendall(req)

    # 3. Response
    resp = _recv_exact(sock, 4)
    if len(resp) < 4 or resp[1] != 0x00:
        err = str(resp[1]) if len(resp) > 1 and resp[1] != 0x00 else "Empty/Unknown"
        raise ConnectionError(f"SOCKS5 Connect failed, error code: {err}")

    # Drain the bound address + port completely; any byte left behind would
    # be misread as the start of the tunnelled stream.
    atype = resp[3]
    if atype == 1:
        addr_len = 4
    elif atype == 3:
        length_byte = _recv_exact(sock, 1)
        if not length_byte:
            raise ConnectionError("SOCKS5 Connect reply truncated")
        addr_len = length_byte[0]
    elif atype == 4:
        addr_len = 16
    else:
        raise ConnectionError(f"SOCKS5 Connect reply has unknown address type: {atype}")

    if len(_recv_exact(sock, addr_len + 2)) < addr_len + 2:
        raise ConnectionError("SOCKS5 Connect reply truncated")

    return sock