src-py-lib 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,279 @@
1
+ """Shared HTTP transport with timeouts, retries, and useful errors."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import random
8
+ import time
9
+ import urllib.parse
10
+ from collections.abc import Iterable, Mapping
11
+ from dataclasses import dataclass, field
12
+ from typing import Final, cast
13
+
14
+ import httpx
15
+
16
+ from src_py_lib.utils.json_types import JSONDict, json_dict
17
+ from src_py_lib.utils.logging import event, record_http_attempt, record_http_retry
18
+
19
# Defaults used for the corresponding HTTPClient fields.
DEFAULT_TIMEOUT_SECONDS: Final[float] = 30.0
DEFAULT_MAX_CONNECTIONS: Final[int] = 20
DEFAULT_MAX_ATTEMPTS: Final[int] = 3
DEFAULT_RETRY_BASE_DELAY_SECONDS: Final[float] = 0.5
DEFAULT_RETRY_MAX_DELAY_SECONDS: Final[float] = 30.0

# Response status codes that are retried by default.
RETRYABLE_STATUS_CODES: Final[frozenset[int]] = frozenset((408, 429, 500, 502, 503, 504))

# Maximum number of characters of an error response body kept in previews.
ERROR_BODY_PREVIEW_CHARS: Final[int] = 500

# Header redaction: a header whose lower-cased name contains any of these
# fragments is logged as REDACTED_HEADER_VALUE instead of its real value.
REDACTED_HEADER_VALUE: Final[str] = "[redacted]"
SENSITIVE_HEADER_FRAGMENTS: Final[tuple[str, ...]] = (
    "api-key",
    "api_key",
    "authorization",
    "cookie",
    "password",
    "secret",
    "token",
)

logger = logging.getLogger(__name__)
38
+
39
+
40
class HTTPClientError(RuntimeError):
    """Raised when an HTTP request fails after retries.

    Attributes:
        status_code: Response status code, or ``None`` when none was supplied.
        body: Response body text supplied by the raiser (``""`` by default).
        headers: Supplied response headers, keyed by lower-cased header name.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int | None = None,
        body: str = "",
        headers: Mapping[str, str] | None = None,
    ) -> None:
        super().__init__(message)
        self.status_code = status_code
        self.body = body
        # Lower-case the names so lookups do not depend on the sender's casing.
        lowered: dict[str, str] = {}
        for name, value in dict(headers or {}).items():
            lowered[name.lower()] = value
        self.headers = lowered
55
+
56
+
57
@dataclass
class HTTPClient:
    """HTTPX-backed HTTP client for JSON APIs with pooled connections.

    A request is attempted up to ``max_attempts`` times. Transport errors and
    responses whose status is in ``retryable_status_codes`` are retried after a
    delay taken from the ``Retry-After`` header when it parses, and from
    jittered exponential backoff otherwise. Failures are raised as
    ``HTTPClientError``.

    The client can be used as a context manager; leaving the block closes the
    underlying ``httpx.Client``.
    """

    # Passed straight to httpx.Client(timeout=...).
    timeout: float | httpx.Timeout = DEFAULT_TIMEOUT_SECONDS
    # Sent as User-Agent unless the caller supplies that header (any casing).
    user_agent: str = "src-py-lib"
    # Used for both the connection limit and the keep-alive limit.
    max_connections: int = DEFAULT_MAX_CONNECTIONS
    # Total number of tries per request, including the first one.
    max_attempts: int = DEFAULT_MAX_ATTEMPTS
    retry_base_delay_seconds: float = DEFAULT_RETRY_BASE_DELAY_SECONDS
    retry_max_delay_seconds: float = DEFAULT_RETRY_MAX_DELAY_SECONDS
    retryable_status_codes: frozenset[int] = RETRYABLE_STATUS_CODES
    # Optional custom httpx transport, handed to httpx.Client unchanged.
    transport: httpx.BaseTransport | None = field(default=None, repr=False)
    _client: httpx.Client = field(init=False, repr=False)

    def __post_init__(self) -> None:
        """Validate the limits and build the pooled ``httpx.Client``.

        Raises:
            ValueError: If ``max_connections`` or ``max_attempts`` is below 1.
        """
        if self.max_connections < 1:
            raise ValueError("max_connections must be at least 1")
        if self.max_attempts < 1:
            raise ValueError("max_attempts must be at least 1")
        self._client = httpx.Client(
            timeout=self.timeout,
            limits=httpx.Limits(
                max_connections=self.max_connections,
                max_keepalive_connections=self.max_connections,
            ),
            transport=self.transport,
        )

    def __enter__(self) -> HTTPClient:
        return self

    def __exit__(self, *_args: object) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying pooled HTTP transport."""
        self._client.close()

    def request(
        self,
        method: str,
        url: str,
        *,
        headers: Mapping[str, str] | None = None,
        query: Mapping[str, str | int | float | bool | None] | None = None,
        json_body: object | None = None,
        data: bytes | None = None,
    ) -> bytes:
        """Make an HTTP request and return raw response bytes.

        Args:
            method: HTTP method name.
            url: Target URL; ``query`` entries are appended to it.
            headers: Extra request headers. A caller-supplied ``User-Agent`` or
                ``Content-Type`` (in any casing) replaces the default.
            query: Query parameters; entries whose value is ``None`` are dropped.
            json_body: When not ``None``, serialized as the JSON request body
                (takes precedence over ``data``).
            data: Raw request body used when ``json_body`` is ``None``.

        Returns:
            The response body for any response with a status below 400.

        Raises:
            HTTPClientError: On a status of 400 or above that is not retried,
                or on a transport error once attempts are exhausted.
        """
        request_url = _with_query(url, query)
        body = data
        request_headers = dict(headers or {})
        # Header names are case-insensitive: check for caller overrides without
        # regard to casing so a default never ends up as a duplicate header.
        supplied = {name.lower() for name in request_headers}
        if "user-agent" not in supplied:
            request_headers = {"User-Agent": self.user_agent, **request_headers}
        if json_body is not None:
            body = json.dumps(json_body).encode("utf-8")
            if "content-type" not in supplied:
                request_headers["Content-Type"] = "application/json"
        for attempt in range(1, self.max_attempts + 1):
            try:
                with event(
                    "http_request",
                    level="debug",
                    method=method,
                    url=_safe_url(request_url),
                    attempt=attempt,
                    request_headers=_headers_for_log(request_headers),
                    request_bytes=len(body or b""),
                ) as fields:
                    response = self._client.request(
                        method,
                        request_url,
                        headers=request_headers,
                        content=body,
                    )
                    payload = response.content
                    fields["status_code"] = response.status_code
                    fields["reason_phrase"] = response.reason_phrase
                    fields["response_headers"] = _headers_for_log(response.headers)
                    fields["response_bytes"] = len(payload)
                    http_version = _response_http_version(response)
                    if http_version is not None:
                        fields["http_version"] = http_version
                # NOTE(review): this listing was recovered from a source whose
                # indentation was stripped; the `with event(...)` body is assumed
                # to end here. Confirm against the canonical file.
                record_http_attempt(
                    request_bytes=len(body or b""),
                    response_bytes=len(payload),
                    status_code=response.status_code,
                )
                if response.status_code >= 400:
                    body_text = _body_preview(payload)
                    if not self._should_retry(response.status_code, attempt):
                        raise HTTPClientError(
                            f"HTTP {response.status_code} for {method} "
                            f"{_safe_url(request_url)}: {body_text}",
                            status_code=response.status_code,
                            body=body_text,
                            headers=dict(response.headers),
                        )
                    record_http_retry()
                    self._sleep_before_retry(attempt, response.headers.get("Retry-After"))
                else:
                    return payload
            except HTTPClientError:
                raise
            except httpx.TransportError as exception:
                record_http_attempt(request_bytes=len(body or b""), transport_error=True)
                if not self._should_retry(None, attempt):
                    failure = (
                        "timed out" if isinstance(exception, httpx.TimeoutException) else "failed"
                    )
                    raise HTTPClientError(
                        f"HTTP request {failure} for {method} {_safe_url(request_url)}: "
                        f"{_exception_message(exception)}"
                    ) from exception
                record_http_retry()
                self._sleep_before_retry(attempt, None)
        # The final attempt always returns or raises above.
        raise AssertionError("HTTP retry loop exited without returning or raising")

    def json(
        self,
        method: str,
        url: str,
        *,
        headers: Mapping[str, str] | None = None,
        query: Mapping[str, str | int | float | bool | None] | None = None,
        json_body: object | None = None,
    ) -> JSONDict:
        """Make an HTTP request and decode a JSON object response.

        An empty response body is treated as ``{}``.

        Raises:
            HTTPClientError: If the request fails, or if the response body is
                not valid UTF-8 or not valid JSON.
        """
        raw = self.request(method, url, headers=headers, query=query, json_body=json_body)
        try:
            return json_dict(json.loads(raw.decode("utf-8")) if raw else {})
        except (UnicodeDecodeError, json.JSONDecodeError) as exception:
            # Undecodable bytes are as much an invalid JSON response as a syntax
            # error, so both surface as the client's own error type.
            raise HTTPClientError(
                f"Invalid JSON response from {method} {_safe_url(url)}"
            ) from exception

    def _should_retry(self, status_code: int | None, attempt: int) -> bool:
        """Return whether another attempt should follow attempt number ``attempt``.

        ``status_code`` is ``None`` for transport errors, which are always
        retryable while attempts remain.
        """
        if attempt >= self.max_attempts:
            return False
        return status_code is None or status_code in self.retryable_status_codes

    def _sleep_before_retry(self, attempt: int, retry_after: str | None) -> None:
        """Sleep before the next attempt.

        Uses the parsed ``Retry-After`` value when available; otherwise uses
        exponential backoff capped at ``retry_max_delay_seconds`` and then
        scaled by a random factor between 0.5 and 1.5.
        """
        delay = retry_after_seconds(retry_after)
        if delay is None:
            delay = min(
                self.retry_base_delay_seconds * (2 ** (attempt - 1)),
                self.retry_max_delay_seconds,
            ) * random.uniform(0.5, 1.5)
        logger.warning("HTTP request failed; retrying in %.2fs (attempt %d).", delay, attempt + 1)
        time.sleep(delay)
204
+
205
+
206
+ def _with_query(
207
+ url: str,
208
+ query: Mapping[str, str | int | float | bool | None] | None,
209
+ ) -> str:
210
+ if not query:
211
+ return url
212
+ filtered = {key: value for key, value in query.items() if value is not None}
213
+ separator = "&" if urllib.parse.urlsplit(url).query else "?"
214
+ return f"{url}{separator}{urllib.parse.urlencode(filtered)}"
215
+
216
+
217
+ def _safe_url(url: str) -> str:
218
+ split = urllib.parse.urlsplit(url)
219
+ return urllib.parse.urlunsplit((split.scheme, split.netloc, split.path, split.query, ""))
220
+
221
+
222
def _headers_for_log(headers: Mapping[str, str] | httpx.Headers) -> dict[str, str | list[str]]:
    """Return headers in a loggable form: lower-cased names, sorted, secrets redacted.

    A header seen once maps to a single string; a header seen several times
    maps to a list of its values in the order encountered. Values of headers
    judged sensitive are replaced by ``REDACTED_HEADER_VALUE``.
    """
    collected: dict[str, str | list[str]] = {}
    for raw_name, raw_value in _header_items(headers):
        name = raw_name.lower()
        shown = REDACTED_HEADER_VALUE if _is_sensitive_header(name) else raw_value
        previous = collected.get(name)
        if previous is None:
            collected[name] = shown
            continue
        if isinstance(previous, list):
            previous.append(shown)
            continue
        # Second occurrence: promote the single value to a list.
        collected[name] = [previous, shown]
    return dict(sorted(collected.items()))
235
+
236
+
237
def _header_items(headers: Mapping[str, str] | httpx.Headers) -> Iterable[tuple[str, str]]:
    """Return the ``(name, value)`` pairs of ``headers``.

    ``httpx.Headers`` objects are read through ``multi_items()``; any other
    mapping is read through ``items()``.
    """
    if not isinstance(headers, httpx.Headers):
        return headers.items()
    return headers.multi_items()
241
+
242
+
243
def _is_sensitive_header(name: str) -> bool:
    """Return whether the header name contains a sensitive fragment (case-insensitive)."""
    haystack = name.lower()
    for fragment in SENSITIVE_HEADER_FRAGMENTS:
        if fragment in haystack:
            return True
    return False
246
+
247
+
248
+ def _response_http_version(response: httpx.Response) -> str | None:
249
+ version = response.extensions.get("http_version")
250
+ if isinstance(version, bytes):
251
+ return version.decode("latin-1", errors="replace")
252
+ if isinstance(version, str):
253
+ return version
254
+ return None
255
+
256
+
257
def _body_preview(raw: bytes) -> str:
    """Decode a response body for error messages, truncating long text.

    The bytes are decoded as UTF-8 with replacement characters and stripped.
    Text longer than ``ERROR_BODY_PREVIEW_CHARS`` is cut to that length and
    suffixed with the number of characters left out.
    """
    text = raw.decode("utf-8", errors="replace").strip()
    omitted = len(text) - ERROR_BODY_PREVIEW_CHARS
    if omitted > 0:
        return f"{text[:ERROR_BODY_PREVIEW_CHARS]}... (+{omitted} chars)"
    return text
262
+
263
+
264
+ def _exception_message(exception: Exception) -> str:
265
+ return str(exception) or type(exception).__name__
266
+
267
+
268
def retry_after_seconds(value: str | None) -> float | None:
    """Parse a ``Retry-After`` header value into a non-negative delay in seconds.

    Both forms of the header are understood: a number of seconds, and an
    HTTP-date, which is converted to the time remaining until that date.

    Args:
        value: Raw header value, or ``None`` when the header is absent.

    Returns:
        The delay in seconds (never negative), or ``None`` when the value is
        missing, unparseable, or not a finite number.
    """
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    if not value:
        return None
    text = value.strip()
    try:
        seconds = float(text)
    except ValueError:
        seconds = None
    if seconds is not None:
        # "nan"/"inf" parse as floats but cannot be slept on; treat as unparseable.
        return max(seconds, 0.0) if math.isfinite(seconds) else None
    try:
        when = parsedate_to_datetime(text)
    except (TypeError, ValueError, OverflowError):
        return None
    if when.tzinfo is None:
        # A date without a usable zone is interpreted as UTC.
        when = when.replace(tzinfo=timezone.utc)
    return max((when - datetime.now(timezone.utc)).total_seconds(), 0.0)
275
+
276
+
277
def cast_json_dict(value: object) -> JSONDict:
    """Compatibility wrapper for call sites that want an explicit boundary cast.

    Returns ``value`` itself when it is a ``dict``, and a new empty dict otherwise.
    """
    if not isinstance(value, dict):
        return {}
    return cast(JSONDict, value)
@@ -0,0 +1,42 @@
1
+ """Small on-disk JSON cache helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from collections.abc import Callable, Mapping
7
+ from pathlib import Path
8
+ from typing import Any, TypeVar, cast
9
+
10
# Type of the cache values handed back to callers (the result type of `parse`).
Entry = TypeVar("Entry")
11
+
12
+
13
def load_json_cache(
    path: Path,
    parse: Callable[[Any], Entry] | None = None,
) -> dict[str, Entry]:
    """Load `path` as a string-keyed cache. Missing files return `{}`.

    Args:
        path: Location of the JSON cache file (read as UTF-8).
        parse: Optional converter applied to every stored value; when omitted
            the decoded JSON values are returned as they are.

    Returns:
        A dict mapping each cache key to its (optionally parsed) value.

    Raises:
        json.JSONDecodeError: If the file does not contain valid JSON.
        TypeError: If the top-level JSON value is not an object.
    """
    # Read directly instead of checking existence first, so a file removed
    # between the check and the read cannot cause a spurious failure.
    try:
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return {}
    raw = json.loads(text)
    if not isinstance(raw, dict):
        raise TypeError(f"{path} must contain a JSON object, not {type(raw).__name__}")
    if parse is None:
        return cast("dict[str, Entry]", raw)
    return {key: parse(value) for key, value in raw.items()}
24
+
25
+
26
def save_json_cache(path: Path, cache: Mapping[str, object]) -> None:
    """Write a string-keyed JSON cache with stable formatting.

    The output uses two-space indentation, sorted keys and a trailing newline.
    Missing parent directories are created. The text is written to a temporary
    sibling file and then moved over `path`, so an interrupted write leaves the
    previous cache content in place rather than a truncated file.

    Raises:
        TypeError: If `cache` holds a value that cannot be serialized as JSON.
        OSError: If the directory or file cannot be written.
    """
    import contextlib
    import os

    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(dict(cache), indent=2, sort_keys=True) + "\n"
    # Same directory as the target, so the final rename stays on one filesystem.
    scratch = path.with_name(f".{path.name}.{os.getpid()}.tmp")
    try:
        scratch.write_text(text, encoding="utf-8")
        scratch.replace(path)
    except BaseException:
        # Best-effort cleanup of the partial temporary file; the original error wins.
        with contextlib.suppress(OSError):
            scratch.unlink()
        raise
30
+
31
+
32
def load_json_subset(
    path: Path,
    keys: list[str],
    parse: Callable[[Any], Entry] | None = None,
) -> dict[str, Entry]:
    """Load only `keys` that are present in a string-keyed JSON cache.

    Args:
        path: Location of the JSON cache file.
        keys: Cache keys to look up; keys missing from the cache are skipped.
        parse: Optional converter applied to each selected value.

    Returns:
        A dict holding the requested keys that exist in the cache. Only those
        entries are passed through `parse`, so unrelated cache entries are
        neither converted nor able to fail the lookup.
    """
    raw = cast("dict[str, Any]", load_json_cache(path))
    selected = {key: raw[key] for key in keys if key in raw}
    if parse is None:
        return cast("dict[str, Entry]", selected)
    return {key: parse(value) for key, value in selected.items()}
40
+
41
+
42
# Names exported by `from <module> import *`; the public API of this module.
__all__ = ["load_json_cache", "load_json_subset", "save_json_cache"]
@@ -0,0 +1,54 @@
1
+ """Small JSON type aliases and projection helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, TypeAlias, cast
6
+
7
# Any decoded JSON value; recursive through the quoted forward references.
JSONValue: TypeAlias = None | bool | int | float | str | list["JSONValue"] | dict[str, "JSONValue"]
# A JSON object: string keys mapped to JSON values.
JSONDict: TypeAlias = dict[str, JSONValue]
# A JSON array of JSON values.
JSONArray: TypeAlias = list[JSONValue]
10
+
11
+
12
def json_dict(value: object) -> JSONDict:
    """Return `value` as a JSON object, or an empty object when it is not one."""
    if not isinstance(value, dict):
        return {}
    return cast(JSONDict, value)
15
+
16
+
17
def json_list(value: object) -> JSONArray:
    """Return `value` as a JSON array, or an empty array when it is not one."""
    if not isinstance(value, list):
        return []
    return cast(JSONArray, value)
20
+
21
+
22
def json_dicts(value: object) -> list[JSONDict]:
    """Return `value` as a list of JSON objects, filtering non-objects out."""
    if not isinstance(value, list):
        return []
    objects: list[JSONDict] = []
    for item in cast(list[object], value):
        if isinstance(item, dict):
            objects.append(cast(JSONDict, item))
    return objects
28
+
29
+
30
def json_strs(value: object) -> list[str]:
    """Return `value` as a list of strings, filtering non-strings out."""
    if not isinstance(value, list):
        return []
    strings: list[str] = []
    for item in cast(list[object], value):
        if isinstance(item, str):
            strings.append(item)
    return strings
36
+
37
+
38
def json_str(mapping: JSONDict, key: str, default: str = "") -> str:
    """Read a string value from a JSON object.

    Returns `default` when `key` is missing or its value is not a string.
    """
    found = mapping.get(key)
    if isinstance(found, str):
        return found
    return default
42
+
43
+
44
def json_int(mapping: JSONDict, key: str, default: int = 0) -> int:
    """Read an integer value from a JSON object, excluding booleans.

    Returns `default` when `key` is missing or its value is not a plain integer.
    """
    found = mapping.get(key)
    # bool is a subclass of int, so it has to be ruled out explicitly.
    if isinstance(found, bool) or not isinstance(found, int):
        return default
    return found
48
+
49
+
50
def require_json_dict(value: Any, *, where: str) -> JSONDict:
    """Return `value` as a JSON object, or raise a clear error.

    Raises:
        TypeError: If `value` is not a dict; the message starts with `where`.
    """
    if not isinstance(value, dict):
        raise TypeError(f"{where} must be a JSON object")
    return cast(JSONDict, value)