svc-infra 0.1.595__py3-none-any.whl → 0.1.706__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of svc-infra might be problematic. Click here for more details.

Files changed (256) hide show
  1. svc_infra/__init__.py +58 -2
  2. svc_infra/apf_payments/models.py +133 -42
  3. svc_infra/apf_payments/provider/aiydan.py +121 -47
  4. svc_infra/apf_payments/provider/base.py +30 -9
  5. svc_infra/apf_payments/provider/stripe.py +156 -62
  6. svc_infra/apf_payments/schemas.py +18 -9
  7. svc_infra/apf_payments/service.py +98 -41
  8. svc_infra/apf_payments/settings.py +5 -1
  9. svc_infra/api/__init__.py +61 -0
  10. svc_infra/api/fastapi/__init__.py +15 -0
  11. svc_infra/api/fastapi/admin/__init__.py +3 -0
  12. svc_infra/api/fastapi/admin/add.py +245 -0
  13. svc_infra/api/fastapi/apf_payments/router.py +128 -70
  14. svc_infra/api/fastapi/apf_payments/setup.py +13 -6
  15. svc_infra/api/fastapi/auth/__init__.py +65 -0
  16. svc_infra/api/fastapi/auth/_cookies.py +6 -2
  17. svc_infra/api/fastapi/auth/add.py +17 -14
  18. svc_infra/api/fastapi/auth/gaurd.py +45 -16
  19. svc_infra/api/fastapi/auth/mfa/models.py +3 -1
  20. svc_infra/api/fastapi/auth/mfa/pre_auth.py +10 -6
  21. svc_infra/api/fastapi/auth/mfa/router.py +15 -8
  22. svc_infra/api/fastapi/auth/mfa/security.py +1 -2
  23. svc_infra/api/fastapi/auth/mfa/utils.py +2 -1
  24. svc_infra/api/fastapi/auth/mfa/verify.py +9 -2
  25. svc_infra/api/fastapi/auth/policy.py +0 -1
  26. svc_infra/api/fastapi/auth/providers.py +3 -1
  27. svc_infra/api/fastapi/auth/routers/apikey_router.py +6 -6
  28. svc_infra/api/fastapi/auth/routers/oauth_router.py +146 -52
  29. svc_infra/api/fastapi/auth/routers/session_router.py +6 -2
  30. svc_infra/api/fastapi/auth/security.py +31 -10
  31. svc_infra/api/fastapi/auth/sender.py +8 -1
  32. svc_infra/api/fastapi/auth/state.py +3 -1
  33. svc_infra/api/fastapi/auth/ws_security.py +275 -0
  34. svc_infra/api/fastapi/billing/router.py +73 -0
  35. svc_infra/api/fastapi/billing/setup.py +19 -0
  36. svc_infra/api/fastapi/cache/add.py +9 -5
  37. svc_infra/api/fastapi/db/__init__.py +5 -1
  38. svc_infra/api/fastapi/db/http.py +3 -1
  39. svc_infra/api/fastapi/db/nosql/__init__.py +39 -1
  40. svc_infra/api/fastapi/db/nosql/mongo/add.py +47 -32
  41. svc_infra/api/fastapi/db/nosql/mongo/crud_router.py +30 -11
  42. svc_infra/api/fastapi/db/sql/__init__.py +5 -1
  43. svc_infra/api/fastapi/db/sql/add.py +71 -26
  44. svc_infra/api/fastapi/db/sql/crud_router.py +210 -22
  45. svc_infra/api/fastapi/db/sql/health.py +3 -1
  46. svc_infra/api/fastapi/db/sql/session.py +18 -0
  47. svc_infra/api/fastapi/db/sql/users.py +18 -6
  48. svc_infra/api/fastapi/dependencies/ratelimit.py +78 -14
  49. svc_infra/api/fastapi/docs/add.py +173 -0
  50. svc_infra/api/fastapi/docs/landing.py +4 -2
  51. svc_infra/api/fastapi/docs/scoped.py +62 -15
  52. svc_infra/api/fastapi/dual/__init__.py +12 -2
  53. svc_infra/api/fastapi/dual/dualize.py +1 -1
  54. svc_infra/api/fastapi/dual/protected.py +126 -4
  55. svc_infra/api/fastapi/dual/public.py +25 -0
  56. svc_infra/api/fastapi/dual/router.py +40 -13
  57. svc_infra/api/fastapi/dx.py +33 -2
  58. svc_infra/api/fastapi/ease.py +10 -2
  59. svc_infra/api/fastapi/http/concurrency.py +2 -1
  60. svc_infra/api/fastapi/http/conditional.py +3 -1
  61. svc_infra/api/fastapi/middleware/debug.py +4 -1
  62. svc_infra/api/fastapi/middleware/errors/catchall.py +6 -2
  63. svc_infra/api/fastapi/middleware/errors/exceptions.py +1 -1
  64. svc_infra/api/fastapi/middleware/errors/handlers.py +54 -8
  65. svc_infra/api/fastapi/middleware/graceful_shutdown.py +104 -0
  66. svc_infra/api/fastapi/middleware/idempotency.py +197 -70
  67. svc_infra/api/fastapi/middleware/idempotency_store.py +187 -0
  68. svc_infra/api/fastapi/middleware/optimistic_lock.py +42 -0
  69. svc_infra/api/fastapi/middleware/ratelimit.py +125 -28
  70. svc_infra/api/fastapi/middleware/ratelimit_store.py +43 -10
  71. svc_infra/api/fastapi/middleware/request_id.py +27 -11
  72. svc_infra/api/fastapi/middleware/request_size_limit.py +3 -3
  73. svc_infra/api/fastapi/middleware/timeout.py +177 -0
  74. svc_infra/api/fastapi/openapi/apply.py +5 -3
  75. svc_infra/api/fastapi/openapi/conventions.py +9 -2
  76. svc_infra/api/fastapi/openapi/mutators.py +165 -20
  77. svc_infra/api/fastapi/openapi/pipeline.py +1 -1
  78. svc_infra/api/fastapi/openapi/security.py +3 -1
  79. svc_infra/api/fastapi/ops/add.py +75 -0
  80. svc_infra/api/fastapi/pagination.py +47 -20
  81. svc_infra/api/fastapi/routers/__init__.py +43 -15
  82. svc_infra/api/fastapi/routers/ping.py +1 -0
  83. svc_infra/api/fastapi/setup.py +188 -57
  84. svc_infra/api/fastapi/tenancy/add.py +19 -0
  85. svc_infra/api/fastapi/tenancy/context.py +112 -0
  86. svc_infra/api/fastapi/versioned.py +101 -0
  87. svc_infra/app/README.md +5 -5
  88. svc_infra/app/__init__.py +3 -1
  89. svc_infra/app/env.py +69 -1
  90. svc_infra/app/logging/add.py +9 -2
  91. svc_infra/app/logging/formats.py +12 -5
  92. svc_infra/billing/__init__.py +23 -0
  93. svc_infra/billing/async_service.py +147 -0
  94. svc_infra/billing/jobs.py +241 -0
  95. svc_infra/billing/models.py +177 -0
  96. svc_infra/billing/quotas.py +103 -0
  97. svc_infra/billing/schemas.py +36 -0
  98. svc_infra/billing/service.py +123 -0
  99. svc_infra/bundled_docs/README.md +5 -0
  100. svc_infra/bundled_docs/__init__.py +1 -0
  101. svc_infra/bundled_docs/getting-started.md +6 -0
  102. svc_infra/cache/__init__.py +9 -0
  103. svc_infra/cache/add.py +170 -0
  104. svc_infra/cache/backend.py +7 -6
  105. svc_infra/cache/decorators.py +81 -15
  106. svc_infra/cache/demo.py +2 -2
  107. svc_infra/cache/keys.py +24 -4
  108. svc_infra/cache/recache.py +26 -14
  109. svc_infra/cache/resources.py +14 -5
  110. svc_infra/cache/tags.py +19 -44
  111. svc_infra/cache/utils.py +3 -1
  112. svc_infra/cli/__init__.py +52 -8
  113. svc_infra/cli/__main__.py +4 -0
  114. svc_infra/cli/cmds/__init__.py +39 -2
  115. svc_infra/cli/cmds/db/nosql/mongo/mongo_cmds.py +7 -4
  116. svc_infra/cli/cmds/db/nosql/mongo/mongo_scaffold_cmds.py +7 -5
  117. svc_infra/cli/cmds/db/ops_cmds.py +270 -0
  118. svc_infra/cli/cmds/db/sql/alembic_cmds.py +103 -18
  119. svc_infra/cli/cmds/db/sql/sql_export_cmds.py +88 -0
  120. svc_infra/cli/cmds/db/sql/sql_scaffold_cmds.py +3 -3
  121. svc_infra/cli/cmds/docs/docs_cmds.py +142 -0
  122. svc_infra/cli/cmds/dx/__init__.py +12 -0
  123. svc_infra/cli/cmds/dx/dx_cmds.py +116 -0
  124. svc_infra/cli/cmds/health/__init__.py +179 -0
  125. svc_infra/cli/cmds/health/health_cmds.py +8 -0
  126. svc_infra/cli/cmds/help.py +4 -0
  127. svc_infra/cli/cmds/jobs/__init__.py +1 -0
  128. svc_infra/cli/cmds/jobs/jobs_cmds.py +47 -0
  129. svc_infra/cli/cmds/obs/obs_cmds.py +36 -15
  130. svc_infra/cli/cmds/sdk/__init__.py +0 -0
  131. svc_infra/cli/cmds/sdk/sdk_cmds.py +112 -0
  132. svc_infra/cli/foundation/runner.py +6 -2
  133. svc_infra/data/add.py +61 -0
  134. svc_infra/data/backup.py +58 -0
  135. svc_infra/data/erasure.py +45 -0
  136. svc_infra/data/fixtures.py +42 -0
  137. svc_infra/data/retention.py +61 -0
  138. svc_infra/db/__init__.py +15 -0
  139. svc_infra/db/crud_schema.py +9 -9
  140. svc_infra/db/inbox.py +67 -0
  141. svc_infra/db/nosql/__init__.py +3 -0
  142. svc_infra/db/nosql/core.py +30 -9
  143. svc_infra/db/nosql/indexes.py +3 -1
  144. svc_infra/db/nosql/management.py +1 -1
  145. svc_infra/db/nosql/mongo/README.md +13 -13
  146. svc_infra/db/nosql/mongo/client.py +19 -2
  147. svc_infra/db/nosql/mongo/settings.py +6 -2
  148. svc_infra/db/nosql/repository.py +35 -15
  149. svc_infra/db/nosql/resource.py +20 -3
  150. svc_infra/db/nosql/scaffold.py +9 -3
  151. svc_infra/db/nosql/service.py +3 -1
  152. svc_infra/db/nosql/types.py +6 -2
  153. svc_infra/db/ops.py +384 -0
  154. svc_infra/db/outbox.py +108 -0
  155. svc_infra/db/sql/apikey.py +37 -9
  156. svc_infra/db/sql/authref.py +9 -3
  157. svc_infra/db/sql/constants.py +12 -8
  158. svc_infra/db/sql/core.py +2 -2
  159. svc_infra/db/sql/management.py +11 -8
  160. svc_infra/db/sql/repository.py +99 -26
  161. svc_infra/db/sql/resource.py +5 -0
  162. svc_infra/db/sql/scaffold.py +6 -2
  163. svc_infra/db/sql/service.py +15 -5
  164. svc_infra/db/sql/templates/models_schemas/auth/models.py.tmpl +7 -56
  165. svc_infra/db/sql/templates/setup/env_async.py.tmpl +34 -12
  166. svc_infra/db/sql/templates/setup/env_sync.py.tmpl +29 -7
  167. svc_infra/db/sql/tenant.py +88 -0
  168. svc_infra/db/sql/uniq_hooks.py +9 -3
  169. svc_infra/db/sql/utils.py +138 -51
  170. svc_infra/db/sql/versioning.py +14 -0
  171. svc_infra/deploy/__init__.py +538 -0
  172. svc_infra/documents/__init__.py +100 -0
  173. svc_infra/documents/add.py +264 -0
  174. svc_infra/documents/ease.py +233 -0
  175. svc_infra/documents/models.py +114 -0
  176. svc_infra/documents/storage.py +264 -0
  177. svc_infra/dx/add.py +65 -0
  178. svc_infra/dx/changelog.py +74 -0
  179. svc_infra/dx/checks.py +68 -0
  180. svc_infra/exceptions.py +141 -0
  181. svc_infra/health/__init__.py +864 -0
  182. svc_infra/http/__init__.py +13 -0
  183. svc_infra/http/client.py +105 -0
  184. svc_infra/jobs/builtins/outbox_processor.py +40 -0
  185. svc_infra/jobs/builtins/webhook_delivery.py +95 -0
  186. svc_infra/jobs/easy.py +33 -0
  187. svc_infra/jobs/loader.py +50 -0
  188. svc_infra/jobs/queue.py +116 -0
  189. svc_infra/jobs/redis_queue.py +256 -0
  190. svc_infra/jobs/runner.py +79 -0
  191. svc_infra/jobs/scheduler.py +53 -0
  192. svc_infra/jobs/worker.py +40 -0
  193. svc_infra/loaders/__init__.py +186 -0
  194. svc_infra/loaders/base.py +142 -0
  195. svc_infra/loaders/github.py +311 -0
  196. svc_infra/loaders/models.py +147 -0
  197. svc_infra/loaders/url.py +235 -0
  198. svc_infra/logging/__init__.py +374 -0
  199. svc_infra/mcp/svc_infra_mcp.py +91 -33
  200. svc_infra/obs/README.md +2 -0
  201. svc_infra/obs/add.py +65 -9
  202. svc_infra/obs/cloud_dash.py +2 -1
  203. svc_infra/obs/grafana/dashboards/http-overview.json +45 -0
  204. svc_infra/obs/metrics/__init__.py +3 -4
  205. svc_infra/obs/metrics/asgi.py +13 -7
  206. svc_infra/obs/metrics/http.py +9 -5
  207. svc_infra/obs/metrics/sqlalchemy.py +13 -9
  208. svc_infra/obs/metrics.py +6 -5
  209. svc_infra/obs/settings.py +6 -2
  210. svc_infra/security/add.py +217 -0
  211. svc_infra/security/audit.py +92 -10
  212. svc_infra/security/audit_service.py +4 -3
  213. svc_infra/security/headers.py +15 -2
  214. svc_infra/security/hibp.py +14 -4
  215. svc_infra/security/jwt_rotation.py +74 -22
  216. svc_infra/security/lockout.py +11 -5
  217. svc_infra/security/models.py +54 -12
  218. svc_infra/security/oauth_models.py +73 -0
  219. svc_infra/security/org_invites.py +5 -3
  220. svc_infra/security/passwords.py +3 -1
  221. svc_infra/security/permissions.py +25 -2
  222. svc_infra/security/session.py +1 -1
  223. svc_infra/security/signed_cookies.py +21 -1
  224. svc_infra/storage/__init__.py +93 -0
  225. svc_infra/storage/add.py +253 -0
  226. svc_infra/storage/backends/__init__.py +11 -0
  227. svc_infra/storage/backends/local.py +339 -0
  228. svc_infra/storage/backends/memory.py +216 -0
  229. svc_infra/storage/backends/s3.py +353 -0
  230. svc_infra/storage/base.py +239 -0
  231. svc_infra/storage/easy.py +185 -0
  232. svc_infra/storage/settings.py +195 -0
  233. svc_infra/testing/__init__.py +685 -0
  234. svc_infra/utils.py +7 -3
  235. svc_infra/webhooks/__init__.py +69 -0
  236. svc_infra/webhooks/add.py +339 -0
  237. svc_infra/webhooks/encryption.py +115 -0
  238. svc_infra/webhooks/fastapi.py +39 -0
  239. svc_infra/webhooks/router.py +55 -0
  240. svc_infra/webhooks/service.py +70 -0
  241. svc_infra/webhooks/signing.py +34 -0
  242. svc_infra/websocket/__init__.py +79 -0
  243. svc_infra/websocket/add.py +140 -0
  244. svc_infra/websocket/client.py +282 -0
  245. svc_infra/websocket/config.py +69 -0
  246. svc_infra/websocket/easy.py +76 -0
  247. svc_infra/websocket/exceptions.py +61 -0
  248. svc_infra/websocket/manager.py +344 -0
  249. svc_infra/websocket/models.py +49 -0
  250. svc_infra-0.1.706.dist-info/LICENSE +21 -0
  251. svc_infra-0.1.706.dist-info/METADATA +356 -0
  252. svc_infra-0.1.706.dist-info/RECORD +357 -0
  253. svc_infra-0.1.595.dist-info/METADATA +0 -80
  254. svc_infra-0.1.595.dist-info/RECORD +0 -253
  255. {svc_infra-0.1.595.dist-info → svc_infra-0.1.706.dist-info}/WHEEL +0 -0
  256. {svc_infra-0.1.595.dist-info → svc_infra-0.1.706.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,235 @@
1
+ """URL content loader.
2
+
3
+ Load content from URLs with automatic HTML text extraction.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import re
10
+ from typing import Any
11
+
12
+ import httpx
13
+
14
+ from .base import BaseLoader, ErrorStrategy
15
+ from .models import LoadedContent
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class URLLoader(BaseLoader):
    """Load content from one or more URLs.

    Fetches each URL with an ``httpx.AsyncClient`` and, when requested,
    extracts readable text from HTML responses. Supports redirects, custom
    headers, and batch loading.

    Args:
        urls: Single URL or list of URLs to load.
        headers: Optional HTTP headers to send with requests.
        extract_text: If True (default), extract readable text from HTML pages.
            Raw content is returned if False or if the response is not HTML.
        follow_redirects: Follow HTTP redirects (default: True).
        timeout: Request timeout in seconds (default: 30).
        extra_metadata: Additional metadata to attach to all loaded content.
            Keys given here override the built-in metadata keys.
        on_error: How to handle errors ("skip" or "raise"). Default: "skip"

    Raises:
        ValueError: If any URL does not start with ``http://`` or ``https://``.

    Example:
        >>> # Load single URL
        >>> loader = URLLoader("https://example.com/docs/guide.md")
        >>> contents = await loader.load()
        >>> print(contents[0].content[:100])
        >>>
        >>> # Load multiple URLs
        >>> loader = URLLoader([
        ...     "https://example.com/page1",
        ...     "https://example.com/page2",
        ... ])
        >>> contents = await loader.load()
        >>>
        >>> # Disable HTML text extraction
        >>> loader = URLLoader("https://example.com", extract_text=False)
        >>> contents = await loader.load()  # Returns raw HTML
        >>>
        >>> # With custom headers (e.g., for APIs)
        >>> loader = URLLoader(
        ...     "https://api.example.com/docs",
        ...     headers={"Authorization": "Bearer token123"},
        ... )
        >>> contents = await loader.load()

    Note:
        - With BeautifulSoup, extraction removes scripts, styles, nav,
          footer, header, aside and noscript elements.
        - If BeautifulSoup is not installed, a regex fallback removes only
          script/style blocks and strips the remaining tags.
        - Content type is detected from the HTTP ``content-type`` header.
    """

    def __init__(
        self,
        urls: str | list[str],
        headers: dict[str, str] | None = None,
        extract_text: bool = True,
        follow_redirects: bool = True,
        timeout: float = 30.0,
        extra_metadata: dict[str, Any] | None = None,
        on_error: ErrorStrategy = "skip",
    ) -> None:
        """Initialize the URL loader.

        Args:
            urls: Single URL or list of URLs
            headers: HTTP headers to send
            extract_text: Extract text from HTML (default: True)
            follow_redirects: Follow redirects (default: True)
            timeout: Request timeout in seconds
            extra_metadata: Additional metadata for all content
            on_error: Error handling strategy (forwarded to the base class)

        Raises:
            ValueError: If a URL is not a string starting with
                ``http://`` or ``https://``.
        """
        super().__init__(on_error=on_error)

        # Normalize urls to list
        self.urls = [urls] if isinstance(urls, str) else list(urls)
        self.headers = headers or {}
        self.extract_text = extract_text
        self.follow_redirects = follow_redirects
        self.timeout = timeout
        self.extra_metadata = extra_metadata or {}

        # Validate URLs. Non-string entries are rejected with the same
        # ValueError instead of failing on the missing ``startswith``.
        for url in self.urls:
            if not isinstance(url, str) or not url.startswith(
                ("http://", "https://")
            ):
                raise ValueError(
                    f"Invalid URL: {url!r}. URLs must start with http:// or https://"
                )

    async def load(self) -> list[LoadedContent]:
        """Load content from all URLs.

        URLs are fetched one after another with a single shared client.

        Returns:
            List of LoadedContent objects for each successfully loaded URL.

        Raises:
            RuntimeError: If a request fails or returns an error status and
                on_error="raise". The original ``httpx`` exception is
                attached as ``__cause__``.
        """
        contents: list[LoadedContent] = []

        async with httpx.AsyncClient(
            timeout=self.timeout,
            follow_redirects=self.follow_redirects,
        ) as client:
            for url in self.urls:
                try:
                    logger.debug("Fetching: %s", url)
                    resp = await client.get(url, headers=self.headers)
                    resp.raise_for_status()

                    content_type = resp.headers.get("content-type", "")
                    raw_content = resp.text

                    # Extract text from HTML if requested. Media types are
                    # case-insensitive, so compare against a lowered copy.
                    if self.extract_text and "text/html" in content_type.lower():
                        content = self._extract_text_from_html(raw_content)
                    else:
                        content = raw_content

                    # Parse content type (remove charset etc.)
                    mime_type = (
                        content_type.split(";")[0].strip() if content_type else None
                    )

                    loaded = LoadedContent(
                        content=content,
                        source=url,
                        content_type=mime_type,
                        metadata={
                            "loader": "url",
                            "url": url,
                            "status_code": resp.status_code,
                            "final_url": str(resp.url),  # After redirects
                            **self.extra_metadata,
                        },
                    )
                    contents.append(loaded)
                    logger.debug("Loaded: %s (%d chars)", url, len(content))

                except httpx.HTTPStatusError as e:
                    msg = f"HTTP {e.response.status_code} for {url}"
                    if self.on_error == "raise":
                        raise RuntimeError(msg) from e
                    logger.warning(msg)

                except httpx.RequestError as e:
                    msg = f"Request failed for {url}: {e}"
                    if self.on_error == "raise":
                        raise RuntimeError(msg) from e
                    logger.warning(msg)

        return contents

    @staticmethod
    def _extract_text_from_html(html: str) -> str:
        """Extract readable text from HTML content.

        Tries to use BeautifulSoup if available, falls back to regex.

        Args:
            html: Raw HTML content

        Returns:
            Extracted text. With BeautifulSoup, non-content elements are
            removed and line breaks are kept; the regex fallback collapses
            all whitespace into single spaces.
        """
        # Imported locally because the ``html`` parameter shadows the module.
        from html import unescape

        try:
            from bs4 import BeautifulSoup
        except ImportError:
            BeautifulSoup = None

        if BeautifulSoup is not None:
            soup = BeautifulSoup(html, "html.parser")

            # Remove non-content elements
            for tag in soup(
                ["script", "style", "nav", "footer", "header", "aside", "noscript"]
            ):
                tag.decompose()

            # Get text with newlines preserved
            text = soup.get_text(separator="\n", strip=True)

            # Clean up excessive whitespace
            text = re.sub(r"\n{3,}", "\n\n", text)
            return text.strip()

        # Fallback: basic regex-based extraction
        logger.debug("BeautifulSoup not installed, using regex fallback")

        # Remove script and style blocks
        text = re.sub(
            r"<script[^>]*>.*?</script>",
            "",
            html,
            flags=re.DOTALL | re.IGNORECASE,
        )
        text = re.sub(
            r"<style[^>]*>.*?</style>",
            "",
            text,
            flags=re.DOTALL | re.IGNORECASE,
        )

        # Remove all HTML tags
        text = re.sub(r"<[^>]+>", " ", text)

        # Decode entities in a single pass. Chained str.replace calls would
        # double-decode input such as "&amp;lt;" and miss most entities.
        text = unescape(text)

        # Clean up whitespace (str.split also treats the non-breaking space
        # produced by "&nbsp;" as whitespace).
        text = " ".join(text.split())
        return text.strip()

    def __repr__(self) -> str:
        """Return string representation."""
        if len(self.urls) == 1:
            return f"URLLoader({self.urls[0]!r})"
        return f"URLLoader([{len(self.urls)} URLs])"
@@ -0,0 +1,374 @@
1
+ """Logging utilities for svc-infra applications.
2
+
3
+ This module provides logging utilities optimized for containerized
4
+ environments like Railway, Render, and Kubernetes, where log buffering
5
+ can cause visibility issues.
6
+
7
+ Features:
8
+ - Force flush for immediate log visibility in containers
9
+ - JSON-formatted structured logging
10
+ - Context injection for request tracing
11
+ - Pre-configured loggers with sensible defaults
12
+
13
+ Example:
14
+ >>> from svc_infra.logging import flush, get_logger, configure_for_container
15
+ >>>
16
+ >>> # Configure logging at app startup
17
+ >>> configure_for_container()
18
+ >>>
19
+ >>> # Get a logger
20
+ >>> logger = get_logger(__name__)
21
+ >>> logger.info("Starting application", extra={"version": "1.0.0"})
22
+ >>>
23
+ >>> # Force flush after critical operations
24
+ >>> flush()
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import contextvars
30
+ import json
31
+ import logging
32
+ import os
33
+ import sys
34
+ from contextlib import contextmanager
35
+ from datetime import datetime, timezone
36
+ from typing import Any, Iterator, Optional
37
+
38
# Holds the structured-logging fields for the current execution context.
# The default dict is never mutated in this module; updates always install
# a freshly built dict.
_log_context: contextvars.ContextVar[dict[str, Any]] = contextvars.ContextVar(
    "log_context",
    default={},
)

# Log level name read from LOG_LEVEL (upper-cased); "INFO" when unset.
DEFAULT_LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()

# JSON output is selected unless LOG_FORMAT equals "text" (case-insensitive).
USE_JSON_FORMAT = os.getenv("LOG_FORMAT", "").lower() != "text"
48
+
49
+
50
def flush() -> None:
    """
    Force flush stdout and stderr for immediate log visibility.

    In containerized environments (Docker, Railway, Kubernetes), Python's
    output buffering can delay log visibility. Call this after critical
    operations to ensure logs are immediately visible.

    This is a no-op in terms of log content but ensures buffered output
    is written to the underlying streams. Streams that are missing
    (``None``) or already closed are skipped instead of raising.

    Example:
        >>> import logging
        >>> from svc_infra.logging import flush
        >>>
        >>> logging.info("Starting database migration...")
        >>> # ... perform migration ...
        >>> logging.info("Migration complete")
        >>> flush()  # Ensure logs are visible in container logs
    """
    for stream in (sys.stdout, sys.stderr):
        # sys.stdout / sys.stderr can be None when the process has no
        # standard streams attached; a closed stream would raise ValueError.
        if stream is None or getattr(stream, "closed", False):
            continue
        stream.flush()
72
+
73
+
74
class JsonFormatter(logging.Formatter):
    """
    JSON log formatter for structured logging.

    Produces JSON-formatted log lines suitable for log aggregation
    systems like Datadog, Elastic, or CloudWatch.

    Output format:
        {"timestamp": "...", "level": "INFO", "logger": "...", "message": "...", ...}

    Any extra fields passed to the logger are included in the output.
    Context from `with_context()` is also merged in. On a key collision,
    context values replace the base fields and extra fields replace both.
    """

    # Attributes the logging machinery itself puts on a LogRecord. Anything
    # else found on the record is treated as a caller-supplied extra field.
    # "asctime" is included because other formatters cache it on the record.
    _RESERVED_ATTRS = frozenset(
        {
            "name",
            "msg",
            "args",
            "created",
            "filename",
            "funcName",
            "levelname",
            "levelno",
            "lineno",
            "module",
            "msecs",
            "pathname",
            "process",
            "processName",
            "relativeCreated",
            "stack_info",
            "exc_info",
            "exc_text",
            "thread",
            "threadName",
            "taskName",
            "message",
            "asctime",
        }
    )

    def format(self, record: logging.LogRecord) -> str:
        """Format a log record as a single JSON document.

        Args:
            record: The log record to serialize.

        Returns:
            JSON string; values that are not JSON-serializable are
            converted with ``str``.
        """
        # Use the time the record was created, not the time it is formatted,
        # so every handler reports the same timestamp for one event.
        event_time = datetime.fromtimestamp(record.created, tz=timezone.utc)

        # Base log structure
        log_dict: dict[str, Any] = {
            "timestamp": event_time.isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }

        # Add exception info if present
        if record.exc_info:
            log_dict["exception"] = self.formatException(record.exc_info)

        # Add the stack trace requested via ``stack_info=True``
        if record.stack_info:
            log_dict["stack_info"] = self.formatStack(record.stack_info)

        # Add context from context variable
        context = _log_context.get()
        if context:
            log_dict.update(context)

        # Add any extra fields from the log call, skipping standard
        # LogRecord attributes and private names.
        reserved = self._RESERVED_ATTRS
        for key, value in record.__dict__.items():
            if key not in reserved and not key.startswith("_"):
                log_dict[key] = value

        return json.dumps(log_dict, default=str)
138
+
139
+
140
class TextFormatter(logging.Formatter):
    """
    Human-readable text formatter with context support.

    Suitable for local development where JSON is harder to read.

    Output format (timestamp in UTC):
        2024-01-15 10:30:45 [INFO] logger.name: Message [key=value key2=value2]

    The bracketed context suffix is only present when log context is set.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format a log record as human-readable text.

        Args:
            record: The log record to render.

        Returns:
            The formatted line, followed by the exception traceback and/or
            stack trace on separate lines when the record carries them.
        """
        # Use the record's creation time rather than the formatting time.
        timestamp = datetime.fromtimestamp(record.created, tz=timezone.utc).strftime(
            "%Y-%m-%d %H:%M:%S"
        )
        base = f"{timestamp} [{record.levelname}] {record.name}: {record.getMessage()}"

        # Add context if present
        context = _log_context.get()
        if context:
            context_str = " ".join(f"{k}={v}" for k, v in context.items())
            base = f"{base} [{context_str}]"

        # Add exception if present
        if record.exc_info:
            base = f"{base}\n{self.formatException(record.exc_info)}"

        # Add the stack trace requested via ``stack_info=True``
        if record.stack_info:
            base = f"{base}\n{self.formatStack(record.stack_info)}"

        return base
166
+
167
+
168
def configure_for_container(
    level: Optional[str] = None,
    json_format: Optional[bool] = None,
    stream: Any = None,
) -> None:
    """
    Configure logging for containerized environments.

    Sets up:
    - A single stream handler on the root logger (existing root handlers
      are removed)
    - JSON or text formatting based on environment
    - Appropriate log level from LOG_LEVEL env var
    - The same handler on the uvicorn loggers, with propagation disabled

    This should be called once at application startup, typically
    before any other logging configuration.

    Args:
        level: Log level name (DEBUG, INFO, WARNING, ERROR), matched
            case-insensitively. Defaults to LOG_LEVEL env var or INFO.
            Unknown names fall back to INFO.
        json_format: If True, use JSON format; if False, use text. Defaults to LOG_FORMAT env var.
        stream: Output stream. Defaults to sys.stderr.

    Environment Variables:
        LOG_LEVEL: Default log level (default: INFO)
        LOG_FORMAT: "text" for human-readable, anything else for JSON (default: JSON)
        PYTHONUNBUFFERED: Set to "1" by this function

    Example:
        >>> from svc_infra.logging import configure_for_container
        >>>
        >>> # In your app's main.py or __init__.py
        >>> configure_for_container()
        >>>
        >>> # Or with explicit settings
        >>> configure_for_container(level="DEBUG", json_format=False)
    """
    # Determine settings. Level names are upper-cased so "debug" works and
    # cannot resolve to the module-level function ``logging.debug``.
    level_name = (level or DEFAULT_LOG_LEVEL).upper()
    use_json = json_format if json_format is not None else USE_JSON_FORMAT
    output_stream = stream if stream is not None else sys.stderr

    # Only accept attributes that are real numeric levels; anything else
    # (unknown names, non-level attributes) falls back to INFO.
    level_value = getattr(logging, level_name, None)
    if not isinstance(level_value, int):
        level_value = logging.INFO

    # NOTE: the interpreter reads PYTHONUNBUFFERED at startup, so setting it
    # here does not change the running process; it is exported through
    # os.environ. StreamHandler flushes after each record it emits.
    os.environ["PYTHONUNBUFFERED"] = "1"

    # Get root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(level_value)

    # Remove existing handlers to avoid duplicates
    root_logger.handlers.clear()

    # Create handler with appropriate formatter
    handler = logging.StreamHandler(output_stream)
    handler.setLevel(level_value)

    if use_json:
        handler.setFormatter(JsonFormatter())
    else:
        handler.setFormatter(TextFormatter())

    root_logger.addHandler(handler)

    # Also configure uvicorn loggers to use our format. Propagation is
    # turned off so records are not emitted a second time via the root.
    for logger_name in ("uvicorn", "uvicorn.access", "uvicorn.error"):
        uvicorn_logger = logging.getLogger(logger_name)
        uvicorn_logger.handlers.clear()
        uvicorn_logger.addHandler(handler)
        uvicorn_logger.propagate = False
235
+
236
+
237
def get_logger(name: str) -> logging.Logger:
    """
    Look up a logger by name.

    This is a thin wrapper over ``logging.getLogger``. The returned logger
    follows whatever configuration `configure_for_container()` installed;
    if that was never called, Python's default logging configuration
    applies.

    Args:
        name: Logger name, typically `__name__` of the module.

    Returns:
        The logger registered under ``name``.

    Example:
        >>> from svc_infra.logging import get_logger
        >>>
        >>> logger = get_logger(__name__)
        >>> logger.info("Processing request", extra={"user_id": 123})
    """
    named_logger = logging.getLogger(name)
    return named_logger
258
+
259
+
260
@contextmanager
def with_context(**kwargs: Any) -> Iterator[None]:
    """
    Temporarily extend the structured log context.

    Inside the ``with`` block the given key-value pairs are layered on top
    of the context that was already active; on exit (normal or via an
    exception) the previous context is put back.

    Args:
        **kwargs: Key-value pairs to add to log context.

    Yields:
        None

    Example:
        >>> from svc_infra.logging import with_context, get_logger
        >>>
        >>> logger = get_logger(__name__)
        >>>
        >>> with with_context(request_id="abc-123", user_id=42):
        ...     logger.info("Processing request")
        ...     # Output includes: {"request_id": "abc-123", "user_id": 42, ...}
        ...     do_something()
        ...     logger.info("Request complete")
        >>>
        >>> # The previous context is restored after the block
        >>> logger.info("No context here")
    """
    # Build a new dict so the previously active context is left untouched.
    merged = dict(_log_context.get())
    merged.update(kwargs)

    token = _log_context.set(merged)
    try:
        yield
    finally:
        # Undo exactly this set(), restoring the value that preceded it.
        _log_context.reset(token)
300
+
301
+
302
def set_context(**kwargs: Any) -> None:
    """
    Add key-value pairs to the log context without scoping them.

    Unlike `with_context()`, nothing is restored automatically: the values
    stay in the context until they are overwritten or `clear_context()`
    is called. Useful for fields such as tenant_id or user_id that should
    cover a whole request lifecycle.

    Args:
        **kwargs: Key-value pairs to add to log context.

    Example:
        >>> from svc_infra.logging import set_context, clear_context, get_logger
        >>>
        >>> logger = get_logger(__name__)
        >>>
        >>> # In request middleware
        >>> set_context(request_id="abc-123", tenant_id="tenant-1")
        >>>
        >>> # All subsequent logs include context
        >>> logger.info("Processing...")
        >>>
        >>> # Clear at end of request
        >>> clear_context()
    """
    # Copy first so the dict that was active before is never mutated.
    updated = dict(_log_context.get())
    updated.update(kwargs)
    _log_context.set(updated)
328
+
329
+
330
def clear_context() -> None:
    """
    Reset the log context to an empty mapping.

    Call this at the end of a request or operation so that context set
    with `set_context()` does not carry over into later work.

    Example:
        >>> from svc_infra.logging import set_context, clear_context
        >>>
        >>> set_context(request_id="abc-123")
        >>> # ... handle request ...
        >>> clear_context()  # Clean up
    """
    empty: dict[str, Any] = {}
    _log_context.set(empty)
345
+
346
+
347
def get_context() -> dict[str, Any]:
    """
    Return a snapshot of the current log context.

    Returns:
        A shallow copy of the current context key-value pairs; changing
        the returned dict does not affect the active context.

    Example:
        >>> from svc_infra.logging import set_context, get_context
        >>>
        >>> set_context(request_id="abc-123")
        >>> ctx = get_context()
        >>> print(ctx)  # {"request_id": "abc-123"}
    """
    active = _log_context.get()
    return dict(active)
362
+
363
+
364
# Public API of this module, in the order it is exported.
__all__ = [
    # Stream flushing and setup
    "flush",
    "configure_for_container",
    "get_logger",
    # Log context helpers
    "with_context",
    "set_context",
    "clear_context",
    "get_context",
    # Formatter classes
    "JsonFormatter",
    "TextFormatter",
]