svc-infra 0.1.595__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of svc-infra might be problematic. Click here for more details.

Files changed (274) hide show
  1. svc_infra/__init__.py +58 -2
  2. svc_infra/apf_payments/models.py +68 -38
  3. svc_infra/apf_payments/provider/__init__.py +2 -2
  4. svc_infra/apf_payments/provider/aiydan.py +39 -23
  5. svc_infra/apf_payments/provider/base.py +8 -3
  6. svc_infra/apf_payments/provider/registry.py +3 -5
  7. svc_infra/apf_payments/provider/stripe.py +74 -52
  8. svc_infra/apf_payments/schemas.py +84 -83
  9. svc_infra/apf_payments/service.py +27 -16
  10. svc_infra/apf_payments/settings.py +12 -11
  11. svc_infra/api/__init__.py +61 -0
  12. svc_infra/api/fastapi/__init__.py +34 -0
  13. svc_infra/api/fastapi/admin/__init__.py +3 -0
  14. svc_infra/api/fastapi/admin/add.py +240 -0
  15. svc_infra/api/fastapi/apf_payments/router.py +94 -73
  16. svc_infra/api/fastapi/apf_payments/setup.py +10 -9
  17. svc_infra/api/fastapi/auth/__init__.py +65 -0
  18. svc_infra/api/fastapi/auth/_cookies.py +1 -3
  19. svc_infra/api/fastapi/auth/add.py +14 -15
  20. svc_infra/api/fastapi/auth/gaurd.py +32 -20
  21. svc_infra/api/fastapi/auth/mfa/models.py +3 -4
  22. svc_infra/api/fastapi/auth/mfa/pre_auth.py +13 -9
  23. svc_infra/api/fastapi/auth/mfa/router.py +9 -8
  24. svc_infra/api/fastapi/auth/mfa/security.py +4 -7
  25. svc_infra/api/fastapi/auth/mfa/utils.py +5 -3
  26. svc_infra/api/fastapi/auth/policy.py +0 -1
  27. svc_infra/api/fastapi/auth/providers.py +3 -3
  28. svc_infra/api/fastapi/auth/routers/apikey_router.py +19 -21
  29. svc_infra/api/fastapi/auth/routers/oauth_router.py +98 -52
  30. svc_infra/api/fastapi/auth/routers/session_router.py +6 -5
  31. svc_infra/api/fastapi/auth/security.py +25 -15
  32. svc_infra/api/fastapi/auth/sender.py +5 -0
  33. svc_infra/api/fastapi/auth/settings.py +18 -19
  34. svc_infra/api/fastapi/auth/state.py +5 -4
  35. svc_infra/api/fastapi/auth/ws_security.py +275 -0
  36. svc_infra/api/fastapi/billing/router.py +71 -0
  37. svc_infra/api/fastapi/billing/setup.py +19 -0
  38. svc_infra/api/fastapi/cache/add.py +9 -5
  39. svc_infra/api/fastapi/db/__init__.py +5 -1
  40. svc_infra/api/fastapi/db/http.py +10 -9
  41. svc_infra/api/fastapi/db/nosql/__init__.py +39 -1
  42. svc_infra/api/fastapi/db/nosql/mongo/add.py +35 -30
  43. svc_infra/api/fastapi/db/nosql/mongo/crud_router.py +39 -21
  44. svc_infra/api/fastapi/db/sql/__init__.py +5 -1
  45. svc_infra/api/fastapi/db/sql/add.py +62 -25
  46. svc_infra/api/fastapi/db/sql/crud_router.py +205 -30
  47. svc_infra/api/fastapi/db/sql/session.py +19 -2
  48. svc_infra/api/fastapi/db/sql/users.py +18 -9
  49. svc_infra/api/fastapi/dependencies/ratelimit.py +76 -14
  50. svc_infra/api/fastapi/docs/add.py +163 -0
  51. svc_infra/api/fastapi/docs/landing.py +6 -6
  52. svc_infra/api/fastapi/docs/scoped.py +75 -36
  53. svc_infra/api/fastapi/dual/__init__.py +12 -2
  54. svc_infra/api/fastapi/dual/dualize.py +2 -2
  55. svc_infra/api/fastapi/dual/protected.py +123 -10
  56. svc_infra/api/fastapi/dual/public.py +25 -0
  57. svc_infra/api/fastapi/dual/router.py +18 -8
  58. svc_infra/api/fastapi/dx.py +33 -2
  59. svc_infra/api/fastapi/ease.py +59 -7
  60. svc_infra/api/fastapi/http/concurrency.py +2 -1
  61. svc_infra/api/fastapi/http/conditional.py +2 -2
  62. svc_infra/api/fastapi/middleware/debug.py +4 -1
  63. svc_infra/api/fastapi/middleware/errors/exceptions.py +2 -5
  64. svc_infra/api/fastapi/middleware/errors/handlers.py +50 -10
  65. svc_infra/api/fastapi/middleware/graceful_shutdown.py +95 -0
  66. svc_infra/api/fastapi/middleware/idempotency.py +190 -68
  67. svc_infra/api/fastapi/middleware/idempotency_store.py +187 -0
  68. svc_infra/api/fastapi/middleware/optimistic_lock.py +39 -0
  69. svc_infra/api/fastapi/middleware/ratelimit.py +125 -28
  70. svc_infra/api/fastapi/middleware/ratelimit_store.py +45 -13
  71. svc_infra/api/fastapi/middleware/request_id.py +24 -10
  72. svc_infra/api/fastapi/middleware/request_size_limit.py +3 -3
  73. svc_infra/api/fastapi/middleware/timeout.py +176 -0
  74. svc_infra/api/fastapi/object_router.py +1060 -0
  75. svc_infra/api/fastapi/openapi/apply.py +4 -3
  76. svc_infra/api/fastapi/openapi/conventions.py +13 -6
  77. svc_infra/api/fastapi/openapi/mutators.py +144 -17
  78. svc_infra/api/fastapi/openapi/pipeline.py +2 -2
  79. svc_infra/api/fastapi/openapi/responses.py +4 -6
  80. svc_infra/api/fastapi/openapi/security.py +1 -1
  81. svc_infra/api/fastapi/ops/add.py +73 -0
  82. svc_infra/api/fastapi/pagination.py +47 -32
  83. svc_infra/api/fastapi/routers/__init__.py +16 -10
  84. svc_infra/api/fastapi/routers/ping.py +1 -0
  85. svc_infra/api/fastapi/setup.py +167 -54
  86. svc_infra/api/fastapi/tenancy/add.py +20 -0
  87. svc_infra/api/fastapi/tenancy/context.py +113 -0
  88. svc_infra/api/fastapi/versioned.py +102 -0
  89. svc_infra/app/README.md +5 -5
  90. svc_infra/app/__init__.py +3 -1
  91. svc_infra/app/env.py +70 -4
  92. svc_infra/app/logging/add.py +10 -2
  93. svc_infra/app/logging/filter.py +1 -1
  94. svc_infra/app/logging/formats.py +13 -5
  95. svc_infra/app/root.py +3 -3
  96. svc_infra/billing/__init__.py +40 -0
  97. svc_infra/billing/async_service.py +167 -0
  98. svc_infra/billing/jobs.py +231 -0
  99. svc_infra/billing/models.py +146 -0
  100. svc_infra/billing/quotas.py +101 -0
  101. svc_infra/billing/schemas.py +34 -0
  102. svc_infra/bundled_docs/README.md +5 -0
  103. svc_infra/bundled_docs/__init__.py +1 -0
  104. svc_infra/bundled_docs/getting-started.md +6 -0
  105. svc_infra/cache/__init__.py +21 -5
  106. svc_infra/cache/add.py +167 -0
  107. svc_infra/cache/backend.py +9 -7
  108. svc_infra/cache/decorators.py +75 -20
  109. svc_infra/cache/demo.py +2 -2
  110. svc_infra/cache/keys.py +26 -6
  111. svc_infra/cache/recache.py +26 -27
  112. svc_infra/cache/resources.py +6 -5
  113. svc_infra/cache/tags.py +19 -44
  114. svc_infra/cache/ttl.py +2 -3
  115. svc_infra/cache/utils.py +4 -3
  116. svc_infra/cli/__init__.py +44 -8
  117. svc_infra/cli/__main__.py +4 -0
  118. svc_infra/cli/cmds/__init__.py +39 -2
  119. svc_infra/cli/cmds/db/nosql/mongo/mongo_cmds.py +18 -14
  120. svc_infra/cli/cmds/db/nosql/mongo/mongo_scaffold_cmds.py +9 -10
  121. svc_infra/cli/cmds/db/ops_cmds.py +267 -0
  122. svc_infra/cli/cmds/db/sql/alembic_cmds.py +97 -29
  123. svc_infra/cli/cmds/db/sql/sql_export_cmds.py +80 -0
  124. svc_infra/cli/cmds/db/sql/sql_scaffold_cmds.py +13 -13
  125. svc_infra/cli/cmds/docs/docs_cmds.py +139 -0
  126. svc_infra/cli/cmds/dx/__init__.py +12 -0
  127. svc_infra/cli/cmds/dx/dx_cmds.py +110 -0
  128. svc_infra/cli/cmds/health/__init__.py +179 -0
  129. svc_infra/cli/cmds/health/health_cmds.py +8 -0
  130. svc_infra/cli/cmds/help.py +4 -0
  131. svc_infra/cli/cmds/jobs/__init__.py +1 -0
  132. svc_infra/cli/cmds/jobs/jobs_cmds.py +42 -0
  133. svc_infra/cli/cmds/obs/obs_cmds.py +31 -13
  134. svc_infra/cli/cmds/sdk/__init__.py +0 -0
  135. svc_infra/cli/cmds/sdk/sdk_cmds.py +102 -0
  136. svc_infra/cli/foundation/runner.py +4 -5
  137. svc_infra/cli/foundation/typer_bootstrap.py +1 -2
  138. svc_infra/data/__init__.py +83 -0
  139. svc_infra/data/add.py +61 -0
  140. svc_infra/data/backup.py +56 -0
  141. svc_infra/data/erasure.py +46 -0
  142. svc_infra/data/fixtures.py +42 -0
  143. svc_infra/data/retention.py +56 -0
  144. svc_infra/db/__init__.py +15 -0
  145. svc_infra/db/crud_schema.py +14 -13
  146. svc_infra/db/inbox.py +67 -0
  147. svc_infra/db/nosql/__init__.py +2 -0
  148. svc_infra/db/nosql/constants.py +1 -1
  149. svc_infra/db/nosql/core.py +19 -5
  150. svc_infra/db/nosql/indexes.py +12 -9
  151. svc_infra/db/nosql/management.py +4 -4
  152. svc_infra/db/nosql/mongo/README.md +13 -13
  153. svc_infra/db/nosql/mongo/client.py +21 -4
  154. svc_infra/db/nosql/mongo/settings.py +1 -1
  155. svc_infra/db/nosql/repository.py +46 -27
  156. svc_infra/db/nosql/resource.py +28 -16
  157. svc_infra/db/nosql/scaffold.py +14 -12
  158. svc_infra/db/nosql/service.py +2 -1
  159. svc_infra/db/nosql/service_with_hooks.py +4 -3
  160. svc_infra/db/nosql/utils.py +4 -4
  161. svc_infra/db/ops.py +380 -0
  162. svc_infra/db/outbox.py +105 -0
  163. svc_infra/db/sql/apikey.py +34 -15
  164. svc_infra/db/sql/authref.py +8 -6
  165. svc_infra/db/sql/constants.py +5 -1
  166. svc_infra/db/sql/core.py +13 -13
  167. svc_infra/db/sql/management.py +5 -6
  168. svc_infra/db/sql/repository.py +92 -26
  169. svc_infra/db/sql/resource.py +18 -12
  170. svc_infra/db/sql/scaffold.py +11 -11
  171. svc_infra/db/sql/service.py +2 -1
  172. svc_infra/db/sql/service_with_hooks.py +4 -3
  173. svc_infra/db/sql/templates/models_schemas/auth/models.py.tmpl +7 -56
  174. svc_infra/db/sql/templates/setup/env_async.py.tmpl +34 -12
  175. svc_infra/db/sql/templates/setup/env_sync.py.tmpl +29 -7
  176. svc_infra/db/sql/tenant.py +80 -0
  177. svc_infra/db/sql/uniq.py +8 -7
  178. svc_infra/db/sql/uniq_hooks.py +12 -11
  179. svc_infra/db/sql/utils.py +105 -47
  180. svc_infra/db/sql/versioning.py +14 -0
  181. svc_infra/db/utils.py +3 -3
  182. svc_infra/deploy/__init__.py +531 -0
  183. svc_infra/documents/__init__.py +100 -0
  184. svc_infra/documents/add.py +263 -0
  185. svc_infra/documents/ease.py +233 -0
  186. svc_infra/documents/models.py +114 -0
  187. svc_infra/documents/storage.py +262 -0
  188. svc_infra/dx/__init__.py +58 -0
  189. svc_infra/dx/add.py +63 -0
  190. svc_infra/dx/changelog.py +74 -0
  191. svc_infra/dx/checks.py +68 -0
  192. svc_infra/exceptions.py +141 -0
  193. svc_infra/health/__init__.py +863 -0
  194. svc_infra/http/__init__.py +13 -0
  195. svc_infra/http/client.py +101 -0
  196. svc_infra/jobs/__init__.py +79 -0
  197. svc_infra/jobs/builtins/outbox_processor.py +38 -0
  198. svc_infra/jobs/builtins/webhook_delivery.py +93 -0
  199. svc_infra/jobs/easy.py +33 -0
  200. svc_infra/jobs/loader.py +49 -0
  201. svc_infra/jobs/queue.py +106 -0
  202. svc_infra/jobs/redis_queue.py +242 -0
  203. svc_infra/jobs/runner.py +75 -0
  204. svc_infra/jobs/scheduler.py +53 -0
  205. svc_infra/jobs/worker.py +40 -0
  206. svc_infra/loaders/__init__.py +186 -0
  207. svc_infra/loaders/base.py +143 -0
  208. svc_infra/loaders/github.py +309 -0
  209. svc_infra/loaders/models.py +147 -0
  210. svc_infra/loaders/url.py +229 -0
  211. svc_infra/logging/__init__.py +375 -0
  212. svc_infra/mcp/__init__.py +82 -0
  213. svc_infra/mcp/svc_infra_mcp.py +91 -33
  214. svc_infra/obs/README.md +2 -0
  215. svc_infra/obs/add.py +68 -11
  216. svc_infra/obs/cloud_dash.py +2 -1
  217. svc_infra/obs/grafana/dashboards/http-overview.json +45 -0
  218. svc_infra/obs/metrics/__init__.py +6 -7
  219. svc_infra/obs/metrics/asgi.py +8 -7
  220. svc_infra/obs/metrics/base.py +13 -13
  221. svc_infra/obs/metrics/http.py +3 -3
  222. svc_infra/obs/metrics/sqlalchemy.py +14 -13
  223. svc_infra/obs/metrics.py +9 -8
  224. svc_infra/resilience/__init__.py +44 -0
  225. svc_infra/resilience/circuit_breaker.py +328 -0
  226. svc_infra/resilience/retry.py +289 -0
  227. svc_infra/security/__init__.py +167 -0
  228. svc_infra/security/add.py +213 -0
  229. svc_infra/security/audit.py +97 -18
  230. svc_infra/security/audit_service.py +10 -9
  231. svc_infra/security/headers.py +15 -2
  232. svc_infra/security/hibp.py +14 -7
  233. svc_infra/security/jwt_rotation.py +78 -29
  234. svc_infra/security/lockout.py +23 -16
  235. svc_infra/security/models.py +77 -44
  236. svc_infra/security/oauth_models.py +73 -0
  237. svc_infra/security/org_invites.py +12 -12
  238. svc_infra/security/passwords.py +3 -3
  239. svc_infra/security/permissions.py +31 -7
  240. svc_infra/security/session.py +7 -8
  241. svc_infra/security/signed_cookies.py +26 -6
  242. svc_infra/storage/__init__.py +93 -0
  243. svc_infra/storage/add.py +250 -0
  244. svc_infra/storage/backends/__init__.py +11 -0
  245. svc_infra/storage/backends/local.py +331 -0
  246. svc_infra/storage/backends/memory.py +213 -0
  247. svc_infra/storage/backends/s3.py +334 -0
  248. svc_infra/storage/base.py +239 -0
  249. svc_infra/storage/easy.py +181 -0
  250. svc_infra/storage/settings.py +193 -0
  251. svc_infra/testing/__init__.py +682 -0
  252. svc_infra/utils.py +170 -5
  253. svc_infra/webhooks/__init__.py +69 -0
  254. svc_infra/webhooks/add.py +327 -0
  255. svc_infra/webhooks/encryption.py +115 -0
  256. svc_infra/webhooks/fastapi.py +37 -0
  257. svc_infra/webhooks/router.py +55 -0
  258. svc_infra/webhooks/service.py +69 -0
  259. svc_infra/webhooks/signing.py +34 -0
  260. svc_infra/websocket/__init__.py +79 -0
  261. svc_infra/websocket/add.py +139 -0
  262. svc_infra/websocket/client.py +283 -0
  263. svc_infra/websocket/config.py +57 -0
  264. svc_infra/websocket/easy.py +76 -0
  265. svc_infra/websocket/exceptions.py +61 -0
  266. svc_infra/websocket/manager.py +343 -0
  267. svc_infra/websocket/models.py +49 -0
  268. svc_infra-1.1.0.dist-info/LICENSE +21 -0
  269. svc_infra-1.1.0.dist-info/METADATA +362 -0
  270. svc_infra-1.1.0.dist-info/RECORD +364 -0
  271. svc_infra-0.1.595.dist-info/METADATA +0 -80
  272. svc_infra-0.1.595.dist-info/RECORD +0 -253
  273. {svc_infra-0.1.595.dist-info → svc_infra-1.1.0.dist-info}/WHEEL +0 -0
  274. {svc_infra-0.1.595.dist-info → svc_infra-1.1.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,229 @@
1
+ """URL content loader.
2
+
3
+ Load content from URLs with automatic HTML text extraction.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import re
10
+ from typing import Any
11
+
12
+ import httpx
13
+
14
+ from .base import BaseLoader, ErrorStrategy
15
+ from .models import LoadedContent
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class URLLoader(BaseLoader):
    """Load content from one or more URLs.

    Fetches content from URLs and optionally extracts readable text from HTML.
    Supports redirects, custom headers, and batch loading.

    Args:
        urls: Single URL or list of URLs to load.
        headers: Optional HTTP headers to send with requests.
        extract_text: If True (default), extract readable text from HTML pages.
            Raw HTML is returned if False or if content is not HTML.
        follow_redirects: Follow HTTP redirects (default: True).
        timeout: Request timeout in seconds (default: 30).
        extra_metadata: Additional metadata to attach to all loaded content.
        on_error: How to handle errors ("skip" or "raise"). Default: "skip"

    Example:
        >>> # Load single URL
        >>> loader = URLLoader("https://example.com/docs/guide.md")
        >>> contents = await loader.load()
        >>> print(contents[0].content[:100])
        >>>
        >>> # Load multiple URLs
        >>> loader = URLLoader([
        ...     "https://example.com/page1",
        ...     "https://example.com/page2",
        ... ])
        >>> contents = await loader.load()
        >>>
        >>> # Disable HTML text extraction
        >>> loader = URLLoader("https://example.com", extract_text=False)
        >>> contents = await loader.load()  # Returns raw HTML
        >>>
        >>> # With custom headers (e.g., for APIs)
        >>> loader = URLLoader(
        ...     "https://api.example.com/docs",
        ...     headers={"Authorization": "Bearer token123"},
        ... )
        >>> contents = await loader.load()

    Note:
        - With BeautifulSoup installed, text extraction removes scripts,
          styles, nav, footer, header, aside and noscript elements.
        - Without BeautifulSoup, a regex fallback strips script/style blocks
          and all remaining tags, then decodes HTML entities.
        - Content type is detected from HTTP headers.
    """

    def __init__(
        self,
        urls: str | list[str],
        headers: dict[str, str] | None = None,
        extract_text: bool = True,
        follow_redirects: bool = True,
        timeout: float = 30.0,
        extra_metadata: dict[str, Any] | None = None,
        on_error: ErrorStrategy = "skip",
    ) -> None:
        """Initialize the URL loader.

        Args:
            urls: Single URL or list of URLs
            headers: HTTP headers to send
            extract_text: Extract text from HTML (default: True)
            follow_redirects: Follow redirects (default: True)
            timeout: Request timeout in seconds
            extra_metadata: Additional metadata for all content
            on_error: Error handling strategy

        Raises:
            ValueError: If any URL does not start with http:// or https://.
        """
        super().__init__(on_error=on_error)

        # Normalize urls to a list (copying so later caller mutation is harmless)
        self.urls = [urls] if isinstance(urls, str) else list(urls)
        self.headers = headers or {}
        self.extract_text = extract_text
        self.follow_redirects = follow_redirects
        self.timeout = timeout
        self.extra_metadata = extra_metadata or {}

        # Validate URLs eagerly so bad input fails at construction, not at load()
        for url in self.urls:
            if not url.startswith(("http://", "https://")):
                raise ValueError(f"Invalid URL: {url!r}. URLs must start with http:// or https://")

    async def load(self) -> list[LoadedContent]:
        """Load content from all URLs.

        Returns:
            List of LoadedContent objects for each successfully loaded URL.

        Raises:
            RuntimeError: If a request fails (HTTP error status or transport
                error) and on_error="raise". The underlying httpx exception
                is chained as ``__cause__``.
        """
        contents: list[LoadedContent] = []

        async with httpx.AsyncClient(
            timeout=self.timeout,
            follow_redirects=self.follow_redirects,
        ) as client:
            for url in self.urls:
                try:
                    logger.debug("Fetching: %s", url)
                    resp = await client.get(url, headers=self.headers)
                    resp.raise_for_status()

                    content_type = resp.headers.get("content-type", "")
                    raw_content = resp.text

                    # Extract text from HTML if requested. Media types are
                    # case-insensitive, so compare on the lowercased header.
                    if self.extract_text and "text/html" in content_type.lower():
                        content = self._extract_text_from_html(raw_content)
                    else:
                        content = raw_content

                    # Parse content type (remove charset etc.)
                    mime_type = content_type.split(";")[0].strip() if content_type else None

                    loaded = LoadedContent(
                        content=content,
                        source=url,
                        content_type=mime_type,
                        metadata={
                            "loader": "url",
                            "url": url,
                            "status_code": resp.status_code,
                            "final_url": str(resp.url),  # After redirects
                            # Spread last: caller-supplied keys win over the built-ins
                            **self.extra_metadata,
                        },
                    )
                    contents.append(loaded)
                    logger.debug("Loaded: %s (%d chars)", url, len(content))

                except httpx.HTTPStatusError as e:
                    msg = f"HTTP {e.response.status_code} for {url}"
                    if self.on_error == "raise":
                        raise RuntimeError(msg) from e
                    logger.warning(msg)

                except httpx.RequestError as e:
                    msg = f"Request failed for {url}: {e}"
                    if self.on_error == "raise":
                        raise RuntimeError(msg) from e
                    logger.warning(msg)

        return contents

    @staticmethod
    def _extract_text_from_html(html: str) -> str:
        """Extract readable text from HTML content.

        Tries to use BeautifulSoup if available, falls back to regex.

        Args:
            html: Raw HTML content

        Returns:
            Extracted text. With BeautifulSoup, scripts, styles and navigation
            elements are removed and line breaks are kept; the regex fallback
            removes script/style blocks and tags and collapses all whitespace
            to single spaces.
        """
        # Only the import itself is guarded, so an ImportError can never be
        # confused with a failure inside the parsing code below.
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            BeautifulSoup = None

        if BeautifulSoup is not None:
            soup = BeautifulSoup(html, "html.parser")

            # Remove non-content elements
            for tag in soup(["script", "style", "nav", "footer", "header", "aside", "noscript"]):
                tag.decompose()

            # Get text with newlines preserved
            text = soup.get_text(separator="\n", strip=True)

            # Clean up excessive whitespace
            text = re.sub(r"\n{3,}", "\n\n", text)
            return text.strip()

        # Fallback: basic regex-based extraction
        logger.debug("BeautifulSoup not installed, using regex fallback")

        # The parameter is named ``html``, so import the function directly
        # instead of the (shadowed) module.
        from html import unescape

        # Remove script and style blocks
        text = re.sub(
            r"<script[^>]*>.*?</script>",
            "",
            html,
            flags=re.DOTALL | re.IGNORECASE,
        )
        text = re.sub(
            r"<style[^>]*>.*?</style>",
            "",
            text,
            flags=re.DOTALL | re.IGNORECASE,
        )

        # Remove all HTML tags
        text = re.sub(r"<[^>]+>", " ", text)

        # Decode entities in a single pass. Chained str.replace calls decoded
        # "&amp;" first and therefore double-unescaped input like "&amp;lt;";
        # unescape() also covers every named and numeric entity.
        text = unescape(text)

        # Clean up whitespace (split() also collapses the U+00A0 from &nbsp;)
        text = " ".join(text.split())
        return text.strip()

    def __repr__(self) -> str:
        """Return string representation."""
        if len(self.urls) == 1:
            return f"URLLoader({self.urls[0]!r})"
        return f"URLLoader([{len(self.urls)} URLs])"
@@ -0,0 +1,375 @@
1
+ """Logging utilities for svc-infra applications.
2
+
3
+ This module provides logging utilities optimized for containerized
4
+ environments like Railway, Render, and Kubernetes, where log buffering
5
+ can cause visibility issues.
6
+
7
+ Features:
8
+ - Force flush for immediate log visibility in containers
9
+ - JSON-formatted structured logging
10
+ - Context injection for request tracing
11
+ - Pre-configured loggers with sensible defaults
12
+
13
+ Example:
14
+ >>> from svc_infra.logging import flush, get_logger, configure_for_container
15
+ >>>
16
+ >>> # Configure logging at app startup
17
+ >>> configure_for_container()
18
+ >>>
19
+ >>> # Get a logger
20
+ >>> logger = get_logger(__name__)
21
+ >>> logger.info("Starting application", extra={"version": "1.0.0"})
22
+ >>>
23
+ >>> # Force flush after critical operations
24
+ >>> flush()
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import contextvars
30
+ import json
31
+ import logging
32
+ import os
33
+ import sys
34
+ from collections.abc import Iterator
35
+ from contextlib import contextmanager
36
+ from datetime import UTC, datetime
37
+ from typing import Any
38
+
39
# Per-context (task/thread) storage for the structured fields that the
# formatters merge into every log line.
_log_context: contextvars.ContextVar[dict[str, Any]] = contextvars.ContextVar(
    "log_context",
    default={},
)

# Log level name taken from the LOG_LEVEL environment variable (INFO if unset)
DEFAULT_LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()

# JSON output is used unless LOG_FORMAT is explicitly "text" (case-insensitive)
USE_JSON_FORMAT = not (os.getenv("LOG_FORMAT", "").lower() == "text")
49
+
50
+
51
def flush() -> None:
    """
    Flush ``sys.stdout`` and then ``sys.stderr``.

    Python may buffer output written to the standard streams, which can delay
    log visibility in containerized environments (Docker, Railway,
    Kubernetes). Calling this after a critical operation pushes whatever is
    buffered out to the underlying streams; it does not change log content.

    Example:
        >>> import logging
        >>> from svc_infra.logging import flush
        >>>
        >>> logging.info("Starting database migration...")
        >>> # ... perform migration ...
        >>> logging.info("Migration complete")
        >>> flush()  # Ensure logs are visible in container logs
    """
    for std_stream in (sys.stdout, sys.stderr):
        std_stream.flush()
73
+
74
+
75
class JsonFormatter(logging.Formatter):
    """
    JSON log formatter for structured logging.

    Produces JSON-formatted log lines suitable for log aggregation
    systems like Datadog, Elastic, or CloudWatch.

    Output format:
        {"timestamp": "...", "level": "INFO", "logger": "...", "message": "...", ...}

    Any extra fields passed to the logger are included in the output.
    Context from `with_context()` is also merged in. Values that are not
    JSON-serializable are rendered with ``str()``.
    """

    # Attributes the logging machinery itself puts on a LogRecord. Anything
    # else found in ``record.__dict__`` was supplied by the caller via
    # ``extra=``. Built once here instead of on every format() call.
    _STANDARD_ATTRS = frozenset(
        {
            "name",
            "msg",
            "args",
            "created",
            "filename",
            "funcName",
            "levelname",
            "levelno",
            "lineno",
            "module",
            "msecs",
            "pathname",
            "process",
            "processName",
            "relativeCreated",
            "stack_info",
            "exc_info",
            "exc_text",
            "thread",
            "threadName",
            "taskName",
            "message",
        }
    )

    def format(self, record: logging.LogRecord) -> str:
        """Format a log record as a single JSON object.

        Args:
            record: The log record to serialize.

        Returns:
            A JSON string. Keys are written in this order, later ones
            overriding earlier ones on collision: base fields, exception /
            stack info, log context, then ``extra=`` fields.
        """
        # Base log structure. The timestamp is the moment the event was
        # logged (record.created), not the moment it happens to be formatted;
        # the two differ with queued or buffered handlers.
        log_dict: dict[str, Any] = {
            "timestamp": datetime.fromtimestamp(record.created, tz=UTC).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }

        # Add exception info if present
        if record.exc_info:
            log_dict["exception"] = self.formatException(record.exc_info)

        # Keep the stack trace requested with ``stack_info=True`` instead of
        # silently dropping it.
        if record.stack_info:
            log_dict["stack_info"] = self.formatStack(record.stack_info)

        # Add context from context variable
        context = _log_context.get()
        if context:
            log_dict.update(context)

        # Add any extra fields from the log call, skipping standard LogRecord
        # attributes and private (underscore-prefixed) ones.
        for key, value in record.__dict__.items():
            if key not in self._STANDARD_ATTRS and not key.startswith("_"):
                log_dict[key] = value

        return json.dumps(log_dict, default=str)
139
+
140
+
141
class TextFormatter(logging.Formatter):
    """
    Human-readable text formatter with context support.

    Suitable for local development where JSON is harder to read.

    Output format:
        2024-01-15 10:30:45 [INFO] logger.name: Message [key=value ...]

    The timestamp is in UTC. The bracketed suffix is only present when a log
    context is set; a formatted traceback follows on new lines when the
    record carries exception info.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format a log record as human-readable text.

        Args:
            record: The log record to render.

        Returns:
            The formatted line (plus traceback lines, if any).
        """
        # Use the time the event was logged (record.created) rather than the
        # time of formatting, so delayed handlers still show the real time.
        timestamp = datetime.fromtimestamp(record.created, tz=UTC).strftime("%Y-%m-%d %H:%M:%S")
        base = f"{timestamp} [{record.levelname}] {record.name}: {record.getMessage()}"

        # Add context if present
        context = _log_context.get()
        if context:
            context_str = " ".join(f"{k}={v}" for k, v in context.items())
            base = f"{base} [{context_str}]"

        # Add exception if present
        if record.exc_info:
            base = f"{base}\n{self.formatException(record.exc_info)}"

        return base
167
+
168
+
169
def configure_for_container(
    level: str | None = None,
    json_format: bool | None = None,
    stream: Any = None,
) -> None:
    """
    Configure logging for containerized environments.

    Sets up:
    - A single stream handler on the root logger (existing root handlers
      are removed)
    - JSON or text formatting based on environment
    - Appropriate log level from LOG_LEVEL env var
    - The same handler on the uvicorn loggers, with propagation disabled

    This should be called once at application startup, typically
    before any other logging configuration.

    Args:
        level: Log level name (DEBUG, INFO, WARNING, ERROR), matched
            case-insensitively. Defaults to LOG_LEVEL env var or INFO.
            Unknown names fall back to INFO.
        json_format: If True, use JSON format; if False, use text. Defaults to LOG_FORMAT env var.
        stream: Output stream. Defaults to sys.stderr.

    Environment Variables:
        LOG_LEVEL: Default log level (default: INFO)
        LOG_FORMAT: "text" for human-readable, anything else for JSON (default: JSON)
        PYTHONUNBUFFERED: Set to "1" by this function (see note in the body)

    Example:
        >>> from svc_infra.logging import configure_for_container
        >>>
        >>> # In your app's main.py or __init__.py
        >>> configure_for_container()
        >>>
        >>> # Or with explicit settings
        >>> configure_for_container(level="DEBUG", json_format=False)
    """
    # Determine settings
    log_level = (level or DEFAULT_LOG_LEVEL).upper()
    use_json = json_format if json_format is not None else USE_JSON_FORMAT
    output_stream = stream if stream is not None else sys.stderr

    # Resolve the name through logging's own name->level table.
    # getattr(logging, name) is unsafe here: "debug" resolves to the
    # logging.debug *function* and other names to non-level attributes,
    # either of which makes setLevel() raise.
    numeric_level = logging.getLevelNamesMapping().get(log_level, logging.INFO)

    # NOTE: the interpreter reads PYTHONUNBUFFERED at startup, so setting it
    # here cannot unbuffer the current process; it is only inherited by
    # child processes. StreamHandler flushes after each record regardless.
    os.environ["PYTHONUNBUFFERED"] = "1"

    # Get root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Remove existing handlers to avoid duplicates
    root_logger.handlers.clear()

    # Create handler with appropriate formatter
    handler = logging.StreamHandler(output_stream)
    handler.setLevel(numeric_level)

    if use_json:
        handler.setFormatter(JsonFormatter())
    else:
        handler.setFormatter(TextFormatter())

    root_logger.addHandler(handler)

    # Also configure uvicorn loggers to use our format. Propagation is
    # turned off so their records are not emitted a second time via root.
    for logger_name in ("uvicorn", "uvicorn.access", "uvicorn.error"):
        uvicorn_logger = logging.getLogger(logger_name)
        uvicorn_logger.handlers.clear()
        uvicorn_logger.addHandler(handler)
        uvicorn_logger.propagate = False
236
+
237
+
238
def get_logger(name: str) -> logging.Logger:
    """
    Return the standard-library logger registered under ``name``.

    This is a thin wrapper over ``logging.getLogger``. The returned logger
    inherits whatever `configure_for_container()` installed on the root
    logger; if that was never called, Python's default logging
    configuration applies.

    Args:
        name: Logger name, typically `__name__` of the module.

    Returns:
        The logger instance for ``name``.

    Example:
        >>> from svc_infra.logging import get_logger
        >>>
        >>> logger = get_logger(__name__)
        >>> logger.info("Processing request", extra={"user_id": 123})
    """
    named_logger = logging.getLogger(name)
    return named_logger
259
+
260
+
261
@contextmanager
def with_context(**kwargs: Any) -> Iterator[None]:
    """
    Temporarily add key-value pairs to the log context.

    While the ``with`` block is active, the given pairs are layered on top
    of whatever context was already set (new keys win on collision). On
    exit, including on exception, the context variable is reset to the
    value it had when the block was entered.

    Args:
        **kwargs: Key-value pairs to add to log context.

    Yields:
        None

    Example:
        >>> from svc_infra.logging import with_context, get_logger
        >>>
        >>> logger = get_logger(__name__)
        >>>
        >>> with with_context(request_id="abc-123", user_id=42):
        ...     logger.info("Processing request")
        ...     # Output includes: {"request_id": "abc-123", "user_id": 42, ...}
        ...     do_something()
        ...     logger.info("Request complete")
        >>>
        >>> # Context is automatically restored after the block
        >>> logger.info("No context here")
    """
    # Build a fresh dict so the previously stored one is never mutated
    merged = dict(_log_context.get())
    merged.update(kwargs)

    reset_token = _log_context.set(merged)
    try:
        yield
    finally:
        # Put back exactly what was there before entering the block
        _log_context.reset(reset_token)
301
+
302
+
303
def set_context(**kwargs: Any) -> None:
    """
    Merge key-value pairs into the log context without scoping.

    Unlike `with_context()`, nothing is restored automatically: the pairs
    stay in the context until overwritten or until `clear_context()` is
    called. Use it for values such as tenant_id or user_id that should be
    attached for an entire request lifecycle.

    Args:
        **kwargs: Key-value pairs to add to log context.

    Example:
        >>> from svc_infra.logging import set_context, clear_context, get_logger
        >>>
        >>> logger = get_logger(__name__)
        >>>
        >>> # In request middleware
        >>> set_context(request_id="abc-123", tenant_id="tenant-1")
        >>>
        >>> # All subsequent logs include context
        >>> logger.info("Processing...")
        >>>
        >>> # Clear at end of request
        >>> clear_context()
    """
    # ``|`` builds a new dict; the previously stored one is left untouched
    updated = _log_context.get() | kwargs
    _log_context.set(updated)
329
+
330
+
331
def clear_context() -> None:
    """
    Replace the log context with an empty one.

    Call this at the end of a request or operation so that context set via
    `set_context()` does not leak into subsequent operations.

    Example:
        >>> from svc_infra.logging import set_context, clear_context
        >>>
        >>> set_context(request_id="abc-123")
        >>> # ... handle request ...
        >>> clear_context()  # Clean up
    """
    empty_context: dict[str, Any] = dict()
    _log_context.set(empty_context)
346
+
347
+
348
def get_context() -> dict[str, Any]:
    """
    Return a snapshot of the current log context.

    Returns:
        A shallow copy of the current context key-value pairs; modifying
        the returned dict does not affect the stored context.

    Example:
        >>> from svc_infra.logging import set_context, get_context
        >>>
        >>> set_context(request_id="abc-123")
        >>> ctx = get_context()
        >>> print(ctx)  # {"request_id": "abc-123"}
    """
    snapshot = dict(_log_context.get())
    return snapshot
363
+
364
+
365
+ __all__ = [
366
+ "flush",
367
+ "configure_for_container",
368
+ "get_logger",
369
+ "with_context",
370
+ "set_context",
371
+ "clear_context",
372
+ "get_context",
373
+ "JsonFormatter",
374
+ "TextFormatter",
375
+ ]
svc_infra/mcp/__init__.py CHANGED
@@ -0,0 +1,82 @@
1
+ """MCP (Model Context Protocol) server for svc-infra CLI.
2
+
3
+ This module provides an MCP server that exposes svc-infra CLI commands as tools
4
+ for AI assistants and agents.
5
+
6
+ Available Tools:
7
+ - svc_infra_cmd_help: Get help text for the svc-infra CLI
8
+ - svc_infra_subcmd_help: Get help for specific subcommands
9
+ - svc_infra_docs_help: Get documentation help
10
+
11
+ Example:
12
+ # Run the MCP server
13
+ python -m svc_infra.mcp.svc_infra_mcp
14
+
15
+ # Or use programmatically
16
+ from svc_infra.mcp import mcp, Subcommand, svc_infra_subcmd_help
17
+
18
+ # Get help for a subcommand
19
+ result = await svc_infra_subcmd_help(Subcommand.sql_upgrade)
20
+
21
+ See Also:
22
+ - ai-infra MCP documentation for client usage
23
+ - svc-infra CLI reference for available commands
24
+
25
+ Note:
26
+ This module requires ai-infra to be installed. If ai-infra is not available,
27
+ imports will raise ImportError with a helpful message.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ from typing import TYPE_CHECKING
33
+
34
+ if TYPE_CHECKING:
35
+ from .svc_infra_mcp import (
36
+ CLI_PROG as CLI_PROG,
37
+ )
38
+ from .svc_infra_mcp import (
39
+ Subcommand as Subcommand,
40
+ )
41
+ from .svc_infra_mcp import (
42
+ mcp as mcp,
43
+ )
44
+ from .svc_infra_mcp import (
45
+ svc_infra_cmd_help as svc_infra_cmd_help,
46
+ )
47
+ from .svc_infra_mcp import (
48
+ svc_infra_docs_help as svc_infra_docs_help,
49
+ )
50
+ from .svc_infra_mcp import (
51
+ svc_infra_subcmd_help as svc_infra_subcmd_help,
52
+ )
53
+
54
+ __all__ = [
55
+ # MCP server instance
56
+ "mcp",
57
+ # Subcommand enum
58
+ "Subcommand",
59
+ # Tool functions
60
+ "svc_infra_cmd_help",
61
+ "svc_infra_subcmd_help",
62
+ "svc_infra_docs_help",
63
+ # Constants
64
+ "CLI_PROG",
65
+ ]
66
+
67
+
68
def __getattr__(name: str):
    """Resolve public names lazily from ``svc_infra_mcp`` on first access.

    Importing the submodule is deferred until one of the names listed in
    ``__all__`` is actually requested. If that import fails with an error
    mentioning ``ai_infra``, it is re-raised as an ImportError carrying an
    install hint; any other ImportError propagates unchanged.

    Raises:
        AttributeError: If ``name`` is not one of the exported names.
        ImportError: If the backing submodule cannot be imported.
    """
    # Guard clause: anything outside the public API is a plain missing attribute
    if name not in __all__:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    try:
        from . import svc_infra_mcp

        return getattr(svc_infra_mcp, name)
    except ImportError as e:
        if "ai_infra" not in str(e):
            raise
        raise ImportError(
            f"Cannot import '{name}' from svc_infra.mcp: "
            "ai-infra package is required. Install with: pip install ai-infra"
        ) from e