svc-infra 0.1.595__py3-none-any.whl → 0.1.706__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of svc-infra might be problematic. Click here for more details.

Files changed (256) hide show
  1. svc_infra/__init__.py +58 -2
  2. svc_infra/apf_payments/models.py +133 -42
  3. svc_infra/apf_payments/provider/aiydan.py +121 -47
  4. svc_infra/apf_payments/provider/base.py +30 -9
  5. svc_infra/apf_payments/provider/stripe.py +156 -62
  6. svc_infra/apf_payments/schemas.py +18 -9
  7. svc_infra/apf_payments/service.py +98 -41
  8. svc_infra/apf_payments/settings.py +5 -1
  9. svc_infra/api/__init__.py +61 -0
  10. svc_infra/api/fastapi/__init__.py +15 -0
  11. svc_infra/api/fastapi/admin/__init__.py +3 -0
  12. svc_infra/api/fastapi/admin/add.py +245 -0
  13. svc_infra/api/fastapi/apf_payments/router.py +128 -70
  14. svc_infra/api/fastapi/apf_payments/setup.py +13 -6
  15. svc_infra/api/fastapi/auth/__init__.py +65 -0
  16. svc_infra/api/fastapi/auth/_cookies.py +6 -2
  17. svc_infra/api/fastapi/auth/add.py +17 -14
  18. svc_infra/api/fastapi/auth/gaurd.py +45 -16
  19. svc_infra/api/fastapi/auth/mfa/models.py +3 -1
  20. svc_infra/api/fastapi/auth/mfa/pre_auth.py +10 -6
  21. svc_infra/api/fastapi/auth/mfa/router.py +15 -8
  22. svc_infra/api/fastapi/auth/mfa/security.py +1 -2
  23. svc_infra/api/fastapi/auth/mfa/utils.py +2 -1
  24. svc_infra/api/fastapi/auth/mfa/verify.py +9 -2
  25. svc_infra/api/fastapi/auth/policy.py +0 -1
  26. svc_infra/api/fastapi/auth/providers.py +3 -1
  27. svc_infra/api/fastapi/auth/routers/apikey_router.py +6 -6
  28. svc_infra/api/fastapi/auth/routers/oauth_router.py +146 -52
  29. svc_infra/api/fastapi/auth/routers/session_router.py +6 -2
  30. svc_infra/api/fastapi/auth/security.py +31 -10
  31. svc_infra/api/fastapi/auth/sender.py +8 -1
  32. svc_infra/api/fastapi/auth/state.py +3 -1
  33. svc_infra/api/fastapi/auth/ws_security.py +275 -0
  34. svc_infra/api/fastapi/billing/router.py +73 -0
  35. svc_infra/api/fastapi/billing/setup.py +19 -0
  36. svc_infra/api/fastapi/cache/add.py +9 -5
  37. svc_infra/api/fastapi/db/__init__.py +5 -1
  38. svc_infra/api/fastapi/db/http.py +3 -1
  39. svc_infra/api/fastapi/db/nosql/__init__.py +39 -1
  40. svc_infra/api/fastapi/db/nosql/mongo/add.py +47 -32
  41. svc_infra/api/fastapi/db/nosql/mongo/crud_router.py +30 -11
  42. svc_infra/api/fastapi/db/sql/__init__.py +5 -1
  43. svc_infra/api/fastapi/db/sql/add.py +71 -26
  44. svc_infra/api/fastapi/db/sql/crud_router.py +210 -22
  45. svc_infra/api/fastapi/db/sql/health.py +3 -1
  46. svc_infra/api/fastapi/db/sql/session.py +18 -0
  47. svc_infra/api/fastapi/db/sql/users.py +18 -6
  48. svc_infra/api/fastapi/dependencies/ratelimit.py +78 -14
  49. svc_infra/api/fastapi/docs/add.py +173 -0
  50. svc_infra/api/fastapi/docs/landing.py +4 -2
  51. svc_infra/api/fastapi/docs/scoped.py +62 -15
  52. svc_infra/api/fastapi/dual/__init__.py +12 -2
  53. svc_infra/api/fastapi/dual/dualize.py +1 -1
  54. svc_infra/api/fastapi/dual/protected.py +126 -4
  55. svc_infra/api/fastapi/dual/public.py +25 -0
  56. svc_infra/api/fastapi/dual/router.py +40 -13
  57. svc_infra/api/fastapi/dx.py +33 -2
  58. svc_infra/api/fastapi/ease.py +10 -2
  59. svc_infra/api/fastapi/http/concurrency.py +2 -1
  60. svc_infra/api/fastapi/http/conditional.py +3 -1
  61. svc_infra/api/fastapi/middleware/debug.py +4 -1
  62. svc_infra/api/fastapi/middleware/errors/catchall.py +6 -2
  63. svc_infra/api/fastapi/middleware/errors/exceptions.py +1 -1
  64. svc_infra/api/fastapi/middleware/errors/handlers.py +54 -8
  65. svc_infra/api/fastapi/middleware/graceful_shutdown.py +104 -0
  66. svc_infra/api/fastapi/middleware/idempotency.py +197 -70
  67. svc_infra/api/fastapi/middleware/idempotency_store.py +187 -0
  68. svc_infra/api/fastapi/middleware/optimistic_lock.py +42 -0
  69. svc_infra/api/fastapi/middleware/ratelimit.py +125 -28
  70. svc_infra/api/fastapi/middleware/ratelimit_store.py +43 -10
  71. svc_infra/api/fastapi/middleware/request_id.py +27 -11
  72. svc_infra/api/fastapi/middleware/request_size_limit.py +3 -3
  73. svc_infra/api/fastapi/middleware/timeout.py +177 -0
  74. svc_infra/api/fastapi/openapi/apply.py +5 -3
  75. svc_infra/api/fastapi/openapi/conventions.py +9 -2
  76. svc_infra/api/fastapi/openapi/mutators.py +165 -20
  77. svc_infra/api/fastapi/openapi/pipeline.py +1 -1
  78. svc_infra/api/fastapi/openapi/security.py +3 -1
  79. svc_infra/api/fastapi/ops/add.py +75 -0
  80. svc_infra/api/fastapi/pagination.py +47 -20
  81. svc_infra/api/fastapi/routers/__init__.py +43 -15
  82. svc_infra/api/fastapi/routers/ping.py +1 -0
  83. svc_infra/api/fastapi/setup.py +188 -57
  84. svc_infra/api/fastapi/tenancy/add.py +19 -0
  85. svc_infra/api/fastapi/tenancy/context.py +112 -0
  86. svc_infra/api/fastapi/versioned.py +101 -0
  87. svc_infra/app/README.md +5 -5
  88. svc_infra/app/__init__.py +3 -1
  89. svc_infra/app/env.py +69 -1
  90. svc_infra/app/logging/add.py +9 -2
  91. svc_infra/app/logging/formats.py +12 -5
  92. svc_infra/billing/__init__.py +23 -0
  93. svc_infra/billing/async_service.py +147 -0
  94. svc_infra/billing/jobs.py +241 -0
  95. svc_infra/billing/models.py +177 -0
  96. svc_infra/billing/quotas.py +103 -0
  97. svc_infra/billing/schemas.py +36 -0
  98. svc_infra/billing/service.py +123 -0
  99. svc_infra/bundled_docs/README.md +5 -0
  100. svc_infra/bundled_docs/__init__.py +1 -0
  101. svc_infra/bundled_docs/getting-started.md +6 -0
  102. svc_infra/cache/__init__.py +9 -0
  103. svc_infra/cache/add.py +170 -0
  104. svc_infra/cache/backend.py +7 -6
  105. svc_infra/cache/decorators.py +81 -15
  106. svc_infra/cache/demo.py +2 -2
  107. svc_infra/cache/keys.py +24 -4
  108. svc_infra/cache/recache.py +26 -14
  109. svc_infra/cache/resources.py +14 -5
  110. svc_infra/cache/tags.py +19 -44
  111. svc_infra/cache/utils.py +3 -1
  112. svc_infra/cli/__init__.py +52 -8
  113. svc_infra/cli/__main__.py +4 -0
  114. svc_infra/cli/cmds/__init__.py +39 -2
  115. svc_infra/cli/cmds/db/nosql/mongo/mongo_cmds.py +7 -4
  116. svc_infra/cli/cmds/db/nosql/mongo/mongo_scaffold_cmds.py +7 -5
  117. svc_infra/cli/cmds/db/ops_cmds.py +270 -0
  118. svc_infra/cli/cmds/db/sql/alembic_cmds.py +103 -18
  119. svc_infra/cli/cmds/db/sql/sql_export_cmds.py +88 -0
  120. svc_infra/cli/cmds/db/sql/sql_scaffold_cmds.py +3 -3
  121. svc_infra/cli/cmds/docs/docs_cmds.py +142 -0
  122. svc_infra/cli/cmds/dx/__init__.py +12 -0
  123. svc_infra/cli/cmds/dx/dx_cmds.py +116 -0
  124. svc_infra/cli/cmds/health/__init__.py +179 -0
  125. svc_infra/cli/cmds/health/health_cmds.py +8 -0
  126. svc_infra/cli/cmds/help.py +4 -0
  127. svc_infra/cli/cmds/jobs/__init__.py +1 -0
  128. svc_infra/cli/cmds/jobs/jobs_cmds.py +47 -0
  129. svc_infra/cli/cmds/obs/obs_cmds.py +36 -15
  130. svc_infra/cli/cmds/sdk/__init__.py +0 -0
  131. svc_infra/cli/cmds/sdk/sdk_cmds.py +112 -0
  132. svc_infra/cli/foundation/runner.py +6 -2
  133. svc_infra/data/add.py +61 -0
  134. svc_infra/data/backup.py +58 -0
  135. svc_infra/data/erasure.py +45 -0
  136. svc_infra/data/fixtures.py +42 -0
  137. svc_infra/data/retention.py +61 -0
  138. svc_infra/db/__init__.py +15 -0
  139. svc_infra/db/crud_schema.py +9 -9
  140. svc_infra/db/inbox.py +67 -0
  141. svc_infra/db/nosql/__init__.py +3 -0
  142. svc_infra/db/nosql/core.py +30 -9
  143. svc_infra/db/nosql/indexes.py +3 -1
  144. svc_infra/db/nosql/management.py +1 -1
  145. svc_infra/db/nosql/mongo/README.md +13 -13
  146. svc_infra/db/nosql/mongo/client.py +19 -2
  147. svc_infra/db/nosql/mongo/settings.py +6 -2
  148. svc_infra/db/nosql/repository.py +35 -15
  149. svc_infra/db/nosql/resource.py +20 -3
  150. svc_infra/db/nosql/scaffold.py +9 -3
  151. svc_infra/db/nosql/service.py +3 -1
  152. svc_infra/db/nosql/types.py +6 -2
  153. svc_infra/db/ops.py +384 -0
  154. svc_infra/db/outbox.py +108 -0
  155. svc_infra/db/sql/apikey.py +37 -9
  156. svc_infra/db/sql/authref.py +9 -3
  157. svc_infra/db/sql/constants.py +12 -8
  158. svc_infra/db/sql/core.py +2 -2
  159. svc_infra/db/sql/management.py +11 -8
  160. svc_infra/db/sql/repository.py +99 -26
  161. svc_infra/db/sql/resource.py +5 -0
  162. svc_infra/db/sql/scaffold.py +6 -2
  163. svc_infra/db/sql/service.py +15 -5
  164. svc_infra/db/sql/templates/models_schemas/auth/models.py.tmpl +7 -56
  165. svc_infra/db/sql/templates/setup/env_async.py.tmpl +34 -12
  166. svc_infra/db/sql/templates/setup/env_sync.py.tmpl +29 -7
  167. svc_infra/db/sql/tenant.py +88 -0
  168. svc_infra/db/sql/uniq_hooks.py +9 -3
  169. svc_infra/db/sql/utils.py +138 -51
  170. svc_infra/db/sql/versioning.py +14 -0
  171. svc_infra/deploy/__init__.py +538 -0
  172. svc_infra/documents/__init__.py +100 -0
  173. svc_infra/documents/add.py +264 -0
  174. svc_infra/documents/ease.py +233 -0
  175. svc_infra/documents/models.py +114 -0
  176. svc_infra/documents/storage.py +264 -0
  177. svc_infra/dx/add.py +65 -0
  178. svc_infra/dx/changelog.py +74 -0
  179. svc_infra/dx/checks.py +68 -0
  180. svc_infra/exceptions.py +141 -0
  181. svc_infra/health/__init__.py +864 -0
  182. svc_infra/http/__init__.py +13 -0
  183. svc_infra/http/client.py +105 -0
  184. svc_infra/jobs/builtins/outbox_processor.py +40 -0
  185. svc_infra/jobs/builtins/webhook_delivery.py +95 -0
  186. svc_infra/jobs/easy.py +33 -0
  187. svc_infra/jobs/loader.py +50 -0
  188. svc_infra/jobs/queue.py +116 -0
  189. svc_infra/jobs/redis_queue.py +256 -0
  190. svc_infra/jobs/runner.py +79 -0
  191. svc_infra/jobs/scheduler.py +53 -0
  192. svc_infra/jobs/worker.py +40 -0
  193. svc_infra/loaders/__init__.py +186 -0
  194. svc_infra/loaders/base.py +142 -0
  195. svc_infra/loaders/github.py +311 -0
  196. svc_infra/loaders/models.py +147 -0
  197. svc_infra/loaders/url.py +235 -0
  198. svc_infra/logging/__init__.py +374 -0
  199. svc_infra/mcp/svc_infra_mcp.py +91 -33
  200. svc_infra/obs/README.md +2 -0
  201. svc_infra/obs/add.py +65 -9
  202. svc_infra/obs/cloud_dash.py +2 -1
  203. svc_infra/obs/grafana/dashboards/http-overview.json +45 -0
  204. svc_infra/obs/metrics/__init__.py +3 -4
  205. svc_infra/obs/metrics/asgi.py +13 -7
  206. svc_infra/obs/metrics/http.py +9 -5
  207. svc_infra/obs/metrics/sqlalchemy.py +13 -9
  208. svc_infra/obs/metrics.py +6 -5
  209. svc_infra/obs/settings.py +6 -2
  210. svc_infra/security/add.py +217 -0
  211. svc_infra/security/audit.py +92 -10
  212. svc_infra/security/audit_service.py +4 -3
  213. svc_infra/security/headers.py +15 -2
  214. svc_infra/security/hibp.py +14 -4
  215. svc_infra/security/jwt_rotation.py +74 -22
  216. svc_infra/security/lockout.py +11 -5
  217. svc_infra/security/models.py +54 -12
  218. svc_infra/security/oauth_models.py +73 -0
  219. svc_infra/security/org_invites.py +5 -3
  220. svc_infra/security/passwords.py +3 -1
  221. svc_infra/security/permissions.py +25 -2
  222. svc_infra/security/session.py +1 -1
  223. svc_infra/security/signed_cookies.py +21 -1
  224. svc_infra/storage/__init__.py +93 -0
  225. svc_infra/storage/add.py +253 -0
  226. svc_infra/storage/backends/__init__.py +11 -0
  227. svc_infra/storage/backends/local.py +339 -0
  228. svc_infra/storage/backends/memory.py +216 -0
  229. svc_infra/storage/backends/s3.py +353 -0
  230. svc_infra/storage/base.py +239 -0
  231. svc_infra/storage/easy.py +185 -0
  232. svc_infra/storage/settings.py +195 -0
  233. svc_infra/testing/__init__.py +685 -0
  234. svc_infra/utils.py +7 -3
  235. svc_infra/webhooks/__init__.py +69 -0
  236. svc_infra/webhooks/add.py +339 -0
  237. svc_infra/webhooks/encryption.py +115 -0
  238. svc_infra/webhooks/fastapi.py +39 -0
  239. svc_infra/webhooks/router.py +55 -0
  240. svc_infra/webhooks/service.py +70 -0
  241. svc_infra/webhooks/signing.py +34 -0
  242. svc_infra/websocket/__init__.py +79 -0
  243. svc_infra/websocket/add.py +140 -0
  244. svc_infra/websocket/client.py +282 -0
  245. svc_infra/websocket/config.py +69 -0
  246. svc_infra/websocket/easy.py +76 -0
  247. svc_infra/websocket/exceptions.py +61 -0
  248. svc_infra/websocket/manager.py +344 -0
  249. svc_infra/websocket/models.py +49 -0
  250. svc_infra-0.1.706.dist-info/LICENSE +21 -0
  251. svc_infra-0.1.706.dist-info/METADATA +356 -0
  252. svc_infra-0.1.706.dist-info/RECORD +357 -0
  253. svc_infra-0.1.595.dist-info/METADATA +0 -80
  254. svc_infra-0.1.595.dist-info/RECORD +0 -253
  255. {svc_infra-0.1.595.dist-info → svc_infra-0.1.706.dist-info}/WHEEL +0 -0
  256. {svc_infra-0.1.595.dist-info → svc_infra-0.1.706.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,864 @@
1
+ """Health check utilities for svc-infra applications.
2
+
3
+ This module provides comprehensive health check infrastructure for
4
+ containerized deployments, including:
5
+
6
+ - **Startup probes**: Wait for dependencies before accepting traffic
7
+ - **Readiness probes**: Check if the service can handle requests
8
+ - **Liveness probes**: Verify the service is still running
9
+ - **Dependency checks**: Built-in checks for common services
10
+
11
+ Designed for Kubernetes, Docker, and PaaS deployments where proper
12
+ health probes prevent routing traffic to unhealthy instances.
13
+
14
+ Example:
15
+ >>> from svc_infra.health import (
16
+ ... HealthRegistry,
17
+ ... check_database,
18
+ ... check_redis,
19
+ ... check_url,
20
+ ... add_health_routes,
21
+ ... )
22
+ >>>
23
+ >>> # Create registry with checks
24
+ >>> registry = HealthRegistry()
25
+ >>> registry.add("database", check_database(os.getenv("DATABASE_URL")))
26
+ >>> registry.add("redis", check_redis(os.getenv("REDIS_URL")))
27
+ >>> registry.add("api", check_url("http://api-service:8080/health"))
28
+ >>>
29
+ >>> # Add to FastAPI app
30
+ >>> add_health_routes(app, registry)
31
+ >>>
32
+ >>> # Or wait for dependencies at startup
33
+ >>> await registry.wait_until_healthy(timeout=60, interval=2)
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import asyncio
39
+ import time
40
+ from dataclasses import dataclass, field
41
+ from enum import StrEnum
42
+ from typing import Any, Awaitable, Callable, Optional
43
+
44
+ import httpx
45
+
46
+
47
+ class HealthStatus(StrEnum):
48
+ """Health check status values."""
49
+
50
+ HEALTHY = "healthy"
51
+ UNHEALTHY = "unhealthy"
52
+ DEGRADED = "degraded" # Partially working
53
+ UNKNOWN = "unknown" # Check hasn't run yet
54
+
55
+
56
@dataclass
class HealthCheckResult:
    """Outcome of running one health check."""

    name: str
    status: HealthStatus
    latency_ms: float
    message: Optional[str] = None
    details: Optional[dict[str, Any]] = None

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly mapping describing this result.

        ``name``, ``status`` and ``latency_ms`` (rounded to two decimals)
        are always present. ``message`` and ``details`` are included only
        when they are truthy.
        """
        payload: dict[str, Any] = dict(
            name=self.name,
            status=self.status,
            latency_ms=round(self.latency_ms, 2),
        )
        optional_fields = (("message", self.message), ("details", self.details))
        payload.update({key: value for key, value in optional_fields if value})
        return payload
78
+
79
+
80
+ # Type alias for health check functions
81
+ HealthCheckFn = Callable[[], Awaitable[HealthCheckResult]]
82
+
83
+
84
@dataclass
class HealthCheck:
    """A health check as stored in the registry, with its run settings."""

    name: str
    check_fn: HealthCheckFn
    # When False, a failure of this check does not fail overall health.
    critical: bool = True
    # Time limit for one run of the check, in seconds.
    timeout: float = 5.0
92
+
93
+
94
class HealthRegistry:
    """Registry of health checks for a service.

    The registry manages multiple health checks and provides methods to:
    - Run all checks and aggregate results
    - Wait for checks to pass (startup probe)
    - Determine overall service health

    Example:
        >>> registry = HealthRegistry()
        >>> registry.add("database", check_database(db_url), critical=True)
        >>> registry.add("cache", check_redis(redis_url), critical=False)
        >>>
        >>> # Run all checks
        >>> result = await registry.check_all()
        >>> print(result.status)  # "healthy", "degraded" or "unhealthy"
        >>>
        >>> # Wait for startup
        >>> await registry.wait_until_healthy(timeout=60)
    """

    def __init__(self) -> None:
        """Initialize empty health registry."""
        self._checks: dict[str, HealthCheck] = {}

    def add(
        self,
        name: str,
        check_fn: HealthCheckFn,
        *,
        critical: bool = True,
        timeout: float = 5.0,
    ) -> None:
        """
        Register a health check.

        Args:
            name: Unique name for this check (e.g., "database", "redis")
            check_fn: Async function that returns HealthCheckResult
            critical: If True, failure means service is unhealthy
            timeout: Maximum time to wait for this check (seconds)

        Raises:
            ValueError: If a check with this name already exists
        """
        if name in self._checks:
            raise ValueError(f"Health check '{name}' already registered")
        self._checks[name] = HealthCheck(
            name=name,
            check_fn=check_fn,
            critical=critical,
            timeout=timeout,
        )

    def remove(self, name: str) -> bool:
        """
        Remove a health check by name.

        Args:
            name: Name of the check to remove

        Returns:
            True if check was removed, False if not found
        """
        # Stored values are always HealthCheck instances, never None.
        return self._checks.pop(name, None) is not None

    def clear(self) -> None:
        """Remove all registered health checks."""
        self._checks.clear()

    @property
    def checks(self) -> list[HealthCheck]:
        """Get list of all registered checks (a new list on every access)."""
        return list(self._checks.values())

    async def check_one(self, name: str) -> HealthCheckResult:
        """
        Run a single health check by name.

        The check function is bounded by the check's configured timeout.
        A timeout or any exception raised by the check function is turned
        into an UNHEALTHY result instead of propagating.

        Args:
            name: Name of the check to run

        Returns:
            HealthCheckResult for the check; its ``latency_ms`` is
            overwritten with the time measured here.

        Raises:
            KeyError: If check not found
        """
        try:
            check = self._checks[name]
        except KeyError:
            raise KeyError(f"Health check '{name}' not found") from None

        start = time.perf_counter()

        try:
            result = await asyncio.wait_for(check.check_fn(), timeout=check.timeout)
            # Replace whatever latency the check reported with our own timing.
            result.latency_ms = (time.perf_counter() - start) * 1000
            return result
        except asyncio.TimeoutError:
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=f"Check timed out after {check.timeout}s",
            )
        except Exception as e:
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=str(e),
            )

    async def check_all(self) -> "AggregatedHealthResult":
        """
        Run all registered health checks concurrently.

        Overall status:
        - UNHEALTHY if any critical check reports UNHEALTHY
        - DEGRADED if only non-critical checks report UNHEALTHY
        - HEALTHY otherwise (also when no checks are registered)

        Returns:
            AggregatedHealthResult with overall status and individual results
        """
        if not self._checks:
            return AggregatedHealthResult(
                status=HealthStatus.HEALTHY,
                checks=[],
                message="No health checks registered",
            )

        # Run all checks concurrently
        check_names = list(self._checks.keys())
        results = await asyncio.gather(
            *[self.check_one(name) for name in check_names],
            return_exceptions=False,
        )

        critical_failed = False
        non_critical_failed = False

        # Pair results with the *registered* name: a check function may put
        # a different name into its own result.
        for registered_name, result in zip(check_names, results):
            if result.status != HealthStatus.UNHEALTHY:
                continue
            check = self._checks.get(registered_name)
            if check and check.critical:
                critical_failed = True
            else:
                non_critical_failed = True

        if critical_failed:
            overall_status = HealthStatus.UNHEALTHY
        elif non_critical_failed:
            overall_status = HealthStatus.DEGRADED
        else:
            overall_status = HealthStatus.HEALTHY

        return AggregatedHealthResult(
            status=overall_status,
            checks=results,
        )

    async def _named_checks_pass(self, names: list[str]) -> bool:
        """Run the named checks in order; stop at the first one that fails.

        A check passes unless its status is UNHEALTHY. An unregistered
        name counts as a failure.
        """
        for name in names:
            try:
                outcome = await self.check_one(name)
            except KeyError:
                return False
            if outcome.status == HealthStatus.UNHEALTHY:
                return False
        return True

    async def wait_until_healthy(
        self,
        *,
        timeout: float = 60.0,
        interval: float = 2.0,
        check_names: Optional[list[str]] = None,
    ) -> bool:
        """
        Wait until all (or specified) checks pass.

        Useful for startup scripts to wait for dependencies before
        the main application starts accepting traffic.

        At least one attempt is always made, even when ``timeout`` is zero
        or negative, and one more attempt is made once the deadline has
        been reached, so a dependency that comes up during the last sleep
        is still noticed.

        Args:
            timeout: Maximum time to wait (seconds)
            interval: Time between check attempts (seconds)
            check_names: Specific checks to wait for. None or an empty list
                means run ``check_all`` and accept HEALTHY or DEGRADED.

        Returns:
            True if all checks passed, False if timeout reached

        Example:
            >>> # Wait up to 60 seconds for database
            >>> if not await registry.wait_until_healthy(timeout=60):
            ...     raise RuntimeError("Dependencies not ready")
        """
        deadline = time.monotonic() + timeout

        while True:
            if check_names:
                all_healthy = await self._named_checks_pass(check_names)
            else:
                agg_result = await self.check_all()
                all_healthy = agg_result.status in (
                    HealthStatus.HEALTHY,
                    HealthStatus.DEGRADED,
                )

            if all_healthy:
                return True

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return False

            # Never sleep past the deadline.
            await asyncio.sleep(min(interval, remaining))
316
+
317
+
318
@dataclass
class AggregatedHealthResult:
    """Overall status together with the individual check results."""

    status: HealthStatus
    checks: list[HealthCheckResult] = field(default_factory=list)
    message: Optional[str] = None

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly mapping of the aggregate.

        Each entry of ``checks`` is serialized through its own ``to_dict``.
        ``message`` is included only when it is truthy.
        """
        serialized_checks = [entry.to_dict() for entry in self.checks]
        payload: dict[str, Any] = {"status": self.status, "checks": serialized_checks}
        if not self.message:
            return payload
        payload["message"] = self.message
        return payload
335
+
336
+
337
+ # =============================================================================
338
+ # Built-in Health Check Functions
339
+ # =============================================================================
340
+
341
+
342
def _normalize_postgres_url(url: str) -> str:
    """Return ``url`` with a plain ``postgresql://`` scheme.

    ``postgres://`` and driver-qualified schemes such as
    ``postgresql+asyncpg://`` are rewritten. Only the scheme is touched,
    so a ``+asyncpg`` substring elsewhere in the URL (for example inside a
    password) is preserved. URLs with any other scheme, or with no
    ``://`` separator, are returned unchanged.
    """
    scheme, separator, remainder = url.partition("://")
    if not separator:
        return url
    base_scheme = scheme.partition("+")[0]
    if base_scheme not in ("postgres", "postgresql"):
        return url
    return f"postgresql://{remainder}"


def check_database(url: Optional[str], *, timeout: float = 5.0) -> HealthCheckFn:
    """
    Create a health check for a PostgreSQL database.

    Uses a simple "SELECT 1" query to verify connectivity.

    Args:
        url: Database URL (postgres://, postgresql:// or a driver-qualified
            form such as postgresql+asyncpg://)
        timeout: Maximum time to wait for the connection (seconds)

    Returns:
        Async health check function. It reports UNHEALTHY when the URL is
        missing or the connection/query fails, and UNKNOWN when asyncpg is
        not installed.

    Example:
        >>> check = check_database(os.getenv("DATABASE_URL"))
        >>> registry.add("database", check, critical=True)
    """

    async def _check() -> HealthCheckResult:
        if not url:
            return HealthCheckResult(
                name="database",
                status=HealthStatus.UNHEALTHY,
                latency_ms=0,
                message="DATABASE_URL not configured",
            )

        start = time.perf_counter()
        try:
            # Imported lazily so the module works without asyncpg installed.
            import asyncpg

            conn = await asyncio.wait_for(
                asyncpg.connect(_normalize_postgres_url(url)),
                timeout=timeout,
            )
            try:
                await conn.fetchval("SELECT 1")
            finally:
                await conn.close()

            return HealthCheckResult(
                name="database",
                status=HealthStatus.HEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
            )
        except asyncio.TimeoutError:
            return HealthCheckResult(
                name="database",
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message="Connection timeout",
            )
        except ImportError:
            # Without asyncpg the database cannot be probed at all, so the
            # status is reported as UNKNOWN rather than UNHEALTHY.
            return HealthCheckResult(
                name="database",
                status=HealthStatus.UNKNOWN,
                latency_ms=0,
                message="asyncpg not installed",
            )
        except Exception as e:
            return HealthCheckResult(
                name="database",
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=str(e),
            )

    return _check
418
+
419
+
420
def check_redis(url: Optional[str], *, timeout: float = 5.0) -> HealthCheckFn:
    """
    Create a health check for Redis.

    Uses PING command to verify connectivity.

    Args:
        url: Redis URL (redis://)
        timeout: Socket connect timeout and maximum time to wait for the
            PING reply (seconds)

    Returns:
        Async health check function. It reports UNHEALTHY when the URL is
        missing or the PING fails, and UNKNOWN when redis-py is not
        installed.

    Example:
        >>> check = check_redis(os.getenv("REDIS_URL"))
        >>> registry.add("redis", check, critical=False)
    """

    async def _check() -> HealthCheckResult:
        if not url:
            return HealthCheckResult(
                name="redis",
                status=HealthStatus.UNHEALTHY,
                latency_ms=0,
                message="REDIS_URL not configured",
            )

        start = time.perf_counter()
        try:
            # Imported lazily so the module works without redis-py installed.
            import redis.asyncio as redis_async

            client = redis_async.from_url(url, socket_connect_timeout=timeout)
            try:
                pong = await asyncio.wait_for(client.ping(), timeout=timeout)
                elapsed_ms = (time.perf_counter() - start) * 1000
            finally:
                # Older redis-py asyncio clients expose only close(); fall
                # back to it so a missing aclose() does not turn a
                # successful PING into a failure.
                close_client = getattr(client, "aclose", None) or client.close
                await close_client()

            if pong:
                return HealthCheckResult(
                    name="redis",
                    status=HealthStatus.HEALTHY,
                    latency_ms=elapsed_ms,
                )
            return HealthCheckResult(
                name="redis",
                status=HealthStatus.UNHEALTHY,
                latency_ms=elapsed_ms,
                message="PING returned False",
            )
        except asyncio.TimeoutError:
            return HealthCheckResult(
                name="redis",
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message="Connection timeout",
            )
        except ImportError:
            return HealthCheckResult(
                name="redis",
                status=HealthStatus.UNKNOWN,
                latency_ms=0,
                message="redis-py not installed",
            )
        except Exception as e:
            return HealthCheckResult(
                name="redis",
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=str(e),
            )

    return _check
491
+
492
+
493
def _check_name_from_url(url: str) -> str:
    """Derive a result name from the host part of ``url``.

    Uses the parsed hostname, so credentials, ports and IPv6 brackets are
    not part of the name (the hostname is lowercased by ``urlparse``).
    Falls back to ``"http"`` when the URL has no host or cannot be parsed.
    """
    from urllib.parse import urlparse

    try:
        host = urlparse(url).hostname
    except (ValueError, TypeError, AttributeError):
        host = None
    return host or "http"


def check_url(
    url: str,
    *,
    method: str = "GET",
    expected_status: int = 200,
    timeout: float = 5.0,
    headers: Optional[dict[str, str]] = None,
) -> HealthCheckFn:
    """
    Create a health check for an HTTP endpoint.

    The name placed in each result is the URL's hostname, or "http" when
    no hostname can be determined.

    Args:
        url: URL to check
        method: HTTP method (default: GET)
        expected_status: Expected HTTP status code (default: 200)
        timeout: Request timeout in seconds
        headers: Optional headers to include

    Returns:
        Async health check function. It reports HEALTHY only when the
        response status equals ``expected_status``; any other status,
        timeout, connection error or exception is reported as UNHEALTHY.

    Example:
        >>> check = check_url("http://api:8080/health")
        >>> registry.add("api", check)
        >>>
        >>> # With custom options
        >>> check = check_url(
        ...     "http://service:8080/ready",
        ...     expected_status=204,
        ...     headers={"Authorization": "Bearer token"},
        ... )
    """
    name = _check_name_from_url(url)

    async def _check() -> HealthCheckResult:
        start = time.perf_counter()
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                response = await client.request(
                    method=method,
                    url=url,
                    headers=headers,
                )

            elapsed_ms = (time.perf_counter() - start) * 1000
            if response.status_code == expected_status:
                return HealthCheckResult(
                    name=name,
                    status=HealthStatus.HEALTHY,
                    latency_ms=elapsed_ms,
                    details={"status_code": response.status_code},
                )
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=elapsed_ms,
                message=f"Expected status {expected_status}, got {response.status_code}",
                details={"status_code": response.status_code},
            )
        except httpx.TimeoutException:
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=f"Request timeout after {timeout}s",
            )
        except httpx.ConnectError as e:
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=f"Connection failed: {e}",
            )
        except Exception as e:
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=str(e),
            )

    return _check
582
+
583
+
584
def check_tcp(
    host: str,
    port: int,
    *,
    timeout: float = 5.0,
) -> HealthCheckFn:
    """
    Create a health check for a TCP port.

    Useful for checking if a service is listening on a port
    without needing protocol-specific logic. The check is HEALTHY as soon
    as a connection is established; errors raised while closing that
    connection afterwards are ignored.

    Args:
        host: Hostname or IP address
        port: Port number
        timeout: Connection timeout in seconds

    Returns:
        Async health check function; results are named "<host>:<port>"

    Example:
        >>> check = check_tcp("database", 5432)
        >>> registry.add("postgres-port", check)
    """
    name = f"{host}:{port}"

    async def _check() -> HealthCheckResult:
        start = time.perf_counter()
        try:
            _, writer = await asyncio.wait_for(
                asyncio.open_connection(host, port),
                timeout=timeout,
            )
        except asyncio.TimeoutError:
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=f"Connection timeout after {timeout}s",
            )
        except OSError as e:
            return HealthCheckResult(
                name=name,
                status=HealthStatus.UNHEALTHY,
                latency_ms=(time.perf_counter() - start) * 1000,
                message=str(e),
            )

        writer.close()
        try:
            await writer.wait_closed()
        except OSError:
            # The connection was established, which is all this check
            # verifies; a reset during close must not mark the port as down.
            pass

        return HealthCheckResult(
            name=name,
            status=HealthStatus.HEALTHY,
            latency_ms=(time.perf_counter() - start) * 1000,
        )

    return _check
641
+
642
+
643
+ # =============================================================================
644
+ # FastAPI Integration
645
+ # =============================================================================
646
+
647
+
648
def add_health_routes(
    app: Any,  # FastAPI
    registry: HealthRegistry,
    *,
    prefix: str = "/_health",
    include_in_schema: bool = False,
    detailed_on_failure: bool = True,
) -> None:
    """
    Add health check routes to a FastAPI application.

    Creates four endpoints (paths shown with the default prefix):
    - `/_health/live` - Liveness probe (always returns 200)
    - `/_health/ready` - Readiness probe (runs all checks); 200 when the
      aggregated status is HEALTHY or DEGRADED, 503 otherwise
    - `/_health/startup` - Startup probe (runs all checks); 200 with a
      terse `{"status": "ok"}` body when the aggregated status is HEALTHY
      or DEGRADED, 503 otherwise
    - `/_health/checks/{name}` - Runs one registered check; 200 only when
      that check is HEALTHY, 503 for any other status, 404 when the
      registry raises KeyError for the name

    Args:
        app: FastAPI application instance
        registry: HealthRegistry with registered checks
        prefix: URL prefix for health routes
        include_in_schema: Include in OpenAPI schema
        detailed_on_failure: Include check details in the 503 responses of
            the readiness and startup probes; when False the body is just
            `{"status": "unhealthy"}`

    Example:
        >>> from fastapi import FastAPI
        >>> from svc_infra.health import HealthRegistry, check_database, add_health_routes
        >>>
        >>> app = FastAPI()
        >>> registry = HealthRegistry()
        >>> registry.add("database", check_database(os.getenv("DATABASE_URL")))
        >>> add_health_routes(app, registry)
    """
    from starlette.responses import JSONResponse

    from svc_infra.api.fastapi.dual.public import public_router

    router = public_router(
        prefix=prefix,
        tags=["health"],
        include_in_schema=include_in_schema,
    )

    # Aggregated statuses that still count as "serving" for the probes.
    passing_statuses = (HealthStatus.HEALTHY, HealthStatus.DEGRADED)

    def _failure_response(result: Any) -> JSONResponse:
        # Single place that decides how much detail a 503 body exposes.
        body = result.to_dict() if detailed_on_failure else {"status": "unhealthy"}
        return JSONResponse(body, status_code=503)

    @router.get("/live")
    async def liveness() -> JSONResponse:
        """Liveness probe - always returns 200 if process is running."""
        return JSONResponse({"status": "ok"})

    @router.get("/ready")
    async def readiness() -> JSONResponse:
        """Readiness probe - checks all dependencies."""
        result = await registry.check_all()

        # Degraded is still ready; the full body lets callers see the issue.
        if result.status in passing_statuses:
            return JSONResponse(result.to_dict(), status_code=200)
        return _failure_response(result)

    @router.get("/startup")
    async def startup() -> JSONResponse:
        """Startup probe - checks critical dependencies only."""
        # NOTE(review): every registered check is executed here; the probe
        # relies on the aggregated status to separate critical failures
        # (UNHEALTHY) from tolerable ones (DEGRADED) - confirm against
        # HealthRegistry.check_all.
        result = await registry.check_all()

        if result.status in passing_statuses:
            return JSONResponse({"status": "ok"}, status_code=200)
        return _failure_response(result)

    @router.get("/checks/{name}")
    async def check_single(name: str) -> JSONResponse:
        """Run a single health check by name."""
        # Keep the try body to the lookup/execution only, so a KeyError
        # raised while serializing the result is not misreported as 404.
        try:
            result = await registry.check_one(name)
        except KeyError:
            return JSONResponse(
                {"error": f"Health check '{name}' not found"},
                status_code=404,
            )

        # Unlike the readiness probe, a DEGRADED single check maps to 503.
        status_code = 200 if result.status == HealthStatus.HEALTHY else 503
        return JSONResponse(result.to_dict(), status_code=status_code)

    app.include_router(router)
744
+
745
+
746
def add_startup_probe(
    app: Any,  # FastAPI
    checks: list[HealthCheckFn],
    *,
    timeout: float = 60.0,
    interval: float = 2.0,
) -> None:
    """
    Add a startup event that waits for dependencies.

    This is useful for ensuring the database, cache, and other
    dependencies are ready before the application starts accepting traffic.

    Each check is registered as critical in a private HealthRegistry under
    the positional name ``startup_<index>``; those names are what appear in
    the failure message.

    Args:
        app: FastAPI application instance
        checks: List of health check functions to wait for
        timeout: Maximum time to wait for all checks (seconds)
        interval: Time between check attempts (seconds)

    Raises:
        RuntimeError: Not raised by this call itself, but by the registered
            startup handler when dependencies aren't ready within timeout

    Example:
        >>> from fastapi import FastAPI
        >>> from svc_infra.health import check_database, check_redis, add_startup_probe
        >>>
        >>> app = FastAPI()
        >>> add_startup_probe(
        ...     app,
        ...     checks=[
        ...         check_database(os.getenv("DATABASE_URL")),
        ...         check_redis(os.getenv("REDIS_URL")),
        ...     ],
        ...     timeout=60,
        ... )
    """
    registry = HealthRegistry()
    for i, check in enumerate(checks):
        registry.add(f"startup_{i}", check, critical=True)

    # NOTE(review): FastAPI documents on_event as deprecated in favour of
    # lifespan handlers; kept as-is so existing apps keep working.
    @app.on_event("startup")
    async def _wait_for_dependencies() -> None:
        import logging

        logger = logging.getLogger("svc_infra.health")
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.info(
            "Waiting for %d dependencies (timeout=%ss)...", len(checks), timeout
        )

        if await registry.wait_until_healthy(timeout=timeout, interval=interval):
            logger.info("All dependencies ready")
            return

        # Re-run the checks once to report which ones are failing. This is a
        # fresh snapshot, so the list can be empty if a dependency recovered
        # right after the wait expired; the startup still fails because the
        # deadline was missed.
        result = await registry.check_all()
        failed = [
            c.name for c in result.checks if c.status == HealthStatus.UNHEALTHY
        ]
        error_msg = f"Dependencies not ready after {timeout}s: {failed}"
        logger.error(error_msg)
        raise RuntimeError(error_msg)
804
+
805
+
806
def add_dependency_health(
    app: Any,  # FastAPI
    name: str,
    check_fn: HealthCheckFn,
    *,
    critical: bool = True,
) -> None:
    """
    Attach one dependency health check to an application.

    The registry is kept on ``app.state._health_registry``. The first call
    for a given app creates that registry and mounts the health routes for
    it via ``add_health_routes`` (with its default options); later calls
    reuse the stored registry and only add the new check.

    Args:
        app: FastAPI application instance
        name: Name for the health check
        check_fn: Async function that returns HealthCheckResult
        critical: Whether failure means service is unhealthy

    Raises:
        ValueError: If ``app`` has no ``state`` attribute

    Example:
        >>> # Add checks incrementally
        >>> add_dependency_health(app, "database", check_database(db_url))
        >>> add_dependency_health(app, "cache", check_redis(redis_url), critical=False)
    """
    if not hasattr(app, "state"):
        raise ValueError("App must have a 'state' attribute (FastAPI/Starlette)")

    app_state = app.state
    try:
        health_registry = app_state._health_registry
    except AttributeError:
        # First registration for this app: create the registry, remember it
        # on the app state, then expose it through the health routes.
        health_registry = HealthRegistry()
        app_state._health_registry = health_registry
        add_health_routes(app, health_registry)

    health_registry.add(name, check_fn, critical=critical)
840
+
841
+
842
+ # =============================================================================
843
+ # Exports
844
+ # =============================================================================
845
+
846
# Names exported by `from <this module> import *`, grouped by role.
__all__ = [
    # Status types
    "HealthStatus",
    "HealthCheckResult",
    "HealthCheck",
    "HealthCheckFn",
    "AggregatedHealthResult",
    # Registry
    "HealthRegistry",
    # Built-in checks (factories for common dependency probes)
    "check_database",
    "check_redis",
    "check_url",
    "check_tcp",
    # FastAPI integration
    "add_health_routes",
    "add_startup_probe",
    "add_dependency_health",
]