svc-infra 0.1.600__py3-none-any.whl → 0.1.640__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of svc-infra might be problematic. Click here for more details.

Files changed (118) hide show
  1. svc_infra/api/fastapi/admin/__init__.py +3 -0
  2. svc_infra/api/fastapi/admin/add.py +231 -0
  3. svc_infra/api/fastapi/billing/router.py +64 -0
  4. svc_infra/api/fastapi/billing/setup.py +19 -0
  5. svc_infra/api/fastapi/db/sql/add.py +32 -13
  6. svc_infra/api/fastapi/db/sql/crud_router.py +178 -16
  7. svc_infra/api/fastapi/db/sql/session.py +16 -0
  8. svc_infra/api/fastapi/dependencies/ratelimit.py +57 -7
  9. svc_infra/api/fastapi/docs/add.py +160 -0
  10. svc_infra/api/fastapi/docs/landing.py +1 -1
  11. svc_infra/api/fastapi/middleware/errors/handlers.py +45 -7
  12. svc_infra/api/fastapi/middleware/graceful_shutdown.py +87 -0
  13. svc_infra/api/fastapi/middleware/ratelimit.py +59 -1
  14. svc_infra/api/fastapi/middleware/ratelimit_store.py +12 -6
  15. svc_infra/api/fastapi/middleware/timeout.py +148 -0
  16. svc_infra/api/fastapi/openapi/mutators.py +114 -0
  17. svc_infra/api/fastapi/ops/add.py +73 -0
  18. svc_infra/api/fastapi/pagination.py +3 -1
  19. svc_infra/api/fastapi/routers/ping.py +1 -0
  20. svc_infra/api/fastapi/setup.py +11 -1
  21. svc_infra/api/fastapi/tenancy/add.py +19 -0
  22. svc_infra/api/fastapi/tenancy/context.py +112 -0
  23. svc_infra/app/README.md +5 -5
  24. svc_infra/billing/__init__.py +23 -0
  25. svc_infra/billing/async_service.py +147 -0
  26. svc_infra/billing/jobs.py +230 -0
  27. svc_infra/billing/models.py +131 -0
  28. svc_infra/billing/quotas.py +101 -0
  29. svc_infra/billing/schemas.py +33 -0
  30. svc_infra/billing/service.py +115 -0
  31. svc_infra/bundled_docs/README.md +5 -0
  32. svc_infra/bundled_docs/__init__.py +1 -0
  33. svc_infra/bundled_docs/getting-started.md +6 -0
  34. svc_infra/cache/__init__.py +4 -0
  35. svc_infra/cache/add.py +158 -0
  36. svc_infra/cache/backend.py +5 -2
  37. svc_infra/cache/decorators.py +19 -1
  38. svc_infra/cache/keys.py +24 -4
  39. svc_infra/cli/__init__.py +28 -8
  40. svc_infra/cli/cmds/__init__.py +8 -0
  41. svc_infra/cli/cmds/db/nosql/mongo/mongo_cmds.py +4 -3
  42. svc_infra/cli/cmds/db/nosql/mongo/mongo_scaffold_cmds.py +4 -4
  43. svc_infra/cli/cmds/db/sql/alembic_cmds.py +80 -11
  44. svc_infra/cli/cmds/db/sql/sql_export_cmds.py +80 -0
  45. svc_infra/cli/cmds/db/sql/sql_scaffold_cmds.py +3 -3
  46. svc_infra/cli/cmds/docs/docs_cmds.py +140 -0
  47. svc_infra/cli/cmds/dx/__init__.py +12 -0
  48. svc_infra/cli/cmds/dx/dx_cmds.py +99 -0
  49. svc_infra/cli/cmds/help.py +4 -0
  50. svc_infra/cli/cmds/obs/obs_cmds.py +4 -3
  51. svc_infra/cli/cmds/sdk/__init__.py +0 -0
  52. svc_infra/cli/cmds/sdk/sdk_cmds.py +102 -0
  53. svc_infra/data/add.py +61 -0
  54. svc_infra/data/backup.py +53 -0
  55. svc_infra/data/erasure.py +45 -0
  56. svc_infra/data/fixtures.py +40 -0
  57. svc_infra/data/retention.py +55 -0
  58. svc_infra/db/nosql/mongo/README.md +13 -13
  59. svc_infra/db/sql/repository.py +51 -11
  60. svc_infra/db/sql/resource.py +5 -0
  61. svc_infra/db/sql/templates/setup/env_async.py.tmpl +9 -1
  62. svc_infra/db/sql/templates/setup/env_sync.py.tmpl +9 -2
  63. svc_infra/db/sql/tenant.py +79 -0
  64. svc_infra/db/sql/utils.py +18 -4
  65. svc_infra/docs/acceptance-matrix.md +71 -0
  66. svc_infra/docs/acceptance.md +44 -0
  67. svc_infra/docs/admin.md +425 -0
  68. svc_infra/docs/adr/0002-background-jobs-and-scheduling.md +40 -0
  69. svc_infra/docs/adr/0003-webhooks-framework.md +24 -0
  70. svc_infra/docs/adr/0004-tenancy-model.md +42 -0
  71. svc_infra/docs/adr/0005-data-lifecycle.md +86 -0
  72. svc_infra/docs/adr/0006-ops-slos-and-metrics.md +47 -0
  73. svc_infra/docs/adr/0007-docs-and-sdks.md +83 -0
  74. svc_infra/docs/adr/0008-billing-primitives.md +143 -0
  75. svc_infra/docs/adr/0009-acceptance-harness.md +40 -0
  76. svc_infra/docs/adr/0010-timeouts-and-resource-limits.md +54 -0
  77. svc_infra/docs/adr/0011-admin-scope-and-impersonation.md +73 -0
  78. svc_infra/docs/api.md +59 -0
  79. svc_infra/docs/auth.md +11 -0
  80. svc_infra/docs/billing.md +190 -0
  81. svc_infra/docs/cache.md +76 -0
  82. svc_infra/docs/cli.md +74 -0
  83. svc_infra/docs/contributing.md +34 -0
  84. svc_infra/docs/data-lifecycle.md +52 -0
  85. svc_infra/docs/database.md +14 -0
  86. svc_infra/docs/docs-and-sdks.md +62 -0
  87. svc_infra/docs/environment.md +114 -0
  88. svc_infra/docs/getting-started.md +63 -0
  89. svc_infra/docs/idempotency.md +111 -0
  90. svc_infra/docs/jobs.md +67 -0
  91. svc_infra/docs/observability.md +16 -0
  92. svc_infra/docs/ops.md +37 -0
  93. svc_infra/docs/rate-limiting.md +125 -0
  94. svc_infra/docs/repo-review.md +48 -0
  95. svc_infra/docs/security.md +176 -0
  96. svc_infra/docs/tenancy.md +35 -0
  97. svc_infra/docs/timeouts-and-resource-limits.md +147 -0
  98. svc_infra/docs/webhooks.md +112 -0
  99. svc_infra/dx/add.py +63 -0
  100. svc_infra/dx/changelog.py +74 -0
  101. svc_infra/dx/checks.py +67 -0
  102. svc_infra/http/__init__.py +13 -0
  103. svc_infra/http/client.py +72 -0
  104. svc_infra/jobs/builtins/webhook_delivery.py +14 -2
  105. svc_infra/jobs/queue.py +9 -1
  106. svc_infra/jobs/runner.py +75 -0
  107. svc_infra/jobs/worker.py +17 -1
  108. svc_infra/mcp/svc_infra_mcp.py +85 -28
  109. svc_infra/obs/add.py +54 -7
  110. svc_infra/obs/grafana/dashboards/http-overview.json +45 -0
  111. svc_infra/security/headers.py +15 -2
  112. svc_infra/security/hibp.py +6 -2
  113. svc_infra/security/permissions.py +1 -0
  114. svc_infra/webhooks/service.py +10 -2
  115. {svc_infra-0.1.600.dist-info → svc_infra-0.1.640.dist-info}/METADATA +40 -14
  116. {svc_infra-0.1.600.dist-info → svc_infra-0.1.640.dist-info}/RECORD +118 -44
  117. {svc_infra-0.1.600.dist-info → svc_infra-0.1.640.dist-info}/WHEEL +0 -0
  118. {svc_infra-0.1.600.dist-info → svc_infra-0.1.640.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,425 @@
1
+ # Admin Scope & Operations
2
+
3
+ This guide covers the admin subsystem: admin-only routes, permissions, impersonation, and operational guardrails.
4
+
5
+ ## Overview
6
+
7
+ The admin module provides:
8
+ - **Admin router pattern**: Role-gated endpoints under `/admin` with fine-grained permission checks
9
+ - **Impersonation**: Controlled user impersonation for support and debugging with full audit trails
10
+ - **Permission alignment**: `admin.impersonate` permission integrated with the RBAC system
11
+ - **Easy integration**: One-line setup via `add_admin(app, ...)`
12
+
13
+ ## Quick Start
14
+
15
+ ### Basic Setup
16
+
17
+ ```python
18
+ from fastapi import FastAPI
19
+ from svc_infra.api.fastapi.admin import add_admin
20
+
21
+ app = FastAPI()
22
+
23
+ # Mount admin endpoints with defaults
24
+ add_admin(app)
25
+
26
+ # Endpoints are now available:
27
+ # POST /admin/impersonate/start
28
+ # POST /admin/impersonate/stop
29
+ ```
30
+
31
+ ### Custom User Loader
32
+
33
+ If you have a custom user model or retrieval logic:
34
+
35
+ ```python
36
+ from fastapi import HTTPException, Request
37
+
38
+ async def my_user_getter(request: Request, user_id: str):
39
+ # Your custom user loading logic
40
+ user = await my_user_service.get_user(user_id)
41
+ if not user:
42
+ raise HTTPException(404, "user_not_found")
43
+ return user
44
+
45
+ add_admin(app, impersonation_user_getter=my_user_getter)
46
+ ```
47
+
48
+ ### Configuration
49
+
50
+ Environment variables:
51
+
52
+ - `ADMIN_IMPERSONATION_SECRET`: Secret for signing impersonation tokens (falls back to `APP_SECRET` or `"dev-secret"`)
53
+ - `ADMIN_IMPERSONATION_TTL`: Token TTL in seconds (default: 900 = 15 minutes)
54
+ - `ADMIN_IMPERSONATION_COOKIE`: Cookie name (default: `"impersonation"`)
55
+
56
+ Function parameters:
57
+
58
+ ```python
59
+ add_admin(
60
+ app,
61
+ base_path="/admin", # Base path for admin routes
62
+ enable_impersonation=True, # Enable impersonation endpoints
63
+ secret=None, # Override token signing secret
64
+ ttl_seconds=15 * 60, # Token TTL (15 minutes)
65
+ cookie_name="impersonation", # Cookie name
66
+ impersonation_user_getter=None, # Custom user loader
67
+ )
68
+ ```
69
+
70
+ ## Permissions & RBAC
71
+
72
+ ### Admin Role
73
+
74
+ The `admin` role includes the following permissions by default:
75
+
76
+ - `user.read`, `user.write`: User management
77
+ - `billing.read`, `billing.write`: Billing operations
78
+ - `security.session.list`, `security.session.revoke`: Session management
79
+ - `admin.impersonate`: User impersonation
80
+
81
+ ### Permission Guards
82
+
83
+ Admin endpoints use layered guards:
84
+
85
+ 1. **Role gate** at router level: `RequireRoles("admin")`
86
+ 2. **Permission gate** at endpoint level: `RequirePermission("admin.impersonate")`
87
+
88
+ This ensures both coarse-grained role membership and fine-grained permission enforcement.
89
+
90
+ ### Custom Admin Routes
91
+
92
+ ```python
93
+ from svc_infra.api.fastapi.admin import admin_router
94
+ from svc_infra.security.permissions import RequirePermission
95
+
96
+ # Create an admin-only router
97
+ router = admin_router(prefix="/admin", tags=["admin"])
98
+
99
+ @router.get("/analytics", dependencies=[RequirePermission("analytics.read")])
100
+ async def admin_analytics():
101
+ return {"data": "..."}
102
+
103
+ app.include_router(router)
104
+ ```
105
+
106
+ ## Impersonation
107
+
108
+ ### Use Cases
109
+
110
+ - **Customer support**: Debug issues as the affected user
111
+ - **Testing**: Verify permission boundaries and user-specific behavior
112
+ - **Compliance**: Audit access patterns under controlled conditions
113
+
114
+ ### Workflow
115
+
116
+ #### 1. Start Impersonation
117
+
118
+ ```bash
119
+ POST /admin/impersonate/start
120
+ Content-Type: application/json
121
+
122
+ {
123
+ "user_id": "u-12345",
124
+ "reason": "Investigating billing issue #789"
125
+ }
126
+ ```
127
+
128
+ **Requirements:**
129
+ - Authenticated user must have `admin` role
130
+ - User must have `admin.impersonate` permission
131
+ - `reason` field is mandatory
132
+
133
+ **Response:** `204 No Content` with impersonation cookie set
134
+
135
+ #### 2. Make Requests as Impersonated User
136
+
137
+ All subsequent requests will be made as the target user while preserving the admin's permissions for authorization checks:
138
+
139
+ ```bash
140
+ GET /api/v1/profile
141
+ Cookie: impersonation=<token>
142
+
143
+ # Returns the impersonated user's profile
144
+ ```
145
+
146
+ **Behavior:**
147
+ - `request.user` reflects the impersonated user
148
+ - `request.user.roles` inherits the actor's roles (admin maintains permissions)
149
+ - `principal.via` is set to `"impersonated"` for tracking
150
+
151
+ #### 3. Stop Impersonation
152
+
153
+ ```bash
154
+ POST /admin/impersonate/stop
155
+
156
+ # Response: 204 No Content
157
+ # Cookie deleted, subsequent requests use original identity
158
+ ```
159
+
160
+ ### Security Guardrails
161
+
162
+ #### Short TTL
163
+ - Default: 15 minutes
164
+ - No sliding refresh: token expires after TTL regardless of activity
165
+ - Rationale: Minimize blast radius of compromised impersonation sessions
166
+
167
+ #### Explicit Reason
168
+ - Required for every impersonation start
169
+ - Logged in audit trail for compliance and forensics
170
+
171
+ #### Audit Trail
172
+ Every impersonation action is logged with:
173
+ - `admin.impersonation.started`: actor, target, reason, expiry
174
+ - `admin.impersonation.stopped`: termination reason (manual/expired)
175
+
176
+ Example log entry:
177
+ ```json
178
+ {
179
+ "message": "admin.impersonation.started",
180
+ "actor_id": "u-admin-42",
181
+ "target_id": "u-12345",
182
+ "reason": "Investigating billing issue #789",
183
+ "expires_in": 900,
184
+ "timestamp": "2025-11-01T12:00:00Z"
185
+ }
186
+ ```
187
+
188
+ #### Token Security
189
+ - HMAC-SHA256 signed tokens with nonce
190
+ - Includes: actor_id, target_id, issued_at, expires_at, nonce
191
+ - Tamper detection via signature verification
192
+ - Cookie attributes:
193
+ - `httponly=true`: No JavaScript access
194
+ - `samesite=lax`: CSRF protection
195
+ - `secure=true` in production: HTTPS only
196
+
197
+ #### Permission Preservation
198
+ - Impersonated requests maintain the actor's permissions
199
+ - Prevents privilege escalation through impersonation of a higher-privileged user
200
+ - Target user context for data scoping, actor permissions for authorization
201
+
202
+ ### Operational Recommendations
203
+
204
+ #### Development
205
+ ```python
206
+ # Relaxed for local testing
207
+ add_admin(
208
+ app,
209
+ secret="dev-secret",
210
+ ttl_seconds=60 * 60, # 1 hour for convenience
211
+ )
212
+ ```
213
+
214
+ #### Production
215
+ ```python
216
+ # Strict settings
217
+ add_admin(
218
+ app,
219
+ secret=os.environ["ADMIN_IMPERSONATION_SECRET"], # Strong secret from vault
220
+ ttl_seconds=15 * 60, # 15 minutes max
221
+ )
222
+ ```
223
+
224
+ **Best practices:**
225
+ - Rotate `ADMIN_IMPERSONATION_SECRET` periodically
226
+ - Monitor impersonation logs for anomalies
227
+ - Set up alerts for frequent impersonation by the same actor
228
+ - Consider org/tenant scoping for multi-tenant systems
229
+ - Document allowed impersonation reasons in your runbook
230
+
231
+ ## Monitoring & Observability
232
+
233
+ ### Metrics
234
+
235
+ Label admin routes with `route_class=admin` for SLO tracking:
236
+
237
+ ```python
238
+ from svc_infra.obs.add import add_observability
239
+
240
+ def route_classifier(path: str) -> str:
241
+ if path.startswith("/admin"):
242
+ return "admin"
243
+ # ... other classifications
244
+ return "public"
245
+
246
+ add_observability(app, route_classifier=route_classifier)
247
+ ```
248
+
249
+ ### Audit Log Queries
250
+
251
+ Search for impersonation events:
252
+ ```python
253
+ # Example: Query structured logs
254
+ logs.filter(message="admin.impersonation.started") \
255
+ .filter(actor_id="u-admin-42") \
256
+ .order_by(timestamp.desc()) \
257
+ .limit(100)
258
+ ```
259
+
260
+ Compliance report:
261
+ ```python
262
+ # Generate monthly impersonation summary
263
+ impersonations = audit_log.filter(
264
+ event_type__in=["admin.impersonation.started", "admin.impersonation.stopped"],
265
+ timestamp__gte=start_of_month,
266
+ )
267
+ report = impersonations.group_by("actor_id").agg(count="id", targets=unique("target_id"))
268
+ ```
269
+
270
+ ## Testing
271
+
272
+ ### Unit Tests
273
+
274
+ ```python
275
+ import pytest
276
+ from svc_infra.api.fastapi.admin import add_admin
277
+
278
+ @pytest.mark.admin
279
+ def test_impersonation_requires_permission():
280
+ app = make_test_app()
281
+ add_admin(app, impersonation_user_getter=lambda req, uid: User(id=uid))
282
+
283
+ # Without admin role → 403
284
+ client = TestClient(app)
285
+ r = client.post("/admin/impersonate/start", json={"user_id": "u-2", "reason": "test"})
286
+ assert r.status_code == 403
287
+ ```
288
+
289
+ ### Acceptance Tests
290
+
291
+ ```python
292
+ @pytest.mark.acceptance
293
+ @pytest.mark.admin
294
+ def test_impersonation_lifecycle(admin_client):
295
+ # Start impersonation
296
+ r = admin_client.post(
297
+ "/admin/impersonate/start",
298
+ json={"user_id": "u-target", "reason": "acceptance test"}
299
+ )
300
+ assert r.status_code == 204
301
+
302
+ # Verify impersonated context
303
+ profile = admin_client.get("/api/v1/profile")
304
+ assert profile.json()["id"] == "u-target"
305
+
306
+ # Stop impersonation
307
+ r = admin_client.post("/admin/impersonate/stop")
308
+ assert r.status_code == 204
309
+ ```
310
+
311
+ Run admin tests:
312
+ ```bash
313
+ pytest -m admin
314
+ ```
315
+
316
+ ## Troubleshooting
317
+
318
+ ### Impersonation Not Working
319
+
320
+ **Symptom:** Impersonation cookie set but requests still use original identity
321
+
322
+ **Check:**
323
+ 1. Cookie is being sent: verify `Cookie: impersonation=<token>` in request headers
324
+ 2. Token is valid: check signature and expiry
325
+ 3. User getter succeeds: ensure `impersonation_user_getter` doesn't raise exceptions
326
+ 4. Dependency override is active: `add_admin` registers a global override on startup
327
+
328
+ **Debug:**
329
+ ```python
330
+ # Enable debug logging
331
+ import logging
332
+ logging.getLogger("svc_infra.api.fastapi.admin").setLevel(logging.DEBUG)
333
+ ```
334
+
335
+ ### Permission Denied
336
+
337
+ **Symptom:** 403 when calling `/admin/impersonate/start`
338
+
339
+ **Check:**
340
+ 1. User has `admin` role: verify `user.roles` includes `"admin"`
341
+ 2. Permission registered: ensure `admin.impersonate` is in the permission registry
342
+ 3. Permission assigned to role: check `PERMISSION_REGISTRY["admin"]` includes `"admin.impersonate"`
343
+
344
+ ### Token Expired Too Soon
345
+
346
+ **Symptom:** Impersonation session ends before expected TTL
347
+
348
+ **Possible causes:**
349
+ 1. TTL misconfigured: check `ADMIN_IMPERSONATION_TTL` environment variable
350
+ 2. Server time skew: verify system clock is synchronized (NTP)
351
+ 3. Cookie attributes: ensure `max_age` matches TTL
352
+
353
+ ## Security Considerations
354
+
355
+ ### Threat Model
356
+
357
+ | Threat | Mitigation |
358
+ |--------|-----------|
359
+ | Token theft (XSS) | `httponly=true` cookie prevents JavaScript access |
360
+ | Token theft (network) | `secure=true` requires HTTPS in production |
361
+ | CSRF attacks | `samesite=lax` restricts cross-site cookie sending to top-level navigations with safe methods (e.g. GET) |
362
+ | Privilege escalation | Actor permissions preserved during impersonation |
363
+ | Prolonged access | Short TTL (15 min default) with no refresh |
364
+ | Abuse detection | Audit logs with reason, actor, and target tracking |
365
+ | Insider threat | Required reason and comprehensive audit trail |
366
+
367
+ ### Compliance
368
+
369
+ **SOC 2 / ISO 27001:**
370
+ - Audit trail requirement: ✅ All impersonation events logged
371
+ - Access justification: ✅ Mandatory `reason` field
372
+ - Time-bound access: ✅ Short TTL with no renewal
373
+ - Least privilege: ✅ Permission-based access control
374
+
375
+ **GDPR / Data Protection:**
376
+ - Lawful basis: Support/debugging under legitimate interest or contract performance
377
+ - Data minimization: Only necessary user context loaded
378
+ - Transparency: Log access for data subject access requests (DSAR)
379
+ - Documentation: This guide serves as basis for DPA documentation
380
+
381
+ ## API Reference
382
+
383
+ ### `add_admin(app, **kwargs)`
384
+
385
+ Wire admin endpoints and impersonation to a FastAPI app.
386
+
387
+ **Parameters:**
388
+ - `app` (FastAPI): Target application
389
+ - `base_path` (str): Admin router base path (default: `"/admin"`)
390
+ - `enable_impersonation` (bool): Enable impersonation endpoints (default: `True`)
391
+ - `secret` (str | None): Token signing secret (default: env `ADMIN_IMPERSONATION_SECRET`)
392
+ - `ttl_seconds` (int): Token TTL (default: `900` = 15 minutes)
393
+ - `cookie_name` (str): Cookie name (default: `"impersonation"`)
394
+ - `impersonation_user_getter` (Callable | None): Custom user loader `(request, user_id) -> user`
395
+
396
+ **Returns:** None (modifies app in place)
397
+
398
+ **Idempotency:** Safe to call multiple times; only wires once per app instance
399
+
400
+ ### `admin_router(**kwargs)`
401
+
402
+ Create an admin-only router with role gate.
403
+
404
+ **Parameters:** Same as `APIRouter` (FastAPI)
405
+
406
+ **Returns:** APIRouter with `RequireRoles("admin")` dependency
407
+
408
+ **Example:**
409
+ ```python
410
+ from svc_infra.api.fastapi.admin import admin_router
411
+
412
+ router = admin_router(prefix="/admin/reports", tags=["admin-reports"])
413
+
414
+ @router.get("/summary")
415
+ async def admin_summary():
416
+ return {"total_users": 1234}
417
+ ```
418
+
419
+ ## Further Reading
420
+
421
+ - [ADR 0011: Admin scope and impersonation](./adr/0011-admin-scope-and-impersonation.md)
422
+ - [Security & Auth Hardening](./security.md)
423
+ - [Permissions & RBAC](./security.md#permissions-and-rbac)
424
+ - [Audit Logging](./security.md#audit-logging)
425
+ - [Observability](./observability.md)
@@ -0,0 +1,40 @@
1
+ # ADR 0002: Background Jobs & Scheduling
2
+
3
+ Date: 2025-10-15
4
+
5
+ Status: Accepted
6
+
7
+ ## Context
8
+ We need production-grade background job processing and simple scheduling with a one-call setup. The library already includes in-memory queue/scheduler for tests/local. We need a production backend and a minimal runner.
9
+
10
+ ## Decision
11
+ - JobQueue protocol defines enqueue/reserve/ack/fail with retry and linearly increasing backoff (backoff_seconds * attempts). Jobs have: id, name, payload, available_at, attempts, max_attempts, backoff_seconds, last_error.
12
+ - Backends:
13
+ - InMemoryJobQueue for tests/local.
14
+ - RedisJobQueue for production using Redis primitives with visibility timeout and atomic operations.
15
+ - Scheduler:
16
+ - InMemoryScheduler providing interval-based scheduling via next_run_at. Cron parsing is out of scope initially; a simple YAML loader can be added later.
17
+ - Runner:
18
+ - A CLI loop `svc-infra jobs run` will tick the scheduler and process jobs in a loop with small sleep/backoff.
19
+ - Configuration:
20
+ - One-call `easy_jobs()` returns (queue, scheduler). Picks backend via `JOBS_DRIVER` env (memory|redis). Redis URL via `REDIS_URL`.
21
+
22
+ ## Alternatives Considered
23
+ - Using RQ/Huey/Celery: heavier dependencies and less control over API ergonomics; we prefer thin primitives aligned with svc-infra patterns.
24
+ - SQL-backed queue first: we will consider later; Redis is sufficient for v1.
25
+
26
+ ## Consequences
27
+ - Enables outbox/webhook processors on a reliable queue.
28
+ - Minimal cognitive load: consistent APIs, ENV-driven.
29
+ - Future work: SQL queue, cron YAML loader, metrics, concurrency controls.
30
+
31
+ ## Redis Data Model (initial)
32
+ - List `jobs:ready` holds ready job IDs; a ZSET `jobs:delayed` with score=available_at keeps delayed jobs; a HASH per job `job:{id}` stores fields.
33
+ - Reserve uses RPOPLPUSH from `jobs:ready` to `jobs:processing` or BRPOPLPUSH with timeout; sets `visible_at` on job as now+vt and increments `attempts`.
34
+ - Ack removes job from `jobs:processing` and deletes `job:{id}`.
35
+ - Fail increments attempts and computes next available_at = now + backoff_seconds * attempts; moves job to delayed ZSET.
36
+ - A housekeeping step periodically moves due jobs from delayed ZSET to ready list. Reserve also checks ZSET for due jobs opportunistically.
37
+
38
+ ## Testing Strategy
39
+ - Unit tests cover enqueue/reserve/ack/fail, visibility timeout behavior, and moving jobs to the dead-letter queue (DLQ) after max_attempts.
40
+ - Runner tests cover one iteration loop processing.
@@ -0,0 +1,24 @@
1
+ # ADR 0003: Webhooks Framework
2
+
3
+ Date: 2025-10-15
4
+
5
+ Status: Accepted
6
+
7
+ ## Context
8
+ Services need a consistent way to publish domain events to external consumers via webhooks, verify inbound signatures, and handle retries with backoff. We already have an outbox pattern, a job queue, and a webhook delivery worker.
9
+
10
+ ## Decision
11
+ - Event Schema: minimal fields {topic, payload, version, created_at}. Versioning included to evolve payloads.
12
+ - Signing: HMAC-SHA256 over canonical JSON payload; header `X-Signature` carries hex digest. Future: include timestamp and v1 signature header variant.
13
+ - Outbox → Job Queue: Producer writes events to Outbox; outbox tick enqueues delivery jobs; worker performs HTTP POST with signature.
14
+ - Subscriptions: In-memory subscription store maps topic → {url, secret}. Persistence deferred.
15
+ - Verification: Provide helper for verifying incoming webhook requests by recomputing the HMAC.
16
+ - Retry: Already handled by JobQueue backoff; DLQ after max attempts.
17
+
18
+ ## Consequences
19
+ - Clear boundary: producers don't call HTTP directly; they publish to Outbox.
20
+ - Deterministic signing & verification across producer/consumer.
21
+ - Extensibility: timestamped signed headers, secret rotation, persisted subscriptions are future extensions.
22
+
23
+ ## Testing
24
+ - Unit tests for verification helper and end-to-end publish→outbox→queue→delivery using in-memory components and a fake HTTP client.
@@ -0,0 +1,42 @@
1
+ # ADR-0004: Tenancy Model and Enforcement
2
+
3
+ Date: 2025-10-15
4
+
5
+ Status: Proposed
6
+
7
+ ## Context
8
+
9
+ The framework needs a consistent, ergonomic multi-tenant story across modules (API scaffolding, SQL/Mongo persistence, auth/security, payments, jobs, webhooks). Existing patterns already reference `tenant_id` in many places (payments models and service, audit/session models, SQL/Mongo scaffolds). However, enforcement and app ergonomics were not unified.
10
+
11
+ ## Decision
12
+
13
+ Adopt a default "soft-tenant" isolation model via a `tenant_id` column and centralized enforcement primitives:
14
+
15
+ - Resolution: `resolve_tenant_id` and `require_tenant_id` FastAPI dependencies in `api.fastapi.tenancy.context`. Resolution order: override hook → identity (user/api_key) → `X-Tenant-Id` header → `request.state.tenant_id`.
16
+ - Enforcement in SQL: `TenantSqlService(SqlService)` that scopes list/get/update/delete/search/count with a `where` clause and injects `tenant_id` on create when the model supports it. Repository methods accept optional `where` filters.
17
+ - Router ergonomics: `make_tenant_crud_router_plus_sql` which requires `TenantId` and uses `TenantSqlService` under the hood. This keeps route code simple while enforcing scoping.
18
+ - Extensibility: `set_tenant_resolver` hook to override resolution logic per app; `tenant_field` parameter to support custom column names. Future: schema-per-tenant or db-per-tenant via alternate repository/service implementations.
19
+
20
+ ## Alternatives considered
21
+
22
+ 1) Enforce tenancy at the ORM layer (SQLAlchemy events/session) – rejected in favor of clarity and testability; we prefer explicit service/dep composition.
23
+ 2) Global middleware that rewrites queries – rejected due to SQLAlchemy complexity and opacity.
24
+ 3) Only rely on developers to remember filters – rejected due to footguns.
25
+
26
+ ## Consequences
27
+
28
+ - Clear default behavior with escape hatches. Minimal changes for consumers using CRUD builders and SqlService.
29
+ - Requires models to include an optional or required `tenant_id` column for scoping.
30
+ - Non-SQL stores should add equivalent wrappers; Mongo scaffolds already include `tenant_id` fields and can mirror these patterns later.
31
+
32
+ ## Implementation Notes
33
+
34
+ - New modules: `api.fastapi.tenancy.context`, `db.sql.tenant`. Repository updated to accept `where` filters.
35
+ - CRUD router extended with `make_tenant_crud_router_plus_sql` to require `TenantId`.
36
+ - Tests added: `tests/tenancy/*` for resolution and service scoping.
37
+
38
+ ## Open Items
39
+
40
+ - Per-tenant quotas & rate limit overrides (tie into rate limit dependency/middleware via a resolver that returns per-tenant config).
41
+ - Export tenant CLI (dump/import data for a specific tenant).
42
+ - Docs: isolation guidance (column vs schema vs db), migration guidance.
@@ -0,0 +1,86 @@
1
+ # ADR 0005: Data Lifecycle — Soft Delete, Retention, Erasure, Backups
2
+
3
+ Date: 2025-10-16
4
+ Status: Accepted
5
+
6
+ ## Context
7
+ We need a coherent Data Lifecycle story in svc-infra that covers:
8
+ - Migrations & fixtures: simple way to run DB setup/migrations and load reference data.
9
+ - Soft delete conventions: consistent filtering and model scaffolding support.
10
+ - Retention policies: periodic purging of expired records per model/table.
11
+ - GDPR/PII erasure: queued workflow to scrub user-related data while preserving legal audit.
12
+ - Backups/point-in-time recovery (PITR) verification: a job that exercises restore checks or at least validates backup health signals.
13
+
14
+ Existing building blocks:
15
+ - Migrations CLI with end-to-end "setup-and-migrate" and new `sql seed` command for executing a user-specified seed callable.
16
+ - Code: `src/svc_infra/cli/cmds/db/sql/alembic_cmds.py` (cmd_setup_and_migrate, cmd_seed)
17
+ - Soft delete support in repository and scaffold:
18
+ - Repo filtering: `src/svc_infra/db/sql/repository.py` (soft_delete flags, `deleted_at` timestamp, optional active flag)
19
+ - Model scaffolding: `src/svc_infra/db/sql/scaffold.py` (optional `deleted_at` field)
20
+ - Easy-setup helper to coordinate lifecycle bits:
21
+ - `src/svc_infra/data/add.py` provides a startup hook to auto-migrate and optional callbacks for fixtures, retention jobs, and an erasure job.
22
+
23
+ Gaps:
24
+ - No standardized fixture loader contract beyond the callback surface.
25
+ - No built-in retention policy registry or purge execution job.
26
+ - No opinionated GDPR erasure workflow and audit trail.
27
+ - No backup/PITR verification job implementation.
28
+
29
+ ## Decision
30
+ Introduce minimal, composable primitives that keep svc-infra flexible while providing a clear path to production-grade lifecycle.
31
+
32
+ 1) Fixture Loader Contract
33
+ - Provide a simple callable signature for deterministic, idempotent fixture loading: `Callable[[], None | Awaitable[None]]`.
34
+ - Document best practices: UPSERT by natural keys, avoid random IDs, guard on existing rows.
35
+ - Expose via `add_data_lifecycle(on_load_fixtures=...)` (already available); add docs and tests.
36
+
37
+ 2) Retention Policy Registry
38
+ - Define a registry API that allows services to register per-resource retention rules.
39
+ - Basic shape:
40
+ - `RetentionPolicy(name: str, model: type, where: list[Any] | None, older_than_days: int, soft_delete_field: str = "deleted_at")`
41
+ - A purge function computes a cutoff timestamp and issues DELETE or marks soft-delete fields.
42
+ - Execution model: a periodic job (via jobs scheduler) calls `run_retention_purge(session_factory, policies)`.
43
+ - Keep SQL-only first; room for NoSQL extensions later.
44
+
45
+ 3) GDPR Erasure Workflow
46
+ - Provide a single callable entrypoint `erase_principal(principal_id: str) -> None | Awaitable[None]`.
47
+ - Default strategy: enqueue a job that runs a configurable erasure plan composed of steps (delete/soft-delete/overwrite) across tables.
48
+ - Add an audit log entry per erasure request with outcome and timestamp (reuse `security.audit` helpers if feasible).
49
+ - Keep the plan provider pluggable so apps specify which tables/columns participate.
50
+
51
+ 4) Backup/PITR Verification Job
52
+ - Define an interface `verify_backups() -> BackupHealthReport` with a minimal default implementation that:
53
+ - Queries the backup system or driver for last successful backup timestamp and retention window.
54
+ - Emits metrics/logs and returns a structured status.
55
+ - Defer full "restore drill" capability; provide extension hook only.
56
+
57
+ ## Interfaces
58
+ - Registry
59
+ - `register_retention(policy: RetentionPolicy) -> None`
60
+ - `run_retention_purge(session_factory, policies: list[RetentionPolicy]) -> PurgeReport`
61
+ - Erasure
62
+ - `erase_principal(principal_id: str, plan: ErasurePlan, session_factory) -> ErasureReport`
63
+ - Fixtures
64
+ - `load_fixtures()` as provided by caller via `add_data_lifecycle`.
65
+ - Backup
66
+ - `verify_backups() -> BackupHealthReport`
67
+
68
+ ## Alternatives Considered
69
+ - Heavy-weight DSL for retention and erasure: rejected for now; keep APIs Pythonic and pluggable.
70
+ - Trigger-level soft delete enforcement: skipped to avoid provider lock-in; soft delete is instead enforced at the repository and query layer.
71
+ - Full restore drill automation: out of scope for v1; introduce later behind provider integrations.
72
+
73
+ ## Consequences
74
+ - Minimal surface that doesn't over-constrain adopters; provides default patterns and contracts.
75
+ - Requires additional test scaffolds and example docs to demonstrate usage.
76
+ - SQL-focused initial implementation; other backends can plug via similar interfaces.
77
+
78
+ ## Rollout & Testing
79
+ - Add unit/integration tests for fixture loader, retention purge logic, and erasure workflow skeleton.
80
+ - Provide docs in `docs/database.md` with examples and operational guidance.
81
+
82
+ ## References
83
+ - `src/svc_infra/db/sql/repository.py` soft-delete handling
84
+ - `src/svc_infra/db/sql/scaffold.py` deleted_at field scaffolding
85
+ - `src/svc_infra/data/add.py` data lifecycle helper
86
+ - `src/svc_infra/cli/cmds/db/sql/alembic_cmds.py` migrations & seed
@@ -0,0 +1,47 @@
1
+ # ADR-0006: Ops SLOs, SLIs, and Metrics Naming
2
+
3
+ Date: 2025-10-16
4
+
5
+ ## Status
6
+ Accepted
7
+
8
+ ## Context
9
+ We already expose Prometheus metrics via `svc_infra.obs.add.add_observability`, which mounts the `PrometheusMiddleware` and exports:
10
+ - `http_server_requests_total{method,route,code}`
11
+ - `http_server_request_duration_seconds_bucket{route,method}` + _sum/_count
12
+ - `http_server_inflight_requests{route}`
13
+ - `http_server_response_size_bytes_bucket` + _sum/_count (where available)
14
+ - `http_server_exceptions_total{route,exception}` (where available)
15
+
16
+ We also optionally expose SQLAlchemy pool metrics and instrument `requests`/`httpx`. Logging is configured via `svc_infra.app.logging.setup_logging`.
17
+
18
+ ## Decision
19
+ 1. Metric naming and labels
20
+ - Keep `http_server_*` naming aligned with Prometheus and OpenTelemetry conventions.
21
+ - Labels: `route` uses normalized FastAPI route pattern (e.g., `/users/{id}`); `method` is uppercase HTTP verb; `code` is the 3-digit status.
22
+ - Add DB pool metrics with `db_pool_*` prefix when bound (labels: `engine`/`pool_name`).
23
+ 2. SLIs
24
+ - Request Success Rate: 1 - error_ratio, where errors are 5xx by default; optionally include 429/499 as errors per service config.
25
+ - Request Latency: p50/p90/p99 on `http_server_request_duration_seconds` by `route` and overall.
26
+ - Availability (Probes): uptime of `/_ops/live` and `/_ops/ready` endpoints.
27
+ 3. SLOs
28
+ - Default SLOs per service class:
29
+ - Public API: 99.9% success, p99 < 500ms.
30
+ - Internal API/Jobs control plane: 99.5% success, p99 < 1000ms.
31
+ - Error Budget: monthly window; alert on burn rate measured over 2h (fast) and 24h (slow) windows. Budgets computed from success SLI.
32
+ 4. Dashboards & Alerts
33
+ - Provide Grafana JSON dashboard templates referencing the above metrics and labels.
34
+ - Include alert rules for budget burn (fast/slow).
35
+
36
+ ## Consequences
37
+ - Developers can rely on consistent metrics and labels for dashboards.
38
+ - SLO targets are explicit and can be overridden per service.
39
+ - Future work: Emit `http_server_exceptions_total` where missing; provide helper to register per-route classes (public/internal/admin) to pick default SLOs.
40
+
41
+ ## Alternatives Considered
42
+ - OpenTelemetry SDK direct instrumentation was considered but deferred to keep dependency surface minimal; we keep the naming aligned for easy migration.
43
+
44
+ ## References
45
+ - `src/svc_infra/obs/metrics/asgi.py`
46
+ - `src/svc_infra/api/fastapi/ops/add.py`
47
+ - Google SRE Workbook: SLOs and Error Budgets