svc-infra 0.1.600__py3-none-any.whl → 0.1.664__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of svc-infra might be problematic. Click here for more details.

Files changed (140) hide show
  1. svc_infra/api/fastapi/admin/__init__.py +3 -0
  2. svc_infra/api/fastapi/admin/add.py +231 -0
  3. svc_infra/api/fastapi/apf_payments/setup.py +0 -2
  4. svc_infra/api/fastapi/auth/add.py +0 -4
  5. svc_infra/api/fastapi/auth/routers/oauth_router.py +19 -4
  6. svc_infra/api/fastapi/billing/router.py +64 -0
  7. svc_infra/api/fastapi/billing/setup.py +19 -0
  8. svc_infra/api/fastapi/cache/add.py +9 -5
  9. svc_infra/api/fastapi/db/nosql/mongo/add.py +33 -27
  10. svc_infra/api/fastapi/db/sql/add.py +40 -18
  11. svc_infra/api/fastapi/db/sql/crud_router.py +176 -14
  12. svc_infra/api/fastapi/db/sql/session.py +16 -0
  13. svc_infra/api/fastapi/dependencies/ratelimit.py +57 -7
  14. svc_infra/api/fastapi/docs/add.py +160 -0
  15. svc_infra/api/fastapi/docs/landing.py +1 -1
  16. svc_infra/api/fastapi/docs/scoped.py +41 -6
  17. svc_infra/api/fastapi/middleware/errors/handlers.py +45 -7
  18. svc_infra/api/fastapi/middleware/graceful_shutdown.py +87 -0
  19. svc_infra/api/fastapi/middleware/ratelimit.py +59 -1
  20. svc_infra/api/fastapi/middleware/ratelimit_store.py +12 -6
  21. svc_infra/api/fastapi/middleware/timeout.py +148 -0
  22. svc_infra/api/fastapi/openapi/mutators.py +114 -0
  23. svc_infra/api/fastapi/ops/add.py +73 -0
  24. svc_infra/api/fastapi/pagination.py +3 -1
  25. svc_infra/api/fastapi/routers/ping.py +1 -0
  26. svc_infra/api/fastapi/setup.py +21 -13
  27. svc_infra/api/fastapi/tenancy/add.py +19 -0
  28. svc_infra/api/fastapi/tenancy/context.py +112 -0
  29. svc_infra/api/fastapi/versioned.py +101 -0
  30. svc_infra/app/README.md +5 -5
  31. svc_infra/billing/__init__.py +23 -0
  32. svc_infra/billing/async_service.py +147 -0
  33. svc_infra/billing/jobs.py +230 -0
  34. svc_infra/billing/models.py +131 -0
  35. svc_infra/billing/quotas.py +101 -0
  36. svc_infra/billing/schemas.py +33 -0
  37. svc_infra/billing/service.py +115 -0
  38. svc_infra/bundled_docs/README.md +5 -0
  39. svc_infra/bundled_docs/__init__.py +1 -0
  40. svc_infra/bundled_docs/getting-started.md +6 -0
  41. svc_infra/cache/__init__.py +4 -0
  42. svc_infra/cache/add.py +158 -0
  43. svc_infra/cache/backend.py +5 -2
  44. svc_infra/cache/decorators.py +19 -1
  45. svc_infra/cache/keys.py +24 -4
  46. svc_infra/cli/__init__.py +28 -8
  47. svc_infra/cli/cmds/__init__.py +8 -0
  48. svc_infra/cli/cmds/db/nosql/mongo/mongo_cmds.py +4 -3
  49. svc_infra/cli/cmds/db/nosql/mongo/mongo_scaffold_cmds.py +4 -4
  50. svc_infra/cli/cmds/db/sql/alembic_cmds.py +80 -11
  51. svc_infra/cli/cmds/db/sql/sql_export_cmds.py +80 -0
  52. svc_infra/cli/cmds/db/sql/sql_scaffold_cmds.py +3 -3
  53. svc_infra/cli/cmds/docs/docs_cmds.py +140 -0
  54. svc_infra/cli/cmds/dx/__init__.py +12 -0
  55. svc_infra/cli/cmds/dx/dx_cmds.py +99 -0
  56. svc_infra/cli/cmds/help.py +4 -0
  57. svc_infra/cli/cmds/obs/obs_cmds.py +4 -3
  58. svc_infra/cli/cmds/sdk/__init__.py +0 -0
  59. svc_infra/cli/cmds/sdk/sdk_cmds.py +102 -0
  60. svc_infra/data/add.py +61 -0
  61. svc_infra/data/backup.py +53 -0
  62. svc_infra/data/erasure.py +45 -0
  63. svc_infra/data/fixtures.py +40 -0
  64. svc_infra/data/retention.py +55 -0
  65. svc_infra/db/nosql/mongo/README.md +13 -13
  66. svc_infra/db/sql/repository.py +51 -11
  67. svc_infra/db/sql/resource.py +5 -0
  68. svc_infra/db/sql/templates/models_schemas/auth/models.py.tmpl +7 -56
  69. svc_infra/db/sql/templates/setup/env_async.py.tmpl +34 -12
  70. svc_infra/db/sql/templates/setup/env_sync.py.tmpl +29 -7
  71. svc_infra/db/sql/tenant.py +79 -0
  72. svc_infra/db/sql/utils.py +18 -4
  73. svc_infra/docs/acceptance-matrix.md +88 -0
  74. svc_infra/docs/acceptance.md +44 -0
  75. svc_infra/docs/admin.md +425 -0
  76. svc_infra/docs/adr/0002-background-jobs-and-scheduling.md +40 -0
  77. svc_infra/docs/adr/0003-webhooks-framework.md +24 -0
  78. svc_infra/docs/adr/0004-tenancy-model.md +42 -0
  79. svc_infra/docs/adr/0005-data-lifecycle.md +86 -0
  80. svc_infra/docs/adr/0006-ops-slos-and-metrics.md +47 -0
  81. svc_infra/docs/adr/0007-docs-and-sdks.md +83 -0
  82. svc_infra/docs/adr/0008-billing-primitives.md +143 -0
  83. svc_infra/docs/adr/0009-acceptance-harness.md +40 -0
  84. svc_infra/docs/adr/0010-timeouts-and-resource-limits.md +54 -0
  85. svc_infra/docs/adr/0011-admin-scope-and-impersonation.md +73 -0
  86. svc_infra/docs/adr/0012-generic-file-storage.md +498 -0
  87. svc_infra/docs/api.md +186 -0
  88. svc_infra/docs/auth.md +11 -0
  89. svc_infra/docs/billing.md +190 -0
  90. svc_infra/docs/cache.md +76 -0
  91. svc_infra/docs/cli.md +74 -0
  92. svc_infra/docs/contributing.md +34 -0
  93. svc_infra/docs/data-lifecycle.md +52 -0
  94. svc_infra/docs/database.md +14 -0
  95. svc_infra/docs/docs-and-sdks.md +62 -0
  96. svc_infra/docs/environment.md +114 -0
  97. svc_infra/docs/getting-started.md +63 -0
  98. svc_infra/docs/idempotency.md +111 -0
  99. svc_infra/docs/jobs.md +67 -0
  100. svc_infra/docs/observability.md +16 -0
  101. svc_infra/docs/ops.md +37 -0
  102. svc_infra/docs/rate-limiting.md +125 -0
  103. svc_infra/docs/repo-review.md +48 -0
  104. svc_infra/docs/security.md +176 -0
  105. svc_infra/docs/storage.md +982 -0
  106. svc_infra/docs/tenancy.md +35 -0
  107. svc_infra/docs/timeouts-and-resource-limits.md +147 -0
  108. svc_infra/docs/versioned-integrations.md +146 -0
  109. svc_infra/docs/webhooks.md +112 -0
  110. svc_infra/dx/add.py +63 -0
  111. svc_infra/dx/changelog.py +74 -0
  112. svc_infra/dx/checks.py +67 -0
  113. svc_infra/http/__init__.py +13 -0
  114. svc_infra/http/client.py +72 -0
  115. svc_infra/jobs/builtins/webhook_delivery.py +14 -2
  116. svc_infra/jobs/queue.py +9 -1
  117. svc_infra/jobs/runner.py +75 -0
  118. svc_infra/jobs/worker.py +17 -1
  119. svc_infra/mcp/svc_infra_mcp.py +85 -28
  120. svc_infra/obs/add.py +54 -7
  121. svc_infra/obs/grafana/dashboards/http-overview.json +45 -0
  122. svc_infra/security/headers.py +15 -2
  123. svc_infra/security/hibp.py +6 -2
  124. svc_infra/security/models.py +27 -7
  125. svc_infra/security/oauth_models.py +59 -0
  126. svc_infra/security/permissions.py +1 -0
  127. svc_infra/storage/__init__.py +93 -0
  128. svc_infra/storage/add.py +250 -0
  129. svc_infra/storage/backends/__init__.py +11 -0
  130. svc_infra/storage/backends/local.py +331 -0
  131. svc_infra/storage/backends/memory.py +214 -0
  132. svc_infra/storage/backends/s3.py +329 -0
  133. svc_infra/storage/base.py +239 -0
  134. svc_infra/storage/easy.py +182 -0
  135. svc_infra/storage/settings.py +192 -0
  136. svc_infra/webhooks/service.py +10 -2
  137. {svc_infra-0.1.600.dist-info → svc_infra-0.1.664.dist-info}/METADATA +45 -14
  138. {svc_infra-0.1.600.dist-info → svc_infra-0.1.664.dist-info}/RECORD +140 -52
  139. {svc_infra-0.1.600.dist-info → svc_infra-0.1.664.dist-info}/WHEEL +0 -0
  140. {svc_infra-0.1.600.dist-info → svc_infra-0.1.664.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,86 @@
1
+ # ADR 0005: Data Lifecycle — Soft Delete, Retention, Erasure, Backups
2
+
3
+ Date: 2025-10-16
4
+ Status: Accepted
5
+
6
+ ## Context
7
+ We need a coherent Data Lifecycle story in svc-infra that covers:
8
+ - Migrations & fixtures: simple way to run DB setup/migrations and load reference data.
9
+ - Soft delete conventions: consistent filtering and model scaffolding support.
10
+ - Retention policies: periodic purging of expired records per model/table.
11
+ - GDPR/PII erasure: queued workflow to scrub user-related data while preserving legal audit.
12
+ - Backups/PITR verification: a job that exercises restore checks or at least validates backup health signals.
13
+
14
+ Existing building blocks:
15
+ - Migrations CLI with end-to-end "setup-and-migrate" and a new `sql seed` command for executing a user-specified seed callable.
16
+ - Code: `src/svc_infra/cli/cmds/db/sql/alembic_cmds.py` (cmd_setup_and_migrate, cmd_seed)
17
+ - Soft delete support in repository and scaffold:
18
+ - Repo filtering: `src/svc_infra/db/sql/repository.py` (soft_delete flags, `deleted_at` timestamp, optional active flag)
19
+ - Model scaffolding: `src/svc_infra/db/sql/scaffold.py` (optional `deleted_at` field)
20
+ - Easy-setup helper to coordinate lifecycle bits:
21
+ - `src/svc_infra/data/add.py` provides a startup hook to auto-migrate and optional callbacks for fixtures, retention jobs, and an erasure job.
22
+
23
+ Gaps:
24
+ - No standardized fixture loader contract beyond the callback surface.
25
+ - No built-in retention policy registry or purge execution job.
26
+ - No opinionated GDPR erasure workflow and audit trail.
27
+ - No backup/PITR verification job implementation.
28
+
29
+ ## Decision
30
+ Introduce minimal, composable primitives that keep svc-infra flexible while providing a clear path to production-grade lifecycle.
31
+
32
+ 1) Fixture Loader Contract
33
+ - Provide a simple callable signature for deterministic, idempotent fixture loading: `Callable[[], None | Awaitable[None]]`.
34
+ - Document best practices: UPSERT by natural keys, avoid random IDs, guard on existing rows.
35
+ - Expose via `add_data_lifecycle(on_load_fixtures=...)` (already available); add docs and tests.
36
+
37
+ 2) Retention Policy Registry
38
+ - Define a registry API that allows services to register per-resource retention rules.
39
+ - Basic shape:
40
+ - `RetentionPolicy(name: str, model: type, where: list[Any] | None, older_than_days: int, soft_delete_field: str = "deleted_at")`
41
+ - A purge function computes a cutoff timestamp and issues DELETE or marks soft-delete fields.
42
+ - Execution model: a periodic job (via jobs scheduler) calls `run_retention_purge(session_factory, policies)` with the registered policies.
43
+ - Keep SQL-only first; room for NoSQL extensions later.
44
+
45
+ 3) GDPR Erasure Workflow
46
+ - Provide a single callable entrypoint `erase_principal(principal_id: str) -> None | Awaitable[None]`.
47
+ - Default strategy: enqueue a job that runs a configurable erasure plan composed of steps (delete/soft-delete/overwrite) across tables.
48
+ - Add an audit log entry per erasure request with outcome and timestamp (reuse `security.audit` helpers if feasible).
49
+ - Keep the plan provider pluggable so apps specify which tables/columns participate.
50
+
51
+ 4) Backup/PITR Verification Job
52
+ - Define an interface `verify_backups() -> BackupHealthReport` with a minimal default implementation that:
53
+ - Queries the backup system or driver for last successful backup timestamp and retention window.
54
+ - Emits metrics/logs and returns a structured status.
55
+ - Defer full "restore drill" capability; provide extension hook only.
56
+
57
+ ## Interfaces
58
+ - Registry
59
+ - `register_retention(policy: RetentionPolicy) -> None`
60
+ - `run_retention_purge(session_factory, policies: list[RetentionPolicy]) -> PurgeReport`
61
+ - Erasure
62
+ - `erase_principal(principal_id: str, plan: ErasurePlan, session_factory) -> ErasureReport`
63
+ - Fixtures
64
+ - `load_fixtures()` as provided by caller via `add_data_lifecycle`.
65
+ - Backup
66
+ - `verify_backups() -> BackupHealthReport`
67
+
68
+ ## Alternatives Considered
69
+ - Heavy-weight DSL for retention and erasure: rejected for now; keep APIs Pythonic and pluggable.
70
+ - Trigger-level soft delete enforcement: skipped to avoid provider lock-in; enforced at repository and query layer.
71
+ - Full restore drill automation: out of scope for v1; introduce later behind provider integrations.
72
+
73
+ ## Consequences
74
+ - Minimal surface that doesn't over-constrain adopters; provides default patterns and contracts.
75
+ - Requires additional test scaffolds and example docs to demonstrate usage.
76
+ - SQL-focused initial implementation; other backends can plug via similar interfaces.
77
+
78
+ ## Rollout & Testing
79
+ - Add unit/integration tests for fixture loader, retention purge logic, and erasure workflow skeleton.
80
+ - Provide docs in `docs/database.md` with examples and operational guidance.
81
+
82
+ ## References
83
+ - `src/svc_infra/db/sql/repository.py` soft-delete handling
84
+ - `src/svc_infra/db/sql/scaffold.py` deleted_at field scaffolding
85
+ - `src/svc_infra/data/add.py` data lifecycle helper
86
+ - `src/svc_infra/cli/cmds/db/sql/alembic_cmds.py` migrations & seed
@@ -0,0 +1,47 @@
1
+ # ADR 0006: Ops SLOs, SLIs, and Metrics Naming
2
+
3
+ Date: 2025-10-16
4
+
5
+ ## Status
6
+ Accepted
7
+
8
+ ## Context
9
+ We already expose Prometheus metrics via `svc_infra.obs.add.add_observability`, which mounts the `PrometheusMiddleware` and exports:
10
+ - `http_server_requests_total{method,route,code}`
11
+ - `http_server_request_duration_seconds_bucket{route,method}` + _sum/_count
12
+ - `http_server_inflight_requests{route}`
13
+ - `http_server_response_size_bytes_bucket` + _sum/_count (where available)
14
+ - `http_server_exceptions_total{route,exception}` (where available)
15
+
16
+ We also optionally expose SQLAlchemy pool metrics and instrument `requests`/`httpx`. Logging is configured via `svc_infra.app.logging.setup_logging`.
17
+
18
+ ## Decision
19
+ 1. Metric naming and labels
20
+ - Keep `http_server_*` naming aligned with Prometheus and OpenTelemetry conventions.
21
+ - Labels: `route` uses normalized FastAPI route pattern (e.g., `/users/{id}`); `method` is uppercase HTTP verb; `code` is the 3-digit status.
22
+ - Add DB pool metrics with `db_pool_*` prefix when bound (labels: `engine`/`pool_name`).
23
+ 2. SLIs
24
+ - Request Success Rate: 1 - error_ratio, where errors are 5xx by default; optionally include 429/499 as errors per service config.
25
+ - Request Latency: p50/p90/p99 on `http_server_request_duration_seconds` by `route` and overall.
26
+ - Availability (Probes): uptime of `/_ops/live` and `/_ops/ready` endpoints.
27
+ 3. SLOs
28
+ - Default SLOs per service class:
29
+ - Public API: 99.9% success, p99 < 500ms.
30
+ - Internal API/Jobs control plane: 99.5% success, p99 < 1000ms.
31
+ - Error Budget: monthly window; alert on burn rate measured over 2h (fast) and 24h (slow) windows. Budgets are computed from the success SLI.
32
+ 4. Dashboards & Alerts
33
+ - Provide Grafana JSON dashboard templates referencing the above metrics and labels.
34
+ - Include alert rules for budget burn (fast/slow).
35
+
36
+ ## Consequences
37
+ - Developers can rely on consistent metrics and labels for dashboards.
38
+ - SLO targets are explicit and can be overridden per service.
39
+ - Future work: Emit `http_server_exceptions_total` where missing; provide helper to register per-route classes (public/internal/admin) to pick default SLOs.
40
+
41
+ ## Alternatives Considered
42
+ - OpenTelemetry SDK direct instrumentation was considered but deferred to keep dependency surface minimal; we keep the naming aligned for easy migration.
43
+
44
+ ## References
45
+ - `src/svc_infra/obs/metrics/asgi.py`
46
+ - `src/svc_infra/api/fastapi/ops/add.py`
47
+ - Google SRE Workbook: SLOs and Error Budgets
@@ -0,0 +1,83 @@
1
+ # ADR 0007: Docs & SDKs — Research and Design
2
+
3
+ Status: Proposed
4
+
5
+ Date: 2025-10-16
6
+
7
+ ## Context
8
+
9
+ We want a production-ready documentation and SDK experience built on our existing FastAPI scaffolding.
10
+ Current capabilities in the codebase:
11
+
12
+ - Docs endpoints and export
13
+ - `add_docs(app, redoc_url, swagger_url, openapi_url, export_openapi_to)` mounts Swagger, ReDoc, and OpenAPI JSON; optional export on startup.
14
+ - `setup_service_api(...)` renders a landing page with per-version doc cards and local-only root docs.
15
+ - `add_prefixed_docs(...)` exposes scoped docs (e.g., for auth/payments) with per-scope OpenAPI, Swagger, ReDoc.
16
+ - OpenAPI conventions and enrichment pipeline
17
+ - Mutators pipeline (`openapi/mutators.py`) with: conventions, normalized Problem schema, pagination params/components, header params, info mutator, and auth scheme installers.
18
+ - Conventions define `Problem` schema and normalize examples.
19
+ - DX checks
20
+ - OpenAPI Problem+JSON lint in `dx/checks.py` and CLI to validate.
21
+ - SDK stub
22
+ - `add_sdk_generation_stub(app, on_generate=...)` exposes a hook endpoint to trigger SDK generation (no hard deps).
23
+
24
+ Gaps for a complete v1 experience:
25
+
26
+ - Enriched OpenAPI with examples and tags is not yet standardized across routers.
27
+ - No built-in SDK generator CLI; only a stub exists. No pinned toolchain or CI integration.
28
+ - No Postman collection generator.
29
+ - No dark-mode toggle/themes for Swagger/ReDoc (landing page supports light/dark).
30
+ - No smoke tests for generated SDKs.
31
+
32
+ ## Decision
33
+
34
+ We will standardize the Docs & SDKs approach around the following:
35
+
36
+ 1) OpenAPI enrichment
37
+ - Use existing mutators pipeline and add small mutators to:
38
+ - Inject global tags and tag descriptions for major areas (auth, payments, webhooks, ops).
39
+ - Attach minimal `x-codeSamples` for common operations (curl/httpie).
40
+ - Ensure `Problem` schema and example responses are present across 4xx/5xx.
41
+ - Keep pagination and header parameter mutators enabled by default.
42
+
43
+ 2) Docs UI
44
+ - Continue with Swagger UI and ReDoc via `add_docs` and `setup_service_api`.
45
+ - Add an optional dark mode toggle for Swagger UI via custom CSS and a query param (design-only; implement later).
46
+ - Keep local-only exposure of root docs; version-specific docs always exposed under their mount path.
47
+
48
+ 3) SDK generation pipeline (tools and layout)
49
+ - TypeScript: `openapi-typescript` to generate types (no runtime client) to `clients/typescript/`.
50
+ - Python: `openapi-python-client` to generate a client package to `clients/python/`.
51
+ - Provide a new CLI group `svc-infra sdk` with subcommands:
52
+ - `svc-infra sdk ts --schema openapi.json --out clients/typescript --package @org/service`
53
+ - `svc-infra sdk py --schema openapi.json --out clients/python --package service_sdk`
54
+ - `svc-infra sdk postman --schema openapi.json --out clients/postman_collection.json` (via converter)
55
+ - Pin generator versions in a minimal tool manifest (poetry extras and npm devDeps suggestions in docs) rather than hard deps in core library.
56
+ - Add optional CI steps to generate SDKs on release tags; artifacts uploaded; publishing pipelines documented.
57
+
58
+ 4) Postman collection
59
+ - Use the Postman converter (`openapi-to-postmanv2`) to produce `clients/postman_collection.json` from the exported OpenAPI.
60
+
61
+ 5) Testing & verification
62
+ - Extend `dx` checks to include: schema export presence, generator dry-run, and minimal smoke tests:
63
+ - TS: typecheck the generated d.ts.
64
+ - Python: `pip install -e` and import a sample client in a quick script.
65
+ - Keep these checks optional (opt-in via CI config) to avoid burdening minimal users.
66
+
67
+ ## Consequences
68
+
69
+ - Pros: Clear, tool-agnostic pipeline; no heavy runtime dependencies; easy local and CI usage; versioned artifacts.
70
+ - Cons: Adds extra tooling expectations (node and python generators) for teams that opt in.
71
+ - Risk: Generator/tooling churn; mitigate by pinning versions and providing stubs/fallbacks.
72
+
73
+ ## Implementation Notes (planned)
74
+
75
+ - Provide a small `svc_infra/cli/cmds/sdk` module with Typer commands that shell out to the generators if available, with helpful error messages if missing.
76
+ - Document usage in `docs/docs-and-sdks.md` (to be added), including examples and troubleshooting.
77
+ - Keep all new code behind DX/CLI; core library remains free of generator dependencies.
78
+
79
+ ## Out of Scope (v1)
80
+
81
+ - Live “try it” consoles beyond Swagger UI.
82
+ - Multi-language example snippets beyond curl/httpie.
83
+ - Automatic publishing to npm/PyPI (documented manual workflows first).
@@ -0,0 +1,143 @@
1
+ # ADR 0008: Billing Primitives (Usage, Quotas, Invoicing)
2
+
3
+ ## Status
4
+
5
+ Proposed — Research and Design complete for v1 scope.
6
+
7
+ ## Context
8
+
9
+ We need shared billing primitives to support both usage-based and subscription features across services. Goals:
10
+ - Capture fine-grained usage events with idempotency and tenant isolation.
11
+ - Aggregate usage into billable buckets (hour/day/month) with rollups.
12
+ - Enforce entitlements/quotas at runtime (hard/soft limits).
13
+ - Produce invoice data structures and events; enable later integration with external providers (Stripe, Paddle) without coupling core DX to any vendor.
14
+
15
+ Non-goals for v1: taxes/VAT, complex proration rules, refunds/credits automation, dunning flows, provider-specific webhooks/end-to-end reconciliation.
16
+
17
+ ## Analysis: APF Payments vs Billing Primitives
18
+
19
+ What APF Payments already covers (provider-facing):
20
+ - Subscriptions lifecycle via provider adapters and HTTP router
21
+ - Endpoints: create/update/cancel/get/list under `/payments/subscriptions` (see `api/fastapi/apf_payments/router.py`).
22
+ - Local mirror rows (e.g., `PaySubscription`) are persisted for reference, but state is owned by the provider (Stripe/Aiydan).
23
+ - Plans as Product + Price on the provider side
24
+ - APF Payments exposes products (`/payments/products`) and prices (`/payments/prices`). In Stripe semantics, a “plan” is represented by a product+price pair.
25
+ - There is no first-class internal Plan entity in APF Payments; plan semantics are encapsulated as provider product/price metadata.
26
+ - Invoices, invoice line items, and previews
27
+ - Create/finalize/void/pay invoices; add/list invoice lines; preview invoices — all via provider adapters.
28
+ - Usage records (metered billing) at the provider
29
+ - Create/list/get usage records mapped to provider subscription items or prices (`/payments/usage_records`).
30
+ - Cross-cutting:
31
+ - Tenant resolution, pagination, idempotency, and Problem+JSON errors are integrated.
32
+
33
+ What APF Payments does not cover (gaps filled by Billing Primitives):
34
+ - An internal, provider-agnostic Plan and Entitlement registry (keys, windows, limits).
35
+ - Quota enforcement at runtime (soft/hard limits) against internal entitlements.
36
+ - Internal usage ingestion and aggregation store independent of provider APIs
37
+ - `UsageEvent` and `UsageAggregate` tables, with idempotent ingestion and windowed rollups.
38
+ - Internal invoice modeling and generation from aggregates (not just provider invoices)
39
+ - `Invoice` and `InvoiceLine` entities produced from internal totals (jobs-based lifecycle).
40
+ - A dedicated `/_billing` router for usage ingestion and aggregate reads (tenant-scoped, RBAC-protected).
41
+
42
+ Where they intersect and can complement each other:
43
+ - You can continue to use APF Payments for provider-side subscriptions/invoices and also use Billing Primitives to meter internal features and enforce quotas.
44
+ - Optional bridging: a provider sync hook can map internally generated invoices/lines to provider invoices or payment intents when you want unified billing.
45
+ - Usage: internal `UsageEvent` can be mirrored to provider usage-records if desired, but internal aggregation enables analytics and quota decisions without provider round-trips.
46
+
47
+ Answering “Are plans and subscriptions covered in APF Payments?”
48
+ - Subscriptions: Yes — fully supported via `/payments/subscriptions` endpoints with adapters (Stripe/Aiydan). APF also persists a local `PaySubscription` record for reference.
49
+ - Plans: APF Payments does not expose a standalone internal Plan model. Instead, providers represent plans as Product + Price. Billing Primitives introduces an internal `Plan` and `PlanEntitlement` registry to support provider-agnostic limits and quotas.
50
+
51
+ ## Decisions
52
+
53
+ 1) Internal-first data model with optional provider adapters
54
+ - Persist usage, aggregates, plans, subscriptions, invoices in our SQL layer.
55
+ - Provide interfaces for provider adapters (Stripe later) to map internal invoices/lines and sync state when enabled.
56
+
57
+ 2) Usage ingestion API + idempotency
58
+ - FastAPI router exposes POST /_billing/usage capturing events: {tenant_id, metric, amount, at, idempotency_key, metadata}.
59
+ - Enforce request idempotency via existing middleware + usage-event unique index on (tenant_id, metric, idempotency_key).
60
+ - Emit webhook event `billing.usage_recorded` (optional).
61
+
62
+ 3) Aggregation job (scheduler)
63
+ - Background job reads new UsageEvent rows, aggregates into UsageAggregate by key (tenant, metric, period_start, period_granularity).
64
+ - Granularities: hour, day, month (config). Maintains running totals; idempotent.
65
+ - Emits `billing.usage_aggregated` webhook.
66
+
67
+ 4) Entitlements and quotas
68
+ - Define Plan and PlanEntitlement models (feature flags, quotas per window).
69
+ - Subscriptions bind tenant -> plan, effective_at/ended_at.
70
+ - Runtime enforcement via dependency/decorator: `require_quota("metric", window="day", soft=True)` which raises/records when limit exceeded.
71
+
72
+ 5) Invoicing primitives
73
+ - Invoice and InvoiceLine models created for each billing cycle (monthly default). Lines derived from aggregates and static prices.
74
+ - Price model: unit amount, currency, metric reference (for metered), or fixed recurring.
75
+ - Emit `billing.invoice_created` and `billing.invoice_finalized` webhooks; provider adapter can consume and sync out.
76
+
77
+ 6) Observability
78
+ - Metrics: `billing_usage_ingest_total`, `billing_aggregate_duration_ms`, `billing_invoice_generated_total`.
79
+ - Logs: aggregation windows processed, invoice cycles.
80
+
81
+ 7) Security & tenancy
82
+ - All models include tenant_id; APIs require tenant context. RBAC: billing.read/billing.write for admin/operator roles.
83
+
84
+ ## Data Model (SQL)
85
+
86
+ Tables (minimal v1):
87
+ - usage_events(id, tenant_id, metric, amount, at_ts, idempotency_key, metadata_json, created_at)
88
+ - Unique (tenant_id, metric, idempotency_key)
89
+ - usage_aggregates(id, tenant_id, metric, period_start, granularity, total, updated_at)
90
+ - Unique (tenant_id, metric, period_start, granularity)
91
+ - plans(id, key, name, description, created_at)
92
+ - plan_entitlements(id, plan_id, key, limit_per_window, window, created_at)
93
+ - subscriptions(id, tenant_id, plan_id, effective_at, ended_at, created_at)
94
+ - prices(id, key, currency, unit_amount, metric, recurring_interval, created_at)
95
+ - invoices(id, tenant_id, period_start, period_end, status, total_amount, currency, created_at)
96
+ - invoice_lines(id, invoice_id, price_id, metric, quantity, amount, created_at)
97
+
98
+ All tables will be scaffolded with our SQL helpers and tenant mixin, with Alembic templates.
99
+
100
+ ## APIs
101
+
102
+ - POST /_billing/usage: record usage events (body as above). Returns 202 with event id.
103
+ - GET /_billing/usage: list usage by metric and window (aggregated).
104
+ - GET /_billing/plans, GET /_billing/subscriptions, POST /_billing/subscriptions.
105
+ - GET /_billing/invoices, GET /_billing/invoices/{id}.
106
+
107
+ Routers mounted under a `/_billing` prefix and hidden behind auth + tenant guard. OpenAPI tags: Billing.
108
+
109
+ ## Jobs & Webhooks
110
+
111
+ - Job: `aggregate_usage` runs on schedule; creates/updates UsageAggregate rows.
112
+ - Job: `generate_invoices` runs monthly; emits invoice events and inserts Invoice/InvoiceLine rows.
113
+ - Webhooks: `billing.usage_recorded`, `billing.usage_aggregated`, `billing.invoice_created`, `billing.invoice_finalized` (signed via existing module).
114
+
115
+ ## Implementation Plan (Phased)
116
+
117
+ Phase 1 (MVP):
118
+ - Models + migrations; CRUD for Plans/Subs/Prices; Usage ingestion + idempotency; Aggregator job (daily granularity); Basic invoice generator (monthly, fixed price + metered by day sum); Webhooks emitted; Tests for ingestion, aggregation, simple invoice.
119
+
120
+ Phase 2:
121
+ - Granularity options (hourly); soft/hard quota decorator; Read APIs; Observability metrics; Docs.
122
+
123
+ Phase 3 (Provider adapter optional):
124
+ - Stripe adapter skeleton: map internal invoices/lines -> Stripe, idempotent sync; basic webhook handler to update statuses.
125
+
126
+ ## Alternatives Considered
127
+
128
+ - Provider-first approach (Stripe-only) rejected for v1 to keep core DX portable and support non-card use-cases.
129
+ - Event-stream aggregation (Kafka) out-of-scope for framework baseline—can be integrated later.
130
+
131
+ ## Risks
132
+
133
+ - Complexity creep around proration and taxes—explicitly out-of-scope for v1.
134
+ - Performance on large tenants—mitigated by granular aggregation and indexes.
135
+
136
+ ## Testing
137
+
138
+ - Unit tests for ingestion idempotency, aggregation correctness, invoice totals.
139
+ - E2E-ish tests using in-memory queue + sqlite.
140
+
141
+ ## Documentation
142
+
143
+ - `docs/billing.md`: usage API, quotas, invoice lifecycle, and Stripe adapter notes.
@@ -0,0 +1,40 @@
1
+ # ADR 0009: Acceptance Harness & Promotion Gate (A0)
2
+
3
+ Date: 2025-10-17
4
+ Status: Proposed
5
+ Decision: Adopt a post-build acceptance harness that brings up an ephemeral stack (Docker Compose) and gates image promotion on acceptance results.
6
+
7
+ ## Context
8
+ - We need a thin but strict pre-deploy acceptance layer that runs after building images, before promotion.
9
+ - It should validate golden paths across domains and basic operational invariants.
10
+ - It must be easy to run locally and in CI and support a backend matrix (in-memory vs Redis+Postgres).
11
+ - Supply-chain checks (SBOM, image scan, provenance) should be part of the gate.
12
+
13
+ ## Decision
14
+ - Introduce A0 Acceptance Harness:
15
+ - Compose stack (api + db + redis), Makefile helpers (accept/compose_up/wait/seed/down).
16
+ - Seed CLI/script to create ADMIN/USER/TENANT fixtures and API key.
17
+ - Acceptance tests under `tests/acceptance` with `@pytest.mark.acceptance` and BASE_URL.
18
+ - CI job `build-and-accept` steps: build → compose up → seed → `pytest -m "acceptance or smoke"` → OpenAPI lint + API Doctor → teardown.
19
+ - Supply-chain: generate SBOM, image scan (Trivy/Grype) with severity threshold; upload SBOM.
20
+ - Provenance: sign/attest images via cosign/SLSA (best-effort for v1).
21
+ - Backend matrix: two jobs (in-memory vs Redis+Postgres).
22
+
23
+ ## Alternatives
24
+ - Testcontainers-only approach (simpler per-test spin-up) — good DX but slower; we can adopt later for certain suites.
25
+ - Kubernetes-in-Docker (kind) for near-prod parity — heavier; likely a v2 improvement.
26
+
27
+ ## Consequences
28
+ - Slightly longer CI time due to matrix and scans.
29
+ - Clearer promotion safety; early detection of config/env gaps.
30
+
31
+ ## Implementation Notes
32
+ - Files to add:
33
+ - `docker-compose.test.yml`
34
+ - `Makefile` targets: `accept`, `compose_up`, `wait`, `seed`, `down`
35
+ - `tests/acceptance/` scaffolding: `conftest.py`, `_seed.py`, `_auth.py`, `_http.py`, first tests (headers/CORS)
36
+ - CI: `.github/workflows/ci.yml` job `build-and-accept`
37
+ - Env contracts:
38
+ - `SQL_URL`, `REDIS_URL` for backend matrix; `APP_ENV=test-accept` for toggles.
39
+ - Evidence:
40
+ - CI run URL, SBOM artifact link, scan report, acceptance summary.
@@ -0,0 +1,54 @@
1
+ # ADR 0010: Timeouts & Resource Limits (A2)
2
+
3
+ ## Context
4
+ Services need consistent, configurable timeouts to protect against slowloris/body drip attacks, expensive handlers, slow downstreams, and long-running DB statements. Today we lack unified settings and middleware behavior; some httpx usages hard-code timeouts. We also want consistent Problem+JSON semantics for timeout errors.
5
+
6
+ ## Decision
7
+ Introduce environment-driven timeouts and wire them via FastAPI middlewares and helper factories:
8
+
9
+ - Request body read timeout: aborts slow body streaming (e.g., slowloris) with 408 Request Timeout.
10
+ - Overall request timeout: caps handler execution time and returns 504 Gateway Timeout.
11
+ - httpx client defaults: central helpers that pick a sane default timeout from env.
12
+ - DB statement timeout: future work (PG: SET LOCAL statement_timeout; SQLite/dev: asyncio.wait_for wrapper). Scoped in follow-ups.
13
+ - Graceful shutdown: track in-flight HTTP requests and wait up to a grace period for them to finish; provide a worker runner with stop/grace support.
14
+
15
+ ## Configuration
16
+ Environment variables (with suggested defaults):
17
+
18
+ - REQUEST_BODY_TIMEOUT_SECONDS: int, default 15 (prod), 30 (non-prod)
19
+ - REQUEST_TIMEOUT_SECONDS: int, default 30 (prod), 15 (non-prod)
20
+ - HTTP_CLIENT_TIMEOUT_SECONDS: float, default 10.0
21
+
22
+ These are read at process start. Services can override per-env.
23
+
24
+ ## Behavior
25
+ - Body read timeout → 408 application/problem+json with title "Request Timeout"; the optional Retry-After header is not included by default.
26
+ - Handler timeout → 504 application/problem+json with title "Gateway Timeout"; include request trace_id in body if present.
27
+ - Errors use existing problem_response helper.
28
+
29
+ ## Placement
30
+ - Middlewares under svc_infra.api.fastapi.middleware.timeout
31
+ - Wiring in svc_infra.api.fastapi.setup._setup_middlewares (after RequestId, before error catching).
32
+ - httpx helpers under svc_infra.http.client: new_httpx_client/new_async_httpx_client with env-driven defaults.
33
+ - Graceful shutdown under svc_infra.api.fastapi.middleware.graceful_shutdown and svc_infra.jobs.runner.WorkerRunner.
34
+
35
+ ## Alternatives Considered
36
+ - Starlette TimeoutMiddleware: version support/behavior varies; custom middleware gives us consistent Problem+JSON and finer control across environments.
37
+
38
+ ## Consequences
39
+ - Adds two middlewares to every app created via setup_service_api/easy_service_app.
40
+ - Minor overhead per request; mitigated by simple asyncio.wait_for usage.
41
+
42
+ ## Follow-ups
43
+ - PG statement timeout integration; SQLite/dev wrapper.
44
+ - Jobs/webhook runner per-job timeout.
45
+ - Graceful shutdown drainage hooks for servers/workers.
46
+ - Acceptance tests A2-04..A2-06 per PLANS.
47
+
48
+ ## Change log
49
+ - 2025-10-21: Finalized httpx helpers design and placement; proceed to implementation.
50
+
51
+ ---
52
+ Status: Accepted
53
+ Date: 2025-10-21
54
+ Related: PLANS A2 — Timeouts & Resource Limits
@@ -0,0 +1,73 @@
1
+ # 0011 — Admin scope, permissions, and impersonation
2
+
3
+ ## Context
4
+ - The codebase already provides RBAC/permission helpers: `RequireRoles`, `RequirePermission`, ABAC via `RequireABAC`/`owns_resource`.
5
+ - The central permission registry maps roles → permissions (`svc_infra.security.permissions.PERMISSION_REGISTRY`). Notably, the `admin` role includes: `user.read`, `user.write`, `billing.read`, `billing.write`, and `security.session.{list,revoke}`.
6
+ - Acceptance tests demonstrate an “admin-only” route guarded by `RequirePermission("user.write")` and a temporary role override to `admin`.
7
+ - There is no dedicated admin API surface yet, and no impersonation flow; observability docs mention an optional route classifier that can label routes like `public|internal|admin`.
8
+
9
+ ## Goals
10
+ - Define a consistent approach for admin-only surfaces and permission alignment.
11
+ - Establish minimal permissions needed for admin operations, including impersonation.
12
+ - Outline an impersonation flow with security and audit guardrails.
13
+ - Prepare for an easy integration helper (`add_admin`) without implementing it yet.
14
+
15
+ ## Non-goals
16
+ - Implement admin endpoints or impersonation logic in this ADR.
17
+ - Replace existing permissions/guards — this ADR aligns and extends them.
18
+
19
+ ## Decisions
20
+
21
+ 1) Permissions alignment and additions
22
+ - Keep permissions as the canonical guard unit; roles remain a mapping to permissions.
23
+ - Extend the registry with a dedicated permission for impersonation:
24
+ - `admin.impersonate`
25
+ - Keep existing entries (`security.session.{list,revoke}` etc.) as-is.
26
+ - Recommended role → permission mapping updates:
27
+ - `admin`: add `admin.impersonate` (retains existing permissions).
28
+ - `auditor`: keep `audit.read` (already present); may be expanded in the future.
29
+
30
+ 2) Admin router pattern
31
+ - Provide an admin-only router pattern that layers role and permission checks consistently:
32
+ - Top-level: role gate via `RequireRoles("admin")` to reflect the “admin area”.
33
+ - Endpoint-level: permission gates via `RequirePermission(...)` for specific operations.
34
+ - Rationale: roles communicate the coarse-grained area; fine-grained actions are enforced by permissions.
35
+ - A future helper `admin_router()` can wrap `roles_router("admin")` (from `api.fastapi.dual.protected`) for ergonomic mounting.
36
+
37
+ 3) Impersonation flow (design)
38
+ - Endpoints:
39
+ - `POST /admin/impersonate/start` — body: `{ user_id, reason }`; requires `admin.impersonate`.
40
+ - `POST /admin/impersonate/stop` — ends the session.
41
+ - Mechanics:
42
+ - When starting, issue a short-lived, signed impersonation token (or set a dedicated cookie) that encodes: original admin principal id, target user id, issued-at, expires-at, and nonce.
43
+ - Downstream identity resolution should reflect the impersonated user for request handling, while preserving the original admin as the "actor" for auditing.
44
+ - Stopping invalidates the token/cookie (server-side revocation list or versioned secret), and subsequent requests fall back to the admin’s own identity.
45
+ - Safety guardrails:
46
+ - Always require `admin.impersonate`.
47
+ - Enforce explicit `reason` and capture request fingerprint (ip hash, user-agent) with the event.
48
+ - Limit scope by tenant/org if applicable; optionally block actions explicitly marked non-impersonable.
49
+ - Set a short TTL (e.g., 15 minutes) with sliding refresh disabled.
50
+
51
+ 4) Audit logging
52
+ - Emit structured audit events for impersonation lifecycle:
53
+ - `admin.impersonation.started` with actor, target, reason, ip hash, user-agent, and expiry.
54
+ - `admin.impersonation.stopped` with actor, target, and termination reason (expired/manual).
55
+ - Implementation options (future):
56
+ - Minimal: log via the existing logging setup (structured logger, e.g., `logger.bind(...).info("audit", ...)`).
57
+ - Preferred: emit to an audit outbox/table or webhook channel for retention and cross-system visibility.
58
+
59
+ 5) Observability and route classification
60
+ - Encourage passing a `route_classifier` that labels admin routes as `admin` (e.g., for `/admin` base path) so metrics/SLO dashboards can split traffic into `public|internal|admin` classes.
61
+
62
+ ## Consequences
63
+ - Clear, documented permissions and flow for admin-only features.
64
+ - Minimal surface to add later: `admin_router()` and `add_admin(app, ...)` helper that mounts admin routes and wires impersonation endpoints + audit hooks.
65
+ - Tests to plan when implementing:
66
+ - Role vs permission gating behavior on /admin routes.
67
+ - Impersonation start/stop lifecycle and audit emission.
68
+ - Ownership checks that permit admin bypass where intended (e.g., session revocation).
69
+
70
+ ## Follow-ups
71
+ - Update the permission registry to include `admin.impersonate` (and map into `admin`).
72
+ - Implement `admin_router()` and the `add_admin` helper following this ADR.
73
+ - Add admin acceptance tests and documentation for guardrails and operational guidance.