svc-infra 0.1.629__py3-none-any.whl → 0.1.631__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of svc-infra might be problematic. Click here for more details.
- svc_infra/api/fastapi/billing/router.py +64 -0
- svc_infra/api/fastapi/billing/setup.py +19 -0
- svc_infra/api/fastapi/db/sql/session.py +16 -0
- svc_infra/api/fastapi/middleware/errors/handlers.py +15 -0
- svc_infra/api/fastapi/middleware/graceful_shutdown.py +87 -0
- svc_infra/api/fastapi/middleware/ratelimit_store.py +12 -6
- svc_infra/api/fastapi/middleware/timeout.py +144 -0
- svc_infra/api/fastapi/setup.py +10 -0
- svc_infra/billing/async_service.py +147 -0
- svc_infra/billing/jobs.py +230 -0
- svc_infra/billing/quotas.py +101 -0
- svc_infra/billing/schemas.py +33 -0
- svc_infra/cache/__init__.py +4 -0
- svc_infra/cache/add.py +158 -0
- svc_infra/docs/adr/0008-billing-primitives.md +34 -0
- svc_infra/docs/adr/0010-timeouts-and-resource-limits.md +54 -0
- svc_infra/docs/billing.md +190 -0
- svc_infra/docs/cache.md +58 -0
- svc_infra/docs/ops.md +4 -0
- svc_infra/docs/rate-limiting.md +4 -0
- svc_infra/docs/timeouts-and-resource-limits.md +147 -0
- svc_infra/http/__init__.py +13 -0
- svc_infra/http/client.py +64 -0
- svc_infra/jobs/builtins/webhook_delivery.py +14 -2
- svc_infra/jobs/runner.py +75 -0
- svc_infra/jobs/worker.py +17 -1
- svc_infra/security/hibp.py +6 -2
- {svc_infra-0.1.629.dist-info → svc_infra-0.1.631.dist-info}/METADATA +1 -1
- {svc_infra-0.1.629.dist-info → svc_infra-0.1.631.dist-info}/RECORD +31 -16
- {svc_infra-0.1.629.dist-info → svc_infra-0.1.631.dist-info}/WHEEL +0 -0
- {svc_infra-0.1.629.dist-info → svc_infra-0.1.631.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# Billing Primitives
|
|
2
|
+
|
|
3
|
+
This module provides internal-first billing building blocks for services that need usage-based and subscription billing without coupling to a specific provider. It complements APF Payments (provider-facing) with portable primitives you can use regardless of Stripe/Aiydan/etc.
|
|
4
|
+
|
|
5
|
+
## What you get
|
|
6
|
+
|
|
7
|
+
- Usage ingestion with idempotency (UsageEvent)
|
|
8
|
+
- Windowed usage aggregation (UsageAggregate) — daily baseline
|
|
9
|
+
- Plan and entitlements registry (Plan, PlanEntitlement)
|
|
10
|
+
- Tenant subscriptions (Subscription)
|
|
11
|
+
- Price catalog for fixed/usage items (Price)
|
|
12
|
+
- Invoice and line items (Invoice, InvoiceLine)
|
|
13
|
+
- A small `BillingService` to record usage, aggregate, and generate monthly invoices
|
|
14
|
+
- Optional provider sync hook to mirror internal invoices/lines to your payment provider
|
|
15
|
+
|
|
16
|
+
## Data model (SQL)
|
|
17
|
+
|
|
18
|
+
Tables (v1):
|
|
19
|
+
- usage_events(id, tenant_id, metric, amount, at_ts, idempotency_key, metadata_json, created_at)
|
|
20
|
+
- Unique (tenant_id, metric, idempotency_key)
|
|
21
|
+
- usage_aggregates(id, tenant_id, metric, period_start, granularity, total, updated_at)
|
|
22
|
+
- Unique (tenant_id, metric, period_start, granularity)
|
|
23
|
+
- plans(id, key, name, description, created_at)
|
|
24
|
+
- plan_entitlements(id, plan_id, key, limit_per_window, window, created_at)
|
|
25
|
+
- subscriptions(id, tenant_id, plan_id, effective_at, ended_at, created_at)
|
|
26
|
+
- prices(id, key, currency, unit_amount, metric, recurring_interval, created_at)
|
|
27
|
+
- invoices(id, tenant_id, period_start, period_end, status, total_amount, currency, provider_invoice_id, created_at)
|
|
28
|
+
- invoice_lines(id, invoice_id, price_id, metric, quantity, amount, created_at)
|
|
29
|
+
|
|
30
|
+
See `src/svc_infra/billing/models.py` for full definitions.
|
|
31
|
+
|
|
32
|
+
## Quick start (Python)
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from datetime import datetime, timezone
|
|
36
|
+
from sqlalchemy.orm import Session
|
|
37
|
+
from svc_infra.billing import BillingService
|
|
38
|
+
|
|
39
|
+
# session: SQLAlchemy Session (sync) targeting your DB
|
|
40
|
+
bs = BillingService(session=session, tenant_id="t_123")
|
|
41
|
+
|
|
42
|
+
# 1) Record usage (idempotent by (tenant, metric, idempotency_key))
|
|
43
|
+
evt_id = bs.record_usage(
|
|
44
|
+
metric="tokens", amount=42,
|
|
45
|
+
at=datetime.now(tz=timezone.utc),
|
|
46
|
+
idempotency_key="req-42",
|
|
47
|
+
metadata={"model": "gpt"},
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# 2) Aggregate for a day (baseline v1 granularity)
|
|
51
|
+
bs.aggregate_daily(metric="tokens", day_start=datetime(2025,1,1,tzinfo=timezone.utc))
|
|
52
|
+
|
|
53
|
+
# 3) Generate a monthly invoice (fixed+usage lines TBD)
|
|
54
|
+
inv_id = bs.generate_monthly_invoice(
|
|
55
|
+
period_start=datetime(2025,1,1,tzinfo=timezone.utc),
|
|
56
|
+
period_end=datetime(2025,2,1,tzinfo=timezone.utc),
|
|
57
|
+
currency="usd",
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Optional: pass a provider sync hook if you want to mirror invoices/lines to Stripe/Aiydan:
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from typing import Callable
|
|
65
|
+
from svc_infra.billing.models import Invoice, InvoiceLine
|
|
66
|
+
|
|
67
|
+
async def sync_to_provider(inv: Invoice, lines: list[InvoiceLine]):
|
|
68
|
+
# Map internal invoice/lines to provider calls here
|
|
69
|
+
...
|
|
70
|
+
|
|
71
|
+
bs = BillingService(session=session, tenant_id="t_123", provider_sync=sync_to_provider)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### FastAPI router (usage ingestion & aggregates)
|
|
75
|
+
|
|
76
|
+
Mount the router and start recording usage with idempotency:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from fastapi import FastAPI
|
|
80
|
+
from svc_infra.api.fastapi.billing.setup import add_billing
|
|
81
|
+
from svc_infra.api.fastapi.middleware.idempotency import IdempotencyMiddleware
|
|
82
|
+
from svc_infra.api.fastapi.middleware.errors.handlers import register_error_handlers
|
|
83
|
+
|
|
84
|
+
app = FastAPI()
|
|
85
|
+
app.add_middleware(IdempotencyMiddleware, store={})
|
|
86
|
+
register_error_handlers(app)
|
|
87
|
+
add_billing(app) # mounts under /_billing
|
|
88
|
+
|
|
89
|
+
# POST /_billing/usage {metric, amount, at?, idempotency_key, metadata?} -> 202 {id}
|
|
90
|
+
# GET /_billing/usage?metric=tokens -> {items: [{period_start, granularity, metric, total}], next_cursor}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Quotas (soft/hard limits)
|
|
94
|
+
|
|
95
|
+
Protect your feature endpoints with a quota dependency based on internal plan entitlements and daily aggregates:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from fastapi import Depends
|
|
99
|
+
from svc_infra.billing.quotas import require_quota
|
|
100
|
+
|
|
101
|
+
@app.get("/generate-report", dependencies=[Depends(require_quota("reports", window="day", soft=False))])
|
|
102
|
+
async def generate_report():
|
|
103
|
+
return {"ok": True}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Relationship to APF Payments
|
|
107
|
+
|
|
108
|
+
- APF Payments is provider-facing: customers, intents, methods, products/prices, subscriptions, invoices, usage records via Stripe/Aiydan adapters and HTTP routers.
|
|
109
|
+
- Billing Primitives is provider-agnostic: an internal ledger of usage, plans/entitlements, and invoices that you can keep even if you change providers.
|
|
110
|
+
- You can use both: continue to use APF Payments for card/payments flows, and use Billing to meter custom features and create internal invoices; selectively sync them out later.
|
|
111
|
+
|
|
112
|
+
## Jobs and webhooks
|
|
113
|
+
|
|
114
|
+
Billing includes helpers to enqueue and process jobs and emit webhooks:
|
|
115
|
+
|
|
116
|
+
- Job names:
|
|
117
|
+
- `billing.aggregate_daily` payload: `{tenant_id, metric, day_start: ISO8601}`
|
|
118
|
+
- `billing.generate_monthly_invoice` payload: `{tenant_id, period_start: ISO8601, period_end: ISO8601, currency}`
|
|
119
|
+
- Emitted webhook topics:
|
|
120
|
+
- `billing.usage_aggregated` payload: `{tenant_id, metric, day_start, total}`
|
|
121
|
+
- `billing.invoice.created` payload: `{tenant_id, invoice_id, period_start, period_end, currency}`
|
|
122
|
+
|
|
123
|
+
Usage with the built-in queue/scheduler and webhooks outbox:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
|
127
|
+
from svc_infra.jobs.easy import easy_jobs
|
|
128
|
+
from svc_infra.webhooks.add import add_webhooks
|
|
129
|
+
from svc_infra.webhooks.service import WebhookService
|
|
130
|
+
from svc_infra.db.outbox import InMemoryOutboxStore
|
|
131
|
+
from svc_infra.webhooks.service import InMemoryWebhookSubscriptions
|
|
132
|
+
from svc_infra.billing.jobs import (
|
|
133
|
+
enqueue_aggregate_daily,
|
|
134
|
+
enqueue_generate_monthly_invoice,
|
|
135
|
+
make_billing_job_handler,
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
# Create queue + scheduler
|
|
139
|
+
queue, scheduler = easy_jobs()
|
|
140
|
+
|
|
141
|
+
# Setup DB async session factory
|
|
142
|
+
engine = create_async_engine("sqlite+aiosqlite:///:memory:")
|
|
143
|
+
SessionLocal = async_sessionmaker(engine, expire_on_commit=False)
|
|
144
|
+
|
|
145
|
+
# Setup webhooks (in-memory stores shown here)
|
|
146
|
+
outbox = InMemoryOutboxStore()
|
|
147
|
+
subs = InMemoryWebhookSubscriptions()
|
|
148
|
+
subs.add("billing.usage_aggregated", url="https://example.test/hook", secret="sekrit")
|
|
149
|
+
webhooks = WebhookService(outbox=outbox, subs=subs)
|
|
150
|
+
|
|
151
|
+
# Worker handler
|
|
152
|
+
handler = make_billing_job_handler(session_factory=SessionLocal, webhooks=webhooks)
|
|
153
|
+
|
|
154
|
+
# Enqueue example jobs
|
|
155
|
+
from datetime import datetime, timezone
|
|
156
|
+
enqueue_aggregate_daily(queue, tenant_id="t1", metric="tokens", day_start=datetime.now(timezone.utc))
|
|
157
|
+
enqueue_generate_monthly_invoice(
|
|
158
|
+
queue, tenant_id="t1", period_start=datetime(2025,1,1,tzinfo=timezone.utc), period_end=datetime(2025,2,1,tzinfo=timezone.utc), currency="usd"
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
# In your worker loop call process_one(queue, handler)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Roadmap (v1 scope)
|
|
165
|
+
|
|
166
|
+
- Router: `/_billing` endpoints for usage ingestion (idempotent), aggregate listing, plans/subscriptions read.
|
|
167
|
+
- Quotas: decorator/dependency to enforce per-plan limits (soft/hard, day/month windows).
|
|
168
|
+
- Jobs: integrate aggregation and invoice-generation with the scheduler; emit `billing.*` webhooks. (helpers available in `svc_infra.billing.jobs`) — Implemented.
|
|
169
|
+
- Provider sync: optional mapper to Stripe invoices/payment intents; reuse idempotency.
|
|
170
|
+
- Migrations: author initial Alembic migration for billing tables.
|
|
171
|
+
- Docs: examples for quotas and jobs; admin flows for plans and prices.
|
|
172
|
+
|
|
173
|
+
## Testing
|
|
174
|
+
|
|
175
|
+
- See `tests/unit/billing/test_billing_service.py` for usage, aggregation, invoice basics, and idempotency uniqueness.
|
|
176
|
+
- Additions planned: router tests (ingest/list), quotas, job executions, webhook events.
|
|
177
|
+
|
|
178
|
+
## Security & Tenancy
|
|
179
|
+
|
|
180
|
+
- All records are tenant-scoped; ensure tenant_id is enforced in your service layer / router dependencies.
|
|
181
|
+
- Protect HTTP endpoints with RBAC permissions (e.g., billing.read, billing.write) if you expose them.
|
|
182
|
+
|
|
183
|
+
## Observability
|
|
184
|
+
|
|
185
|
+
Planned metrics (names may evolve):
|
|
186
|
+
- billing_usage_ingest_total
|
|
187
|
+
- billing_aggregate_duration_ms
|
|
188
|
+
- billing_invoice_generated_total
|
|
189
|
+
|
|
190
|
+
See ADR 0008 for design details.
|
svc_infra/docs/cache.md
CHANGED
|
@@ -16,3 +16,61 @@ async def get_user(user_id: int):
|
|
|
16
16
|
|
|
17
17
|
- `CACHE_PREFIX`, `CACHE_VERSION` – change the namespace alias used by the decorators. 【F:src/svc_infra/cache/README.md†L20-L173】
|
|
18
18
|
- `CACHE_TTL_DEFAULT`, `CACHE_TTL_SHORT`, `CACHE_TTL_LONG` – override canonical TTL buckets. 【F:src/svc_infra/cache/ttl.py†L26-L55】
|
|
19
|
+
|
|
20
|
+
## Easy integration: add_cache
|
|
21
|
+
|
|
22
|
+
Use the one-liner helper to wire cache initialization into your ASGI app lifecycle with sensible defaults. This doesn’t replace the decorators; it standardizes init/readiness/shutdown and exposes a handle for convenience.
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from fastapi import FastAPI
|
|
26
|
+
from svc_infra.cache import add_cache, cache_read, cache_write, resource
|
|
27
|
+
|
|
28
|
+
app = FastAPI()
|
|
29
|
+
|
|
30
|
+
# Wires startup (init + readiness) and shutdown (graceful close). Idempotent.
|
|
31
|
+
add_cache(app)
|
|
32
|
+
|
|
33
|
+
user = resource("user", "user_id")
|
|
34
|
+
|
|
35
|
+
@user.cache_read(suffix="profile", ttl=300)
|
|
36
|
+
async def get_user_profile(user_id: int):
|
|
37
|
+
...
|
|
38
|
+
|
|
39
|
+
@user.cache_write()
|
|
40
|
+
async def update_user_profile(user_id: int, payload):
|
|
41
|
+
...
|
|
42
|
+
|
|
43
|
+
# Optional: direct cache instance for advanced scenarios
|
|
44
|
+
# available after startup when using add_cache(app)
|
|
45
|
+
# app.state.cache -> cashews cache instance
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Env-driven defaults
|
|
49
|
+
|
|
50
|
+
- URL: `CACHE_URL` → `REDIS_URL` → `mem://`
|
|
51
|
+
- Prefix: `CACHE_PREFIX` (default `svc`)
|
|
52
|
+
- Version: `CACHE_VERSION` (default `v1`)
|
|
53
|
+
|
|
54
|
+
You can override explicitly:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
add_cache(app, url="redis://localhost:6379/0", prefix="myapp", version="v2")
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Behavior
|
|
61
|
+
|
|
62
|
+
- Idempotent: multiple calls won’t duplicate handlers.
|
|
63
|
+
- Startup/shutdown hooks: registered when supported by the app; startup performs a readiness probe. Startup is optional for correctness, but recommended for production reliability.
|
|
64
|
+
- app.state exposure: by default, exposes `app.state.cache` to access the underlying cashews instance.
|
|
65
|
+
|
|
66
|
+
### No-app usage
|
|
67
|
+
|
|
68
|
+
If you’re not wiring an app (e.g., a script), you can initialize without startup hooks:
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from svc_infra.cache import add_cache
|
|
72
|
+
|
|
73
|
+
shutdown = add_cache(None) # immediate init (best-effort)
|
|
74
|
+
# ... do work ...
|
|
75
|
+
# shutdown() is a no-op placeholder, returned only for symmetry
|
|
76
|
+
```
|
svc_infra/docs/ops.md
CHANGED
|
@@ -31,3 +31,7 @@ This guide explains how to use svc-infra’s probes, circuit breaker, and metric
|
|
|
31
31
|
|
|
32
32
|
- Prometheus middleware is enabled unless `SVC_INFRA_DISABLE_PROMETHEUS=1`.
|
|
33
33
|
- Observability settings: `METRICS_ENABLED`, `METRICS_PATH`, and optional histogram buckets.
|
|
34
|
+
|
|
35
|
+
## See also
|
|
36
|
+
|
|
37
|
+
- Timeouts & Resource Limits: `./timeouts-and-resource-limits.md` — request/body/handler timeouts, outbound client timeouts, DB statement timeouts, jobs/webhooks, and graceful shutdown.
|
svc_infra/docs/rate-limiting.md
CHANGED
|
@@ -115,6 +115,10 @@ metrics.on_suspect_payload = lambda path, size: logger.warning(
|
|
|
115
115
|
- Consider separate limits for read vs write routes.
|
|
116
116
|
- Combine with request size limits and auth lockout for layered defense.
|
|
117
117
|
|
|
118
|
+
## Related
|
|
119
|
+
|
|
120
|
+
- Timeouts & Resource Limits: `./timeouts-and-resource-limits.md` — complements rate limits by bounding slow uploads, long handlers, and downstream timeouts.
|
|
121
|
+
|
|
118
122
|
## Testing
|
|
119
123
|
|
|
120
124
|
- Use `-m ratelimit` to select rate-limiting tests.
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Timeouts & Resource Limits
|
|
2
|
+
|
|
3
|
+
This guide covers request/handler timeouts, outbound HTTP client timeouts, database statement timeouts, job/webhook delivery timeouts, and graceful shutdown. It explains defaults, configuration, wiring, and recommended tuning by environment.
|
|
4
|
+
|
|
5
|
+
## Why timeouts?
|
|
6
|
+
|
|
7
|
+
- Protects your service from slowloris uploads and hanging requests
|
|
8
|
+
- Limits blast radius of slow downstreams (HTTP, DB, webhooks)
|
|
9
|
+
- Enables predictable backpressure and faster recovery during incidents
|
|
10
|
+
|
|
11
|
+
## Configuration overview
|
|
12
|
+
|
|
13
|
+
The library exposes simple environment variables with sensible defaults. Values expressed in seconds are floats unless noted otherwise.
|
|
14
|
+
|
|
15
|
+
- REQUEST_BODY_TIMEOUT_SECONDS (int)
|
|
16
|
+
- Default: prod=15, nonprod=30
|
|
17
|
+
- Purpose: Abort slow request body reads (slowloris defense)
|
|
18
|
+
- REQUEST_TIMEOUT_SECONDS (int)
|
|
19
|
+
- Default: prod=30, nonprod=15
|
|
20
|
+
- Purpose: Cap overall handler execution time
|
|
21
|
+
- HTTP_CLIENT_TIMEOUT_SECONDS (float)
|
|
22
|
+
- Default: 10.0
|
|
23
|
+
- Purpose: Default timeout for outbound httpx clients created via helpers
|
|
24
|
+
- DB_STATEMENT_TIMEOUT_MS (int)
|
|
25
|
+
- Default: unset (disabled)
|
|
26
|
+
- Purpose: Per-transaction statement timeout (Postgres via SET LOCAL)
|
|
27
|
+
- JOB_DEFAULT_TIMEOUT_SECONDS (float)
|
|
28
|
+
- Default: unset (disabled)
|
|
29
|
+
- Purpose: Caps per-job handler runtime in the in-process jobs runner
|
|
30
|
+
- WEBHOOK_DELIVERY_TIMEOUT_SECONDS (float)
|
|
31
|
+
- Default: falls back to HTTP client default (10.0)
|
|
32
|
+
- Purpose: Timeout for webhook delivery HTTP calls
|
|
33
|
+
- SHUTDOWN_GRACE_PERIOD_SECONDS (float)
|
|
34
|
+
- Default: prod=20.0, nonprod=5.0
|
|
35
|
+
- Purpose: Wait time for in-flight requests to drain on shutdown
|
|
36
|
+
|
|
37
|
+
See ADR-0010 for design rationale: `src/svc_infra/docs/adr/0010-timeouts-and-resource-limits.md`.
|
|
38
|
+
|
|
39
|
+
## Request/handler timeouts (FastAPI)
|
|
40
|
+
|
|
41
|
+
Two middlewares enforce timeouts inside your ASGI app:
|
|
42
|
+
|
|
43
|
+
- BodyReadTimeoutMiddleware
|
|
44
|
+
- Enforces a per-chunk timeout while reading the incoming request body.
|
|
45
|
+
- If reads stall beyond the timeout, responds with 408 application/problem+json.
|
|
46
|
+
- Module: `svc_infra.api.fastapi.middleware.timeout.BodyReadTimeoutMiddleware`
|
|
47
|
+
- HandlerTimeoutMiddleware
|
|
48
|
+
- Caps overall request handler execution time using asyncio.wait_for.
|
|
49
|
+
- If exceeded, responds with 504 application/problem+json.
|
|
50
|
+
- Module: `svc_infra.api.fastapi.middleware.timeout.HandlerTimeoutMiddleware`
|
|
51
|
+
|
|
52
|
+
Example wiring:
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from fastapi import FastAPI
|
|
56
|
+
from svc_infra.api.fastapi.middleware.timeout import (
|
|
57
|
+
BodyReadTimeoutMiddleware,
|
|
58
|
+
HandlerTimeoutMiddleware,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
app = FastAPI()
|
|
62
|
+
|
|
63
|
+
# Abort slow uploads (slowloris) after 15s in prod / 30s nonprod by default
|
|
64
|
+
app.add_middleware(BodyReadTimeoutMiddleware) # or timeout_seconds=20
|
|
65
|
+
|
|
66
|
+
# Cap total handler time (e.g., 30s in prod by default)
|
|
67
|
+
app.add_middleware(HandlerTimeoutMiddleware) # or timeout_seconds=25
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
HTTP semantics:
|
|
71
|
+
|
|
72
|
+
- Body timeout → 408 Request Timeout (Problem+JSON) with fields: type, title, status, detail, instance, trace_id
|
|
73
|
+
- Handler timeout → 504 Gateway Timeout (Problem+JSON) with fields: type, title, status, detail, instance, trace_id
|
|
74
|
+
|
|
75
|
+
## Outbound HTTP client timeouts (httpx)
|
|
76
|
+
|
|
77
|
+
Use the provided helpers to create httpx clients with the default timeout (driven by HTTP_CLIENT_TIMEOUT_SECONDS).
|
|
78
|
+
|
|
79
|
+
- Module: `svc_infra.http.client`
|
|
80
|
+
- `get_default_timeout_seconds()` → float
|
|
81
|
+
- `make_timeout(seconds=None) -> httpx.Timeout`
|
|
82
|
+
- `new_httpx_client(timeout_seconds=None, ...) -> httpx.Client`
|
|
83
|
+
- `new_async_httpx_client(timeout_seconds=None, ...) -> httpx.AsyncClient`
|
|
84
|
+
|
|
85
|
+
Error mapping:
|
|
86
|
+
|
|
87
|
+
- `httpx.TimeoutException` is mapped to 504 Gateway Timeout with Problem+JSON by default when `register_error_handlers(app)` is used.
|
|
88
|
+
- Module: `svc_infra.api.fastapi.middleware.errors.handlers.register_error_handlers`
|
|
89
|
+
|
|
90
|
+
## Database statement timeouts (SQLAlchemy / Postgres)
|
|
91
|
+
|
|
92
|
+
If `DB_STATEMENT_TIMEOUT_MS` is set and Postgres is used, a per-transaction `SET LOCAL statement_timeout = :ms` is executed for sessions yielded by the built-in dependency.
|
|
93
|
+
|
|
94
|
+
- Module: `svc_infra.api.fastapi.db.sql.session.get_session`
|
|
95
|
+
- Non-Postgres dialects (e.g., SQLite) ignore this gracefully.
|
|
96
|
+
|
|
97
|
+
## Jobs and webhooks
|
|
98
|
+
|
|
99
|
+
- Jobs runner
|
|
100
|
+
- Env: `JOB_DEFAULT_TIMEOUT_SECONDS`
|
|
101
|
+
- Module: `svc_infra.jobs.worker.process_one` — wraps job handler with `asyncio.wait_for()` when configured.
|
|
102
|
+
- Webhook delivery
|
|
103
|
+
- Env: `WEBHOOK_DELIVERY_TIMEOUT_SECONDS` (falls back to HTTP client default when unset)
|
|
104
|
+
- Module: `svc_infra.jobs.builtins.webhook_delivery.make_webhook_handler` — uses `new_async_httpx_client` with derived timeout.
|
|
105
|
+
|
|
106
|
+
## Graceful shutdown
|
|
107
|
+
|
|
108
|
+
Install graceful shutdown to wait for in-flight requests (up to a grace period) during application shutdown.
|
|
109
|
+
|
|
110
|
+
- Module: `svc_infra.api.fastapi.middleware.graceful_shutdown.install_graceful_shutdown`
|
|
111
|
+
- Env: `SHUTDOWN_GRACE_PERIOD_SECONDS` (prod=20.0, nonprod=5.0 by default)
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
from svc_infra.api.fastapi.middleware.graceful_shutdown import install_graceful_shutdown
|
|
115
|
+
|
|
116
|
+
install_graceful_shutdown(app) # or grace_seconds=30.0
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Tuning recommendations
|
|
120
|
+
|
|
121
|
+
- Production
|
|
122
|
+
- REQUEST_BODY_TIMEOUT_SECONDS: 10–20s (shorter for public APIs)
|
|
123
|
+
- REQUEST_TIMEOUT_SECONDS: 20–30s (align with upstream proxy/gateway timeouts)
|
|
124
|
+
- HTTP_CLIENT_TIMEOUT_SECONDS: 3–10s (favor quick failover with retries)
|
|
125
|
+
- DB_STATEMENT_TIMEOUT_MS: set per-route/transaction if queries are constrained
|
|
126
|
+
- SHUTDOWN_GRACE_PERIOD_SECONDS: 20–60s depending on peak latencies
|
|
127
|
+
- Staging/Dev
|
|
128
|
+
- Relax timeouts slightly to reduce test flakiness (defaults already reflect this)
|
|
129
|
+
- Gateways/Proxies
|
|
130
|
+
- Ensure upstream (e.g., NGINX, ALB) timeouts exceed the app’s body timeout and are aligned with the handler timeout to avoid double timeouts.
|
|
131
|
+
|
|
132
|
+
## Testing and acceptance
|
|
133
|
+
|
|
134
|
+
- Unit tests cover body read timeout, handler timeout, outbound timeout mapping, and a smoke check for DB statement timeout.
|
|
135
|
+
- Acceptance tests:
|
|
136
|
+
- A2-04: slow handler → 504 Problem
|
|
137
|
+
- A2-05: slow body → 408 Problem or 413 (size) as applicable
|
|
138
|
+
- A2-06: outbound httpx timeout → 504 Problem
|
|
139
|
+
|
|
140
|
+
## Troubleshooting
|
|
141
|
+
|
|
142
|
+
- Seeing 200 instead of 408 for slow uploads under some servers?
|
|
143
|
+
- Some servers buffer the entire body before invoking the app. The BodyReadTimeoutMiddleware greedily drains with per-chunk timeouts and replays to reliably detect slowloris. Ensure HTTP/1.1 parsing with a streaming-capable server implementation (e.g., uvicorn+httptools) in acceptance tests.
|
|
144
|
+
- Outbound timeouts not mapped to Problem?
|
|
145
|
+
- Ensure `register_error_handlers(app)` is installed so `httpx.TimeoutException` returns a 504 Problem.
|
|
146
|
+
- Statement timeout ignored on SQLite?
|
|
147
|
+
- Expected. Non-Postgres dialects skip `SET LOCAL` safely.
|
svc_infra/http/client.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from svc_infra.app.env import pick
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _parse_float_env(name: str, default: float) -> float:
|
|
12
|
+
raw = os.getenv(name)
|
|
13
|
+
if raw is None or raw == "":
|
|
14
|
+
return default
|
|
15
|
+
try:
|
|
16
|
+
return float(raw)
|
|
17
|
+
except ValueError:
|
|
18
|
+
return default
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_default_timeout_seconds() -> float:
    """Resolve the default timeout (in seconds) for outbound HTTP clients.

    The HTTP_CLIENT_TIMEOUT_SECONDS environment variable (float) takes
    precedence. Otherwise the per-environment default chosen through pick()
    is used, which is currently 10.0 seconds for both prod and nonprod.
    """
    return _parse_float_env(
        "HTTP_CLIENT_TIMEOUT_SECONDS",
        pick(prod=10.0, nonprod=10.0),
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def make_timeout(seconds: float | None = None) -> httpx.Timeout:
    """Build an ``httpx.Timeout`` from ``seconds``.

    When ``seconds`` is None the value from get_default_timeout_seconds() is
    used. A single value is passed as ``timeout=``, so httpx applies it to the
    connect, read, write and pool phases alike.
    """
    if seconds is None:
        seconds = get_default_timeout_seconds()
    return httpx.Timeout(timeout=seconds)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def new_httpx_client(
    *,
    timeout_seconds: Optional[float] = None,
    headers: Optional[Dict[str, str]] = None,
    base_url: Optional[str] = None,
    **kwargs: Any,
) -> httpx.Client:
    """Create a sync httpx Client with default timeout and optional headers/base_url.

    Args:
        timeout_seconds: Overrides the default timeout; None means use
            make_timeout()'s default.
        headers: Default headers for the client, if any.
        base_url: Base URL for relative requests. Only forwarded when given.
        **kwargs: Forwarded unchanged to ``httpx.Client``.

    Returns:
        A configured ``httpx.Client``; the caller is responsible for closing it.
    """
    # httpx builds URL(base_url) in the client constructor and rejects None
    # (its own default is ""), so the argument is omitted when not supplied.
    if base_url is not None:
        kwargs["base_url"] = base_url
    return httpx.Client(timeout=make_timeout(timeout_seconds), headers=headers, **kwargs)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def new_async_httpx_client(
    *,
    timeout_seconds: Optional[float] = None,
    headers: Optional[Dict[str, str]] = None,
    base_url: Optional[str] = None,
    **kwargs: Any,
) -> httpx.AsyncClient:
    """Create an async httpx AsyncClient with default timeout and optional headers/base_url.

    Args:
        timeout_seconds: Overrides the default timeout; None means use
            make_timeout()'s default.
        headers: Default headers for the client, if any.
        base_url: Base URL for relative requests. Only forwarded when given.
        **kwargs: Forwarded unchanged to ``httpx.AsyncClient``.

    Returns:
        A configured ``httpx.AsyncClient``; use it as an async context manager
        or close it explicitly.
    """
    # httpx builds URL(base_url) in the client constructor and rejects None
    # (its own default is ""), so the argument is omitted when not supplied.
    if base_url is not None:
        kwargs["base_url"] = base_url
    return httpx.AsyncClient(timeout=make_timeout(timeout_seconds), headers=headers, **kwargs)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
3
|
+
import os
|
|
4
4
|
|
|
5
5
|
from svc_infra.db.inbox import InboxStore
|
|
6
6
|
from svc_infra.db.outbox import OutboxStore
|
|
7
|
+
from svc_infra.http import get_default_timeout_seconds, new_async_httpx_client
|
|
7
8
|
from svc_infra.jobs.queue import Job
|
|
8
9
|
from svc_infra.webhooks.signing import sign
|
|
9
10
|
|
|
@@ -65,7 +66,18 @@ def make_webhook_handler(
|
|
|
65
66
|
version = delivery_payload.get("version")
|
|
66
67
|
if version is not None:
|
|
67
68
|
headers["X-Payload-Version"] = str(version)
|
|
68
|
-
|
|
69
|
+
# Derive timeout: dedicated WEBHOOK_DELIVERY_TIMEOUT_SECONDS or default HTTP client timeout
|
|
70
|
+
timeout_seconds = None
|
|
71
|
+
env_timeout = os.getenv("WEBHOOK_DELIVERY_TIMEOUT_SECONDS")
|
|
72
|
+
if env_timeout:
|
|
73
|
+
try:
|
|
74
|
+
timeout_seconds = float(env_timeout)
|
|
75
|
+
except ValueError:
|
|
76
|
+
timeout_seconds = get_default_timeout_seconds()
|
|
77
|
+
else:
|
|
78
|
+
timeout_seconds = get_default_timeout_seconds()
|
|
79
|
+
|
|
80
|
+
async with new_async_httpx_client(timeout_seconds=timeout_seconds) as client:
|
|
69
81
|
resp = await client.post(url, json=delivery_payload, headers=headers)
|
|
70
82
|
if 200 <= resp.status_code < 300:
|
|
71
83
|
# record delivery and mark processed
|
svc_infra/jobs/runner.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import contextlib
|
|
5
|
+
from typing import Awaitable, Callable, Optional
|
|
6
|
+
|
|
7
|
+
from .queue import JobQueue
|
|
8
|
+
|
|
9
|
+
ProcessFunc = Callable[[object], Awaitable[None]]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class WorkerRunner:
    """Cooperative worker loop with graceful stop.

    - start(): begin polling the queue and processing jobs
    - stop(grace_seconds): signal stop, wait up to the grace period for the
      current job to finish, then cancel whatever is still running

    The queue is used through ``reserve_next()``, ``ack(job_id)`` and
    ``fail(job_id, error=...)``; the handler is awaited with the reserved job.
    """

    def __init__(self, queue: JobQueue, handler: ProcessFunc, *, poll_interval: float = 0.25):
        self._queue = queue
        self._handler = handler
        self._poll_interval = poll_interval
        # Task running _loop(); None until start() is called.
        self._task: Optional[asyncio.Task] = None
        # Set by stop() to ask the loop to exit after the current job.
        self._stopping = asyncio.Event()
        # Task for the job currently being handled, if any.
        self._inflight: Optional[asyncio.Task] = None

    async def _process(self, job) -> None:
        """Run the handler for one job: fail it on error, ack it on success."""
        try:
            await self._handler(job)
        except Exception as exc:  # pragma: no cover
            self._queue.fail(job.id, error=str(exc))
            return
        self._queue.ack(job.id)

    async def _loop(self) -> None:
        """Poll the queue until stop is requested, handling one job at a time."""
        while not self._stopping.is_set():
            job = self._queue.reserve_next()
            if not job:
                await asyncio.sleep(self._poll_interval)
                continue

            # Track the in-flight task so stop() can wait on it.
            self._inflight = asyncio.create_task(self._process(job))
            try:
                await self._inflight
            finally:
                self._inflight = None

    def start(self) -> asyncio.Task:
        """Start the polling loop (if not already running) and return its task.

        Must be called from within a running event loop. Restarting after
        stop() clears the stop flag so the new loop actually polls.
        """
        if self._task is None or self._task.done():
            self._stopping.clear()
            self._task = asyncio.create_task(self._loop())
        return self._task

    async def stop(self, *, grace_seconds: float = 10.0) -> None:
        """Request shutdown and wait for the loop to finish.

        The in-flight job gets up to ``grace_seconds`` to complete. If it is
        still running afterwards, the loop (and with it the job) is cancelled;
        such a job is neither acked nor failed, so it is only retried if the
        queue supports visibility timeouts. Cancellation performed here is not
        propagated to the caller. An error that crashed the loop itself is
        re-raised.
        """
        self._stopping.set()

        inflight = self._inflight
        if inflight is not None and not inflight.done():
            # asyncio.wait (unlike wait_for) neither cancels the job when the
            # timeout expires nor raises on its behalf.
            await asyncio.wait({inflight}, timeout=grace_seconds)

        task = self._task
        if task is None:
            return

        # The loop should exit quickly now that the stop flag is set.
        done, _ = await asyncio.wait({task}, timeout=max(0.1, self._poll_interval + 0.1))
        if not done:
            # Last resort: cancelling the loop also cancels the job it awaits.
            task.cancel()
            await asyncio.wait({task})
            if not task.cancelled():
                task.exception()  # mark as retrieved; shutdown is best-effort here
            return
        if not task.cancelled():
            task.result()  # surface a crash of the loop itself
|
svc_infra/jobs/worker.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
3
5
|
from typing import Awaitable, Callable
|
|
4
6
|
|
|
5
7
|
from .queue import Job, JobQueue
|
|
@@ -7,6 +9,16 @@ from .queue import Job, JobQueue
|
|
|
7
9
|
ProcessFunc = Callable[[Job], Awaitable[None]]
|
|
8
10
|
|
|
9
11
|
|
|
12
|
+
def _get_job_timeout_seconds() -> float | None:
|
|
13
|
+
raw = os.getenv("JOB_DEFAULT_TIMEOUT_SECONDS")
|
|
14
|
+
if not raw:
|
|
15
|
+
return None
|
|
16
|
+
try:
|
|
17
|
+
return float(raw)
|
|
18
|
+
except ValueError:
|
|
19
|
+
return None
|
|
20
|
+
|
|
21
|
+
|
|
10
22
|
async def process_one(queue: JobQueue, handler: ProcessFunc) -> bool:
|
|
11
23
|
"""Reserve a job, process with handler, ack on success or fail with backoff.
|
|
12
24
|
|
|
@@ -16,7 +28,11 @@ async def process_one(queue: JobQueue, handler: ProcessFunc) -> bool:
|
|
|
16
28
|
if not job:
|
|
17
29
|
return False
|
|
18
30
|
try:
|
|
19
|
-
|
|
31
|
+
timeout = _get_job_timeout_seconds()
|
|
32
|
+
if timeout and timeout > 0:
|
|
33
|
+
await asyncio.wait_for(handler(job), timeout=timeout)
|
|
34
|
+
else:
|
|
35
|
+
await handler(job)
|
|
20
36
|
except Exception as exc: # pragma: no cover - exercise in tests by raising
|
|
21
37
|
queue.fail(job.id, error=str(exc))
|
|
22
38
|
return True
|
svc_infra/security/hibp.py
CHANGED
|
@@ -5,7 +5,7 @@ import time
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import Dict, Optional
|
|
7
7
|
|
|
8
|
-
import
|
|
8
|
+
from svc_infra.http import new_httpx_client
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def sha1_hex(data: str) -> str:
|
|
@@ -39,7 +39,11 @@ class HIBPClient:
|
|
|
39
39
|
self.timeout = timeout
|
|
40
40
|
self.user_agent = user_agent
|
|
41
41
|
self._cache: Dict[str, CacheEntry] = {}
|
|
42
|
-
|
|
42
|
+
# Use central factory for consistent defaults; retain explicit timeout override
|
|
43
|
+
self._http = new_httpx_client(
|
|
44
|
+
timeout_seconds=self.timeout,
|
|
45
|
+
headers={"User-Agent": self.user_agent},
|
|
46
|
+
)
|
|
43
47
|
|
|
44
48
|
def _get_cached(self, prefix: str) -> Optional[str]:
|
|
45
49
|
now = time.time()
|