svc-infra 0.1.589__py3-none-any.whl → 0.1.706__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of svc-infra might be problematic. Click here for more details.
- svc_infra/__init__.py +58 -2
- svc_infra/apf_payments/README.md +732 -0
- svc_infra/apf_payments/models.py +133 -42
- svc_infra/apf_payments/provider/__init__.py +4 -0
- svc_infra/apf_payments/provider/aiydan.py +871 -0
- svc_infra/apf_payments/provider/base.py +30 -9
- svc_infra/apf_payments/provider/stripe.py +156 -62
- svc_infra/apf_payments/schemas.py +19 -10
- svc_infra/apf_payments/service.py +211 -68
- svc_infra/apf_payments/settings.py +27 -3
- svc_infra/api/__init__.py +61 -0
- svc_infra/api/fastapi/__init__.py +15 -0
- svc_infra/api/fastapi/admin/__init__.py +3 -0
- svc_infra/api/fastapi/admin/add.py +245 -0
- svc_infra/api/fastapi/apf_payments/router.py +145 -46
- svc_infra/api/fastapi/apf_payments/setup.py +26 -8
- svc_infra/api/fastapi/auth/__init__.py +65 -0
- svc_infra/api/fastapi/auth/_cookies.py +6 -2
- svc_infra/api/fastapi/auth/add.py +27 -14
- svc_infra/api/fastapi/auth/gaurd.py +104 -13
- svc_infra/api/fastapi/auth/mfa/models.py +3 -1
- svc_infra/api/fastapi/auth/mfa/pre_auth.py +10 -6
- svc_infra/api/fastapi/auth/mfa/router.py +15 -8
- svc_infra/api/fastapi/auth/mfa/security.py +1 -2
- svc_infra/api/fastapi/auth/mfa/utils.py +2 -1
- svc_infra/api/fastapi/auth/mfa/verify.py +9 -2
- svc_infra/api/fastapi/auth/policy.py +0 -1
- svc_infra/api/fastapi/auth/providers.py +3 -1
- svc_infra/api/fastapi/auth/routers/apikey_router.py +6 -6
- svc_infra/api/fastapi/auth/routers/oauth_router.py +214 -75
- svc_infra/api/fastapi/auth/routers/session_router.py +67 -0
- svc_infra/api/fastapi/auth/security.py +31 -10
- svc_infra/api/fastapi/auth/sender.py +8 -1
- svc_infra/api/fastapi/auth/settings.py +2 -0
- svc_infra/api/fastapi/auth/state.py +3 -1
- svc_infra/api/fastapi/auth/ws_security.py +275 -0
- svc_infra/api/fastapi/billing/router.py +73 -0
- svc_infra/api/fastapi/billing/setup.py +19 -0
- svc_infra/api/fastapi/cache/add.py +9 -5
- svc_infra/api/fastapi/db/__init__.py +5 -1
- svc_infra/api/fastapi/db/http.py +3 -1
- svc_infra/api/fastapi/db/nosql/__init__.py +39 -1
- svc_infra/api/fastapi/db/nosql/mongo/add.py +47 -32
- svc_infra/api/fastapi/db/nosql/mongo/crud_router.py +30 -11
- svc_infra/api/fastapi/db/sql/__init__.py +5 -1
- svc_infra/api/fastapi/db/sql/add.py +71 -26
- svc_infra/api/fastapi/db/sql/crud_router.py +210 -22
- svc_infra/api/fastapi/db/sql/health.py +3 -1
- svc_infra/api/fastapi/db/sql/session.py +18 -0
- svc_infra/api/fastapi/db/sql/users.py +29 -5
- svc_infra/api/fastapi/dependencies/ratelimit.py +130 -0
- svc_infra/api/fastapi/docs/add.py +173 -0
- svc_infra/api/fastapi/docs/landing.py +4 -2
- svc_infra/api/fastapi/docs/scoped.py +62 -15
- svc_infra/api/fastapi/dual/__init__.py +12 -2
- svc_infra/api/fastapi/dual/dualize.py +1 -1
- svc_infra/api/fastapi/dual/protected.py +126 -4
- svc_infra/api/fastapi/dual/public.py +25 -0
- svc_infra/api/fastapi/dual/router.py +40 -13
- svc_infra/api/fastapi/dx.py +33 -2
- svc_infra/api/fastapi/ease.py +10 -2
- svc_infra/api/fastapi/http/concurrency.py +2 -1
- svc_infra/api/fastapi/http/conditional.py +3 -1
- svc_infra/api/fastapi/middleware/debug.py +4 -1
- svc_infra/api/fastapi/middleware/errors/catchall.py +6 -2
- svc_infra/api/fastapi/middleware/errors/exceptions.py +1 -1
- svc_infra/api/fastapi/middleware/errors/handlers.py +54 -8
- svc_infra/api/fastapi/middleware/graceful_shutdown.py +104 -0
- svc_infra/api/fastapi/middleware/idempotency.py +197 -70
- svc_infra/api/fastapi/middleware/idempotency_store.py +187 -0
- svc_infra/api/fastapi/middleware/optimistic_lock.py +42 -0
- svc_infra/api/fastapi/middleware/ratelimit.py +143 -31
- svc_infra/api/fastapi/middleware/ratelimit_store.py +111 -0
- svc_infra/api/fastapi/middleware/request_id.py +27 -11
- svc_infra/api/fastapi/middleware/request_size_limit.py +36 -0
- svc_infra/api/fastapi/middleware/timeout.py +177 -0
- svc_infra/api/fastapi/openapi/apply.py +5 -3
- svc_infra/api/fastapi/openapi/conventions.py +9 -2
- svc_infra/api/fastapi/openapi/mutators.py +165 -20
- svc_infra/api/fastapi/openapi/pipeline.py +1 -1
- svc_infra/api/fastapi/openapi/security.py +3 -1
- svc_infra/api/fastapi/ops/add.py +75 -0
- svc_infra/api/fastapi/pagination.py +47 -20
- svc_infra/api/fastapi/routers/__init__.py +43 -15
- svc_infra/api/fastapi/routers/ping.py +1 -0
- svc_infra/api/fastapi/setup.py +188 -56
- svc_infra/api/fastapi/tenancy/add.py +19 -0
- svc_infra/api/fastapi/tenancy/context.py +112 -0
- svc_infra/api/fastapi/versioned.py +101 -0
- svc_infra/app/README.md +5 -5
- svc_infra/app/__init__.py +3 -1
- svc_infra/app/env.py +69 -1
- svc_infra/app/logging/add.py +9 -2
- svc_infra/app/logging/formats.py +12 -5
- svc_infra/billing/__init__.py +23 -0
- svc_infra/billing/async_service.py +147 -0
- svc_infra/billing/jobs.py +241 -0
- svc_infra/billing/models.py +177 -0
- svc_infra/billing/quotas.py +103 -0
- svc_infra/billing/schemas.py +36 -0
- svc_infra/billing/service.py +123 -0
- svc_infra/bundled_docs/README.md +5 -0
- svc_infra/bundled_docs/__init__.py +1 -0
- svc_infra/bundled_docs/getting-started.md +6 -0
- svc_infra/cache/__init__.py +9 -0
- svc_infra/cache/add.py +170 -0
- svc_infra/cache/backend.py +7 -6
- svc_infra/cache/decorators.py +81 -15
- svc_infra/cache/demo.py +2 -2
- svc_infra/cache/keys.py +24 -4
- svc_infra/cache/recache.py +26 -14
- svc_infra/cache/resources.py +14 -5
- svc_infra/cache/tags.py +19 -44
- svc_infra/cache/utils.py +3 -1
- svc_infra/cli/__init__.py +52 -8
- svc_infra/cli/__main__.py +4 -0
- svc_infra/cli/cmds/__init__.py +39 -2
- svc_infra/cli/cmds/db/nosql/mongo/mongo_cmds.py +7 -4
- svc_infra/cli/cmds/db/nosql/mongo/mongo_scaffold_cmds.py +7 -5
- svc_infra/cli/cmds/db/ops_cmds.py +270 -0
- svc_infra/cli/cmds/db/sql/alembic_cmds.py +103 -18
- svc_infra/cli/cmds/db/sql/sql_export_cmds.py +88 -0
- svc_infra/cli/cmds/db/sql/sql_scaffold_cmds.py +3 -3
- svc_infra/cli/cmds/docs/docs_cmds.py +142 -0
- svc_infra/cli/cmds/dx/__init__.py +12 -0
- svc_infra/cli/cmds/dx/dx_cmds.py +116 -0
- svc_infra/cli/cmds/health/__init__.py +179 -0
- svc_infra/cli/cmds/health/health_cmds.py +8 -0
- svc_infra/cli/cmds/help.py +4 -0
- svc_infra/cli/cmds/jobs/__init__.py +1 -0
- svc_infra/cli/cmds/jobs/jobs_cmds.py +47 -0
- svc_infra/cli/cmds/obs/obs_cmds.py +36 -15
- svc_infra/cli/cmds/sdk/__init__.py +0 -0
- svc_infra/cli/cmds/sdk/sdk_cmds.py +112 -0
- svc_infra/cli/foundation/runner.py +6 -2
- svc_infra/data/add.py +61 -0
- svc_infra/data/backup.py +58 -0
- svc_infra/data/erasure.py +45 -0
- svc_infra/data/fixtures.py +42 -0
- svc_infra/data/retention.py +61 -0
- svc_infra/db/__init__.py +15 -0
- svc_infra/db/crud_schema.py +9 -9
- svc_infra/db/inbox.py +67 -0
- svc_infra/db/nosql/__init__.py +3 -0
- svc_infra/db/nosql/core.py +30 -9
- svc_infra/db/nosql/indexes.py +3 -1
- svc_infra/db/nosql/management.py +1 -1
- svc_infra/db/nosql/mongo/README.md +13 -13
- svc_infra/db/nosql/mongo/client.py +19 -2
- svc_infra/db/nosql/mongo/settings.py +6 -2
- svc_infra/db/nosql/repository.py +35 -15
- svc_infra/db/nosql/resource.py +20 -3
- svc_infra/db/nosql/scaffold.py +9 -3
- svc_infra/db/nosql/service.py +3 -1
- svc_infra/db/nosql/types.py +6 -2
- svc_infra/db/ops.py +384 -0
- svc_infra/db/outbox.py +108 -0
- svc_infra/db/sql/apikey.py +37 -9
- svc_infra/db/sql/authref.py +9 -3
- svc_infra/db/sql/constants.py +12 -8
- svc_infra/db/sql/core.py +2 -2
- svc_infra/db/sql/management.py +11 -8
- svc_infra/db/sql/repository.py +99 -26
- svc_infra/db/sql/resource.py +5 -0
- svc_infra/db/sql/scaffold.py +6 -2
- svc_infra/db/sql/service.py +15 -5
- svc_infra/db/sql/templates/models_schemas/auth/models.py.tmpl +7 -56
- svc_infra/db/sql/templates/models_schemas/auth/schemas.py.tmpl +1 -1
- svc_infra/db/sql/templates/setup/env_async.py.tmpl +34 -12
- svc_infra/db/sql/templates/setup/env_sync.py.tmpl +29 -7
- svc_infra/db/sql/tenant.py +88 -0
- svc_infra/db/sql/uniq_hooks.py +9 -3
- svc_infra/db/sql/utils.py +138 -51
- svc_infra/db/sql/versioning.py +14 -0
- svc_infra/deploy/__init__.py +538 -0
- svc_infra/documents/__init__.py +100 -0
- svc_infra/documents/add.py +264 -0
- svc_infra/documents/ease.py +233 -0
- svc_infra/documents/models.py +114 -0
- svc_infra/documents/storage.py +264 -0
- svc_infra/dx/add.py +65 -0
- svc_infra/dx/changelog.py +74 -0
- svc_infra/dx/checks.py +68 -0
- svc_infra/exceptions.py +141 -0
- svc_infra/health/__init__.py +864 -0
- svc_infra/http/__init__.py +13 -0
- svc_infra/http/client.py +105 -0
- svc_infra/jobs/builtins/outbox_processor.py +40 -0
- svc_infra/jobs/builtins/webhook_delivery.py +95 -0
- svc_infra/jobs/easy.py +33 -0
- svc_infra/jobs/loader.py +50 -0
- svc_infra/jobs/queue.py +116 -0
- svc_infra/jobs/redis_queue.py +256 -0
- svc_infra/jobs/runner.py +79 -0
- svc_infra/jobs/scheduler.py +53 -0
- svc_infra/jobs/worker.py +40 -0
- svc_infra/loaders/__init__.py +186 -0
- svc_infra/loaders/base.py +142 -0
- svc_infra/loaders/github.py +311 -0
- svc_infra/loaders/models.py +147 -0
- svc_infra/loaders/url.py +235 -0
- svc_infra/logging/__init__.py +374 -0
- svc_infra/mcp/svc_infra_mcp.py +91 -33
- svc_infra/obs/README.md +2 -0
- svc_infra/obs/add.py +65 -9
- svc_infra/obs/cloud_dash.py +2 -1
- svc_infra/obs/grafana/dashboards/http-overview.json +45 -0
- svc_infra/obs/metrics/__init__.py +52 -0
- svc_infra/obs/metrics/asgi.py +13 -7
- svc_infra/obs/metrics/http.py +9 -5
- svc_infra/obs/metrics/sqlalchemy.py +13 -9
- svc_infra/obs/metrics.py +53 -0
- svc_infra/obs/settings.py +6 -2
- svc_infra/security/add.py +217 -0
- svc_infra/security/audit.py +212 -0
- svc_infra/security/audit_service.py +74 -0
- svc_infra/security/headers.py +52 -0
- svc_infra/security/hibp.py +101 -0
- svc_infra/security/jwt_rotation.py +105 -0
- svc_infra/security/lockout.py +102 -0
- svc_infra/security/models.py +287 -0
- svc_infra/security/oauth_models.py +73 -0
- svc_infra/security/org_invites.py +130 -0
- svc_infra/security/passwords.py +79 -0
- svc_infra/security/permissions.py +171 -0
- svc_infra/security/session.py +98 -0
- svc_infra/security/signed_cookies.py +100 -0
- svc_infra/storage/__init__.py +93 -0
- svc_infra/storage/add.py +253 -0
- svc_infra/storage/backends/__init__.py +11 -0
- svc_infra/storage/backends/local.py +339 -0
- svc_infra/storage/backends/memory.py +216 -0
- svc_infra/storage/backends/s3.py +353 -0
- svc_infra/storage/base.py +239 -0
- svc_infra/storage/easy.py +185 -0
- svc_infra/storage/settings.py +195 -0
- svc_infra/testing/__init__.py +685 -0
- svc_infra/utils.py +7 -3
- svc_infra/webhooks/__init__.py +69 -0
- svc_infra/webhooks/add.py +339 -0
- svc_infra/webhooks/encryption.py +115 -0
- svc_infra/webhooks/fastapi.py +39 -0
- svc_infra/webhooks/router.py +55 -0
- svc_infra/webhooks/service.py +70 -0
- svc_infra/webhooks/signing.py +34 -0
- svc_infra/websocket/__init__.py +79 -0
- svc_infra/websocket/add.py +140 -0
- svc_infra/websocket/client.py +282 -0
- svc_infra/websocket/config.py +69 -0
- svc_infra/websocket/easy.py +76 -0
- svc_infra/websocket/exceptions.py +61 -0
- svc_infra/websocket/manager.py +344 -0
- svc_infra/websocket/models.py +49 -0
- svc_infra-0.1.706.dist-info/LICENSE +21 -0
- svc_infra-0.1.706.dist-info/METADATA +356 -0
- svc_infra-0.1.706.dist-info/RECORD +357 -0
- svc_infra-0.1.589.dist-info/METADATA +0 -79
- svc_infra-0.1.589.dist-info/RECORD +0 -234
- {svc_infra-0.1.589.dist-info → svc_infra-0.1.706.dist-info}/WHEEL +0 -0
- {svc_infra-0.1.589.dist-info → svc_infra-0.1.706.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from dataclasses import asdict
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from typing import Any, Dict, Optional, cast
|
|
8
|
+
|
|
9
|
+
from redis import Redis
|
|
10
|
+
|
|
11
|
+
from .queue import Job, JobQueue
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# Lua script for atomic reserve: pop from ready, push to processing, set visibility timeout
# Returns job_id if successful, nil if queue is empty
# NOTE: the script runs atomically inside Redis, so no other client can observe
# the job between the RPOPLPUSH list move and the visibility-timeout ZADD —
# this is what closes the race that the non-atomic fallback path leaves open.
_RESERVE_LUA = """
local ready_key = KEYS[1]
local processing_key = KEYS[2]
local processing_vt_key = KEYS[3]
local visible_at = ARGV[1]

local job_id = redis.call('RPOPLPUSH', ready_key, processing_key)
if job_id then
    redis.call('ZADD', processing_vt_key, visible_at, job_id)
end
return job_id
"""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class RedisJobQueue(JobQueue):
    """Redis-backed job queue with visibility timeout and delayed retries.

    Keys (with optional prefix):
    - {p}:ready (LIST) ready job ids
    - {p}:processing (LIST) in-flight job ids
    - {p}:processing_vt (ZSET) id -> visible_at (epoch seconds)
    - {p}:delayed (ZSET) id -> available_at (epoch seconds)
    - {p}:seq (STRING) INCR for job ids
    - {p}:job:{id} (HASH) job fields (json payload)
    - {p}:dlq (LIST) dead-letter job ids
    """

    def __init__(
        self, client: Redis, *, prefix: str = "jobs", visibility_timeout: int = 60
    ):
        """Create a queue over *client*.

        Args:
            client: Connected redis client.
            prefix: Key namespace prefix (default "jobs").
            visibility_timeout: Seconds a reserved job stays invisible before
                being requeued for another worker.
        """
        self._r = client
        self._p = prefix
        self._vt = visibility_timeout
        # Try to register Lua script for atomic reserve.
        # Falls back to non-atomic if Lua scripting isn't available (e.g., fakeredis in tests).
        self._reserve_script = None
        try:
            self._reserve_script = client.register_script(_RESERVE_LUA)
        except Exception as e:
            logger.debug("Lua scripting not available, using non-atomic reserve: %s", e)

    # ---- key helpers -------------------------------------------------------

    def _k(self, name: str) -> str:
        """Namespaced key for *name*."""
        return f"{self._p}:{name}"

    def _job_key(self, job_id: str) -> str:
        """Namespaced HASH key holding the fields of job *job_id*."""
        return f"{self._p}:job:{job_id}"

    # ---- shared decode / cleanup helpers (deduplicated from reserve/fail) --

    @staticmethod
    def _to_str(value: Any) -> str:
        """Decode a redis reply (bytes or str) to str."""
        return value.decode() if isinstance(value, (bytes, bytearray)) else str(value)

    @staticmethod
    def _hget(
        data: Dict[Any, Any], field: str, default: Optional[str] = None
    ) -> Optional[str]:
        """Read *field* from a non-empty HGETALL reply, tolerating bytes or str keys.

        Callers must guarantee *data* is non-empty (the key probe peeks at the
        first key to decide whether the client returned bytes).
        """
        val = (
            data.get(field.encode())
            if isinstance(next(iter(data.keys())), bytes)
            else data.get(field)
        )
        if val is None:
            return default
        return val.decode() if isinstance(val, (bytes, bytearray)) else str(val)

    def _remove_from_processing(self, job_id: str) -> None:
        """Drop *job_id* from the in-flight list and its visibility ZSET."""
        self._r.lrem(self._k("processing"), 1, job_id)
        self._r.zrem(self._k("processing_vt"), job_id)

    # ---- core ops ----------------------------------------------------------

    def enqueue(self, name: str, payload: Dict, *, delay_seconds: int = 0) -> Job:
        """Persist a new job and make it ready (or delayed by *delay_seconds*).

        Returns the created Job with its assigned id.
        """
        now = datetime.now(timezone.utc)
        job_id = str(self._r.incr(self._k("seq")))
        job = Job(id=job_id, name=name, payload=dict(payload))
        # Persist job
        data = asdict(job)
        data["payload"] = json.dumps(data["payload"])  # store payload as JSON string
        # available_at stored as ISO format
        data["available_at"] = job.available_at.isoformat()
        self._r.hset(
            self._job_key(job_id),
            mapping={k: str(v) for k, v in data.items() if v is not None},
        )
        if delay_seconds and delay_seconds > 0:
            at = int(now.timestamp()) + int(delay_seconds)
            self._r.zadd(self._k("delayed"), {job_id: at})
        else:
            # push to ready
            self._r.lpush(self._k("ready"), job_id)
        return job

    def _move_due_delayed_to_ready(self) -> None:
        """Promote delayed jobs whose available_at has passed into the ready list."""
        now_ts = int(datetime.now(timezone.utc).timestamp())
        ids = cast(list[Any], self._r.zrangebyscore(self._k("delayed"), "-inf", now_ts))
        if not ids:
            return
        pipe = self._r.pipeline()
        for jid in ids:
            jid_s = self._to_str(jid)
            pipe.lpush(self._k("ready"), jid_s)
            pipe.zrem(self._k("delayed"), jid_s)
        pipe.execute()

    def _requeue_timed_out_processing(self) -> None:
        """Return in-flight jobs whose visibility timeout expired to the ready list."""
        now_ts = int(datetime.now(timezone.utc).timestamp())
        ids = cast(
            list[Any], self._r.zrangebyscore(self._k("processing_vt"), "-inf", now_ts)
        )
        if not ids:
            return
        pipe = self._r.pipeline()
        for jid in ids:
            jid_s = self._to_str(jid)
            pipe.lrem(self._k("processing"), 1, jid_s)
            pipe.lpush(self._k("ready"), jid_s)
            pipe.zrem(self._k("processing_vt"), jid_s)
            # clear stale visibility timestamp so next reservation can set a fresh one
            pipe.hdel(self._job_key(jid_s), "visible_at")
        pipe.execute()

    def _reserve_fallback(self, visible_at: int) -> Any:
        """Non-atomic reserve: RPOPLPUSH then ZADD (small race window between them).

        Used when Lua scripting is unavailable or fails at runtime.
        """
        jid = self._r.rpoplpush(self._k("ready"), self._k("processing"))
        if jid:
            self._r.zadd(self._k("processing_vt"), {self._to_str(jid): visible_at})
        return jid

    def reserve_next(self) -> Optional[Job]:
        """Reserve the next ready job, or None if the queue is empty.

        Side effects: promotes due delayed jobs, requeues timed-out in-flight
        jobs, increments the job's attempt counter, and moves jobs that have
        exceeded max_attempts to the DLQ (returning None in that case).
        """
        # opportunistically move due delayed jobs
        self._move_due_delayed_to_ready()
        # move timed-out processing jobs back to ready before reserving
        self._requeue_timed_out_processing()

        # Calculate visibility timeout BEFORE reserve to prevent race condition
        visible_at = int(datetime.now(timezone.utc).timestamp()) + int(self._vt)

        # Try atomic reserve using Lua script if available.
        # This prevents race condition where two workers could reserve the same job.
        if self._reserve_script is not None:
            try:
                jid = self._reserve_script(
                    keys=[
                        self._k("ready"),
                        self._k("processing"),
                        self._k("processing_vt"),
                    ],
                    args=[visible_at],
                )
            except Exception as e:
                # Fall back to non-atomic if Lua fails at runtime
                logger.warning("Lua script failed, using non-atomic reserve: %s", e)
                jid = self._reserve_fallback(visible_at)
        else:
            # Non-atomic fallback (for fakeredis in tests, or older Redis versions)
            jid = self._reserve_fallback(visible_at)

        if not jid:
            return None
        job_id = self._to_str(jid)
        key = self._job_key(job_id)
        data = cast(dict[Any, Any], self._r.hgetall(key))
        if not data:
            # corrupted entry; ack and skip
            self._remove_from_processing(job_id)
            return None

        attempts = int(self._hget(data, "attempts", "0") or "0") + 1
        max_attempts = int(self._hget(data, "max_attempts", "5") or "5")
        backoff_seconds = int(self._hget(data, "backoff_seconds", "60") or "60")
        name = self._hget(data, "name", "") or ""
        payload_json = self._hget(data, "payload", "{}") or "{}"
        try:
            payload = json.loads(payload_json)
        except Exception:  # pragma: no cover
            payload = {}
        available_at_str = self._hget(data, "available_at")
        available_at = (
            datetime.fromisoformat(available_at_str)
            if available_at_str
            else datetime.now(timezone.utc)
        )
        # If exceeded max_attempts → DLQ and skip
        if attempts > max_attempts:
            self._remove_from_processing(job_id)
            self._r.lpush(self._k("dlq"), job_id)
            return None
        # Update attempts count in job hash (visibility timeout already set atomically in Lua script)
        self._r.hset(key, mapping={"attempts": attempts, "visible_at": visible_at})
        return Job(
            id=job_id,
            name=name,
            payload=payload,
            available_at=available_at,
            attempts=attempts,
            max_attempts=max_attempts,
            backoff_seconds=backoff_seconds,
        )

    def ack(self, job_id: str) -> None:
        """Acknowledge success: remove all queue state and delete the job hash."""
        self._remove_from_processing(job_id)
        self._r.delete(self._job_key(job_id))

    def fail(self, job_id: str, *, error: str | None = None) -> None:
        """Record a failure: schedule a linear-backoff retry, or DLQ at max_attempts."""
        key = self._job_key(job_id)
        data = cast(dict[Any, Any], self._r.hgetall(key))
        if not data:
            # nothing to do
            self._r.lrem(self._k("processing"), 1, job_id)
            return

        attempts = int(self._hget(data, "attempts", "0") or "0")
        max_attempts = int(self._hget(data, "max_attempts", "5") or "5")
        backoff_seconds = int(self._hget(data, "backoff_seconds", "60") or "60")
        now_ts = int(datetime.now(timezone.utc).timestamp())
        # DLQ if at or beyond max_attempts
        if attempts >= max_attempts:
            self._remove_from_processing(job_id)
            self._r.lpush(self._k("dlq"), job_id)
            return
        # Linear backoff: attempt N waits N * backoff_seconds before retry
        delay = backoff_seconds * max(1, attempts)
        available_at_ts = now_ts + delay
        mapping: dict[str, str] = {
            "last_error": error or "",
            "available_at": datetime.fromtimestamp(
                available_at_ts, tz=timezone.utc
            ).isoformat(),
        }
        self._r.hset(key, mapping=mapping)
        self._remove_from_processing(job_id)
        self._r.zadd(self._k("delayed"), {job_id: available_at_ts})
|
svc_infra/jobs/runner.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import contextlib
|
|
5
|
+
from typing import Awaitable, Callable, Optional
|
|
6
|
+
|
|
7
|
+
from .queue import JobQueue
|
|
8
|
+
|
|
9
|
+
ProcessFunc = Callable[[object], Awaitable[None]]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class WorkerRunner:
    """Cooperative worker loop with graceful stop.

    - start(): begin polling the queue and processing jobs
    - stop(grace_seconds): signal stop, wait up to grace for current job to finish
    """

    def __init__(
        self, queue: JobQueue, handler: ProcessFunc, *, poll_interval: float = 0.25
    ):
        # queue: source of jobs; handler: async callable invoked per job
        # poll_interval: sleep between polls when the queue is empty (seconds)
        self._queue = queue
        self._handler = handler
        self._poll_interval = poll_interval
        # The long-running polling task created by start()
        self._task: Optional[asyncio.Task] = None
        # Set by stop(); checked at the top of each loop iteration
        self._stopping = asyncio.Event()
        # The currently-executing job task, so stop() can await it
        self._inflight: Optional[asyncio.Task] = None

    async def _loop(self) -> None:
        """Poll the queue until stop() is signalled, processing one job at a time."""
        try:
            while not self._stopping.is_set():
                job = self._queue.reserve_next()
                if not job:
                    # Queue empty: back off briefly before polling again
                    await asyncio.sleep(self._poll_interval)
                    continue

                # Process one job; track in-flight task for stop()
                async def _run():
                    try:
                        await self._handler(job)
                    except Exception as exc:  # pragma: no cover
                        # Handler failed: report to the queue (which handles retry/DLQ)
                        self._queue.fail(job.id, error=str(exc))
                        return
                    # Handler succeeded: acknowledge so the job is not redelivered
                    self._queue.ack(job.id)

                self._inflight = asyncio.create_task(_run())
                try:
                    await self._inflight
                finally:
                    # Clear regardless of outcome so stop() doesn't await a stale task
                    self._inflight = None
        finally:
            # exiting loop
            pass

    def start(self) -> asyncio.Task:
        """Start (or restart) the polling loop; returns the loop task.

        Idempotent while the loop is running: calling again returns the
        existing task rather than spawning a second loop.
        """
        if self._task is None or self._task.done():
            self._task = asyncio.create_task(self._loop())
        return self._task

    async def stop(self, *, grace_seconds: float = 10.0) -> None:
        """Signal stop and wait (bounded) for the current job and loop to finish.

        Args:
            grace_seconds: Max seconds to wait for the in-flight job.
        """
        self._stopping.set()
        # Wait for in-flight job to complete, up to grace
        if self._inflight is not None and not self._inflight.done():
            try:
                await asyncio.wait_for(self._inflight, timeout=grace_seconds)
            except asyncio.TimeoutError:
                # Give up; job will be retried if your queue supports visibility timeouts
                pass
        # Finally, wait for loop to exit (should be quick since stopping is set)
        if self._task is not None:
            try:
                await asyncio.wait_for(
                    self._task, timeout=max(0.1, self._poll_interval + 0.1)
                )
            except asyncio.TimeoutError:
                # Cancel as a last resort
                self._task.cancel()
                with contextlib.suppress(Exception):
                    await self._task
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime, timedelta, timezone
|
|
6
|
+
from typing import Awaitable, Callable, Dict
|
|
7
|
+
|
|
8
|
+
CronFunc = Callable[[], Awaitable[None]]


@dataclass
class ScheduledTask:
    """Bookkeeping record for one periodic task."""

    name: str
    interval_seconds: int
    func: CronFunc
    next_run_at: datetime


class InMemoryScheduler:
    """Interval-based scheduler for simple periodic tasks (tests/local).

    Not a full cron parser. Tracks next_run_at per task.
    """

    def __init__(self, tick_interval: float = 60.0):
        # name -> task record; tick_interval drives the run() polling cadence
        self._tasks: Dict[str, ScheduledTask] = {}
        self._tick_interval = tick_interval

    def add_task(self, name: str, interval_seconds: int, func: CronFunc) -> None:
        """Register (or replace) a task; first run is one interval from now."""
        first_due = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)
        self._tasks[name] = ScheduledTask(name, interval_seconds, func, first_due)

    async def tick(self) -> None:
        """Run every task that is due, then push its next_run_at forward."""
        moment = datetime.now(timezone.utc)
        for task in self._tasks.values():
            if task.next_run_at > moment:
                continue
            await task.func()
            task.next_run_at = moment + timedelta(seconds=task.interval_seconds)

    async def run(self) -> None:
        """Run the scheduler loop indefinitely.

        Calls tick() at regular intervals to check and execute due tasks.
        This method runs forever until cancelled.
        """
        while True:
            await self.tick()
            await asyncio.sleep(self._tick_interval)
|
svc_infra/jobs/worker.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
from typing import Awaitable, Callable
|
|
6
|
+
|
|
7
|
+
from .queue import Job, JobQueue
|
|
8
|
+
|
|
9
|
+
ProcessFunc = Callable[[Job], Awaitable[None]]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _get_job_timeout_seconds() -> float | None:
    """Read the per-job timeout from JOB_DEFAULT_TIMEOUT_SECONDS.

    Returns None when the variable is unset, empty, or not a valid float.
    """
    value = os.getenv("JOB_DEFAULT_TIMEOUT_SECONDS")
    if value:
        try:
            return float(value)
        except ValueError:
            pass
    return None


async def process_one(queue: JobQueue, handler: ProcessFunc) -> bool:
    """Reserve a job, process with handler, ack on success or fail with backoff.

    Returns True if a job was processed (success or fail), False if no job was available.
    """
    job = queue.reserve_next()
    if not job:
        return False
    try:
        limit = _get_job_timeout_seconds()
        if limit and limit > 0:
            # Bound the handler's runtime when a positive timeout is configured
            await asyncio.wait_for(handler(job), timeout=limit)
        else:
            await handler(job)
    except Exception as exc:  # pragma: no cover - exercise in tests by raising
        queue.fail(job.id, error=str(exc))
    else:
        queue.ack(job.id)
    return True
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Content loaders for fetching from remote and local sources.
|
|
3
|
+
|
|
4
|
+
This module provides async-first loaders for GitHub, URLs, and other sources.
|
|
5
|
+
All loaders return a consistent `LoadedContent` format that is compatible
|
|
6
|
+
with ai-infra's Retriever.add_text() method.
|
|
7
|
+
|
|
8
|
+
Quick Start:
|
|
9
|
+
>>> from svc_infra.loaders import GitHubLoader, URLLoader
|
|
10
|
+
>>>
|
|
11
|
+
>>> # Load from GitHub
|
|
12
|
+
>>> loader = GitHubLoader("nfraxlab/svc-infra", path="docs")
|
|
13
|
+
>>> contents = await loader.load()
|
|
14
|
+
>>>
|
|
15
|
+
>>> # Load from URL
|
|
16
|
+
>>> loader = URLLoader("https://example.com/guide.md")
|
|
17
|
+
>>> contents = await loader.load()
|
|
18
|
+
>>>
|
|
19
|
+
>>> # Sync usage (for scripts/notebooks)
|
|
20
|
+
>>> contents = loader.load_sync()
|
|
21
|
+
|
|
22
|
+
With ai-infra Retriever:
|
|
23
|
+
>>> from ai_infra import Retriever
|
|
24
|
+
>>> from svc_infra.loaders import GitHubLoader
|
|
25
|
+
>>>
|
|
26
|
+
>>> retriever = Retriever()
|
|
27
|
+
>>> loader = GitHubLoader("nfraxlab/svc-infra", path="docs")
|
|
28
|
+
>>>
|
|
29
|
+
>>> for content in await loader.load():
|
|
30
|
+
... retriever.add_text(content.content, metadata=content.metadata)
|
|
31
|
+
|
|
32
|
+
Convenience Functions:
|
|
33
|
+
>>> from svc_infra.loaders import load_github, load_url
|
|
34
|
+
>>>
|
|
35
|
+
>>> # One-liner loading
|
|
36
|
+
>>> contents = await load_github("nfraxlab/svc-infra", path="docs")
|
|
37
|
+
>>> contents = await load_url("https://example.com/guide.md")
|
|
38
|
+
|
|
39
|
+
Available Loaders:
|
|
40
|
+
- GitHubLoader: Load files from GitHub repositories
|
|
41
|
+
- URLLoader: Load content from URLs (with HTML text extraction)
|
|
42
|
+
|
|
43
|
+
Future Loaders (planned):
|
|
44
|
+
- S3Loader: Load files from S3-compatible storage
|
|
45
|
+
- NotionLoader: Load pages from Notion
|
|
46
|
+
- ConfluenceLoader: Load pages from Confluence
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
from .base import BaseLoader
|
|
50
|
+
from .github import GitHubLoader
|
|
51
|
+
from .models import LoadedContent, LoadedDocument, to_loaded_documents
|
|
52
|
+
from .url import URLLoader
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
async def load_github(
    repo: str,
    path: str = "",
    branch: str = "main",
    pattern: str = "*.md",
    **kwargs,
) -> list[LoadedContent]:
    """Load content from GitHub in one call.

    Shortcut for constructing a GitHubLoader and awaiting its load().

    Args:
        repo: Repository in "owner/repo" format.
        path: Path within repo (empty for root).
        branch: Branch name (default: "main").
        pattern: Glob pattern for files (default: "*.md").
        **kwargs: Additional arguments passed to GitHubLoader.

    Returns:
        List of LoadedContent objects.

    Example:
        >>> contents = await load_github("nfraxlab/svc-infra", path="docs")
        >>> for c in contents:
        ...     print(f"{c.source}: {len(c.content)} chars")
    """
    return await GitHubLoader(
        repo, path=path, branch=branch, pattern=pattern, **kwargs
    ).load()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
async def load_url(
    urls: str | list[str],
    **kwargs,
) -> list[LoadedContent]:
    """Load content from one or more URLs in a single call.

    Equivalent to constructing a ``URLLoader`` with the same arguments
    and awaiting its ``load()`` method.

    Args:
        urls: A single URL, or a list of URLs, to fetch.
        **kwargs: Forwarded verbatim to ``URLLoader``.

    Returns:
        A list of ``LoadedContent`` objects.

    Example:
        >>> # Single URL
        >>> contents = await load_url("https://example.com/guide.md")
        >>>
        >>> # Multiple URLs
        >>> contents = await load_url([
        ...     "https://example.com/page1",
        ...     "https://example.com/page2",
        ... ])
    """
    # Thin async shortcut: delegate everything to URLLoader.
    return await URLLoader(urls, **kwargs).load()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def load_github_sync(
    repo: str,
    path: str = "",
    branch: str = "main",
    pattern: str = "*.md",
    **kwargs,
) -> list[LoadedContent]:
    """Blocking variant of :func:`load_github`.

    Builds a ``GitHubLoader`` and calls its ``load_sync()`` method, so it
    can be used from scripts, notebooks, and other non-async contexts.

    Args:
        repo: Repository in "owner/repo" format.
        path: Path inside the repository (empty string means the root).
        branch: Branch to read from (default: "main").
        pattern: Glob pattern selecting which files to load (default: "*.md").
        **kwargs: Forwarded verbatim to ``GitHubLoader``.

    Returns:
        A list of ``LoadedContent`` objects, one per matched file.

    Example:
        >>> # In a script or notebook (no await needed)
        >>> contents = load_github_sync("nfraxlab/svc-infra", path="docs")
        >>> for c in contents:
        ...     print(f"{c.source}: {len(c.content)} chars")
    """
    # Same delegation as the async helper, but via the loader's sync entry point.
    return GitHubLoader(
        repo, path=path, branch=branch, pattern=pattern, **kwargs
    ).load_sync()
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def load_url_sync(
    urls: str | list[str],
    **kwargs,
) -> list[LoadedContent]:
    """Blocking variant of :func:`load_url`.

    Builds a ``URLLoader`` and calls its ``load_sync()`` method, so it can
    be used from scripts, notebooks, and other non-async contexts.

    Args:
        urls: A single URL, or a list of URLs, to fetch.
        **kwargs: Forwarded verbatim to ``URLLoader``.

    Returns:
        A list of ``LoadedContent`` objects.

    Example:
        >>> # In a script or notebook (no await needed)
        >>> contents = load_url_sync("https://example.com/guide.md")
    """
    # Same delegation as the async helper, but via the loader's sync entry point.
    return URLLoader(urls, **kwargs).load_sync()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# Public API of the loaders package. Names not listed here are internal
# and excluded from `from svc_infra.loaders import *`.
__all__ = [
    # Base classes
    "BaseLoader",
    "LoadedContent",
    # Compatibility (legacy document model + converter)
    "LoadedDocument",
    "to_loaded_documents",
    # Loaders
    "GitHubLoader",
    "URLLoader",
    # Async convenience functions
    "load_github",
    "load_url",
    # Sync convenience functions (for scripts/notebooks)
    "load_github_sync",
    "load_url_sync",
]
|