truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -22
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
- truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/core/notifications/escalation/backends.py (new file)
@@ -0,0 +1,1384 @@
"""Scheduler backend implementations for persistent and in-memory job storage.

This module provides abstract and concrete implementations for scheduler backends,
enabling different storage strategies for APScheduler jobs.

Backends:
    - InMemorySchedulerBackend: Fast, ephemeral storage (lost on restart)
    - SQLAlchemySchedulerBackend: Persistent SQLite storage (survives restarts)

Features:
    - Abstract base class for custom implementations
    - Configurable misfire handling with grace time
    - Exponential backoff for error recovery
    - Job coalescing to prevent duplicate executions
    - Thread-safe operations with proper locking
    - Graceful shutdown with pending job handling

Usage:
    from truthound_dashboard.core.notifications.escalation.backends import (
        SQLAlchemySchedulerBackend,
        SchedulerBackendConfig,
    )

    config = SchedulerBackendConfig(
        backend_type="sqlalchemy",
        misfire_grace_time=60,
        coalesce=True,
    )

    backend = SQLAlchemySchedulerBackend(config)
    await backend.initialize()
    await backend.add_job(job_data)
"""

from __future__ import annotations

import asyncio
import json
import logging
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from typing import Any, Callable
from uuid import uuid4

logger = logging.getLogger(__name__)


# =============================================================================
# Enums and Configuration
# =============================================================================


class BackendType(str, Enum):
    """Type of scheduler backend."""

    MEMORY = "memory"
    SQLALCHEMY = "sqlalchemy"
    # Future: REDIS = "redis"


class JobState(str, Enum):
    """State of a scheduled job."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    MISFIRED = "misfired"
    PAUSED = "paused"


class MisfirePolicy(str, Enum):
    """Policy for handling misfired jobs.

    - SKIP: Skip the misfired execution entirely
    - RUN_ONCE: Run once if misfired (coalesce multiple misfires)
    - RUN_ALL: Run all misfired executions (catch up)
    """

    SKIP = "skip"
    RUN_ONCE = "run_once"
    RUN_ALL = "run_all"


@dataclass
class SchedulerBackendConfig:
    """Configuration for scheduler backends.

    Attributes:
        backend_type: Type of backend (memory, sqlalchemy).
        misfire_grace_time: Seconds to allow for late job execution.
        coalesce: Combine multiple pending executions into one.
        max_instances: Maximum concurrent instances of same job.
        max_retries: Maximum retry attempts on failure.
        retry_base_delay: Base delay in seconds for exponential backoff.
        retry_max_delay: Maximum delay in seconds for backoff.
        shutdown_timeout: Seconds to wait for jobs during shutdown.
        database_url: Database URL for SQLAlchemy backend.
        job_table_name: Table name for job storage.
        cleanup_interval: Seconds between cleanup runs.
        job_retention_days: Days to retain completed jobs.
    """

    backend_type: BackendType = BackendType.SQLALCHEMY
    misfire_grace_time: int = 60
    coalesce: bool = True
    max_instances: int = 1
    max_retries: int = 3
    retry_base_delay: float = 5.0
    retry_max_delay: float = 300.0
    shutdown_timeout: float = 30.0
    database_url: str | None = None
    job_table_name: str = "scheduler_jobs"
    cleanup_interval: int = 3600  # 1 hour
    job_retention_days: int = 7

    @classmethod
    def from_env(cls) -> SchedulerBackendConfig:
        """Create configuration from environment variables.

        Environment variables:
            TRUTHOUND_SCHEDULER_BACKEND: Backend type (memory, sqlalchemy)
            TRUTHOUND_SCHEDULER_MISFIRE_GRACE_TIME: Seconds for misfire grace
            TRUTHOUND_SCHEDULER_COALESCE: Whether to coalesce jobs (true/false)
            TRUTHOUND_SCHEDULER_MAX_RETRIES: Maximum retry attempts
            TRUTHOUND_SCHEDULER_SHUTDOWN_TIMEOUT: Shutdown timeout seconds
        """
        return cls(
            backend_type=BackendType(
                os.getenv("TRUTHOUND_SCHEDULER_BACKEND", "sqlalchemy")
            ),
            misfire_grace_time=int(
                os.getenv("TRUTHOUND_SCHEDULER_MISFIRE_GRACE_TIME", "60")
            ),
            coalesce=os.getenv("TRUTHOUND_SCHEDULER_COALESCE", "true").lower() == "true",
            max_retries=int(os.getenv("TRUTHOUND_SCHEDULER_MAX_RETRIES", "3")),
            shutdown_timeout=float(
                os.getenv("TRUTHOUND_SCHEDULER_SHUTDOWN_TIMEOUT", "30")
            ),
            job_retention_days=int(
                os.getenv("TRUTHOUND_SCHEDULER_JOB_RETENTION_DAYS", "7")
            ),
        )


@dataclass
class JobData:
    """Data structure for a scheduled job.

    Attributes:
        id: Unique job identifier.
        name: Human-readable job name.
        func_ref: Reference to the function to execute.
        trigger_type: Type of trigger (interval, cron, date).
        trigger_args: Arguments for the trigger.
        args: Positional arguments for the function.
        kwargs: Keyword arguments for the function.
        next_run_time: Next scheduled execution time.
        state: Current job state.
        retry_count: Number of retry attempts.
        last_run_time: Last execution time.
        last_error: Last error message.
        metadata: Additional job metadata.
        created_at: When the job was created.
        updated_at: Last update timestamp.
    """

    id: str = field(default_factory=lambda: str(uuid4()))
    name: str = ""
    func_ref: str = ""
    trigger_type: str = "interval"
    trigger_args: dict[str, Any] = field(default_factory=dict)
    args: tuple[Any, ...] = field(default_factory=tuple)
    kwargs: dict[str, Any] = field(default_factory=dict)
    next_run_time: datetime | None = None
    state: JobState = JobState.PENDING
    retry_count: int = 0
    last_run_time: datetime | None = None
    last_error: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "id": self.id,
            "name": self.name,
            "func_ref": self.func_ref,
            "trigger_type": self.trigger_type,
            "trigger_args": self.trigger_args,
            "args": list(self.args),
            "kwargs": self.kwargs,
            "next_run_time": self.next_run_time.isoformat() if self.next_run_time else None,
            "state": self.state.value,
            "retry_count": self.retry_count,
            "last_run_time": self.last_run_time.isoformat() if self.last_run_time else None,
            "last_error": self.last_error,
            "metadata": self.metadata,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> JobData:
        """Create from dictionary."""
        return cls(
            id=data.get("id", str(uuid4())),
            name=data.get("name", ""),
            func_ref=data.get("func_ref", ""),
            trigger_type=data.get("trigger_type", "interval"),
            trigger_args=data.get("trigger_args", {}),
            args=tuple(data.get("args", [])),
            kwargs=data.get("kwargs", {}),
            next_run_time=(
                datetime.fromisoformat(data["next_run_time"])
                if data.get("next_run_time")
                else None
            ),
            state=JobState(data.get("state", "pending")),
            retry_count=data.get("retry_count", 0),
            last_run_time=(
                datetime.fromisoformat(data["last_run_time"])
                if data.get("last_run_time")
                else None
            ),
            last_error=data.get("last_error"),
            metadata=data.get("metadata", {}),
            created_at=(
                datetime.fromisoformat(data["created_at"])
                if data.get("created_at")
                else datetime.utcnow()
            ),
            updated_at=(
                datetime.fromisoformat(data["updated_at"])
                if data.get("updated_at")
                else datetime.utcnow()
            ),
        )


@dataclass
class JobExecutionResult:
    """Result of a job execution.

    Attributes:
        success: Whether the execution succeeded.
        result: Return value from the job function.
        error: Error message if failed.
        duration_ms: Execution duration in milliseconds.
        retry_scheduled: Whether a retry was scheduled.
    """

    success: bool
    result: Any = None
    error: str | None = None
    duration_ms: int = 0
    retry_scheduled: bool = False


# =============================================================================
# Abstract Base Class
# =============================================================================


class SchedulerBackend(ABC):
    """Abstract base class for scheduler backends.

    Subclasses must implement all abstract methods to provide
    custom storage strategies for scheduled jobs.

    This class defines the contract for:
    - Job lifecycle management (add, update, remove)
    - Job retrieval and querying
    - Misfire handling
    - Error recovery
    - Cleanup and maintenance
    """

    def __init__(self, config: SchedulerBackendConfig | None = None) -> None:
        """Initialize the backend.

        Args:
            config: Backend configuration. Uses defaults if None.
        """
        self.config = config or SchedulerBackendConfig()
        self._initialized = False
        self._shutdown = False
        self._lock = asyncio.Lock()
        self._running_jobs: set[str] = set()

    @property
    @abstractmethod
    def backend_type(self) -> BackendType:
        """Return the backend type identifier."""
        ...

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the backend (create tables, connections, etc.).

        This method is called before any other operations and should
        set up any required infrastructure.
        """
        ...

    @abstractmethod
    async def shutdown(self) -> None:
        """Shutdown the backend gracefully.

        Should wait for running jobs and clean up resources.
        """
        ...

    @abstractmethod
    async def add_job(self, job: JobData) -> JobData:
        """Add a new job to the scheduler.

        Args:
            job: Job data to add.

        Returns:
            The added job with any modifications (e.g., assigned ID).

        Raises:
            ValueError: If job with same ID already exists.
        """
        ...

    @abstractmethod
    async def update_job(self, job: JobData) -> JobData:
        """Update an existing job.

        Args:
            job: Updated job data.

        Returns:
            The updated job.

        Raises:
            KeyError: If job not found.
        """
        ...

    @abstractmethod
    async def remove_job(self, job_id: str) -> bool:
        """Remove a job from the scheduler.

        Args:
            job_id: ID of job to remove.

        Returns:
            True if job was removed, False if not found.
        """
        ...

    @abstractmethod
    async def get_job(self, job_id: str) -> JobData | None:
        """Get a job by ID.

        Args:
            job_id: Job ID to retrieve.

        Returns:
            Job data or None if not found.
        """
        ...

    @abstractmethod
    async def get_jobs(
        self,
        state: JobState | None = None,
        limit: int | None = None,
    ) -> list[JobData]:
        """Get jobs, optionally filtered by state.

        Args:
            state: Optional state filter.
            limit: Maximum number of jobs to return.

        Returns:
            List of matching jobs.
        """
        ...

    @abstractmethod
    async def get_due_jobs(self, now: datetime | None = None) -> list[JobData]:
        """Get jobs that are due for execution.

        Args:
            now: Current time (defaults to utcnow).

        Returns:
            List of jobs ready to run.
        """
        ...

    @abstractmethod
    async def mark_job_running(self, job_id: str) -> bool:
        """Mark a job as running.

        Args:
            job_id: Job ID to mark.

        Returns:
            True if marked successfully, False if already running or not found.
        """
        ...

    @abstractmethod
    async def mark_job_completed(
        self,
        job_id: str,
        next_run_time: datetime | None = None,
    ) -> bool:
        """Mark a job as completed.

        Args:
            job_id: Job ID to mark.
            next_run_time: Next scheduled run time (for recurring jobs).

        Returns:
            True if marked successfully.
        """
        ...

    @abstractmethod
    async def mark_job_failed(
        self,
        job_id: str,
        error: str,
        schedule_retry: bool = True,
    ) -> bool:
        """Mark a job as failed.

        Args:
            job_id: Job ID to mark.
            error: Error message.
            schedule_retry: Whether to schedule a retry.

        Returns:
            True if marked successfully.
        """
        ...

    @abstractmethod
    async def cleanup_old_jobs(self, older_than: datetime) -> int:
        """Remove completed/failed jobs older than specified time.

        Args:
            older_than: Remove jobs updated before this time.

        Returns:
            Number of jobs removed.
        """
        ...

    # -------------------------------------------------------------------------
    # Default implementations
    # -------------------------------------------------------------------------

    def calculate_retry_delay(self, retry_count: int) -> float:
        """Calculate exponential backoff delay for retry.

        Args:
            retry_count: Current retry attempt number.

        Returns:
            Delay in seconds before next retry.
        """
        delay = self.config.retry_base_delay * (2 ** retry_count)
        return min(delay, self.config.retry_max_delay)

    def is_misfired(self, job: JobData, now: datetime | None = None) -> bool:
        """Check if a job has misfired.

        A job is considered misfired if its next_run_time plus the
        misfire grace time is before the current time.

        Args:
            job: Job to check.
            now: Current time (defaults to utcnow).

        Returns:
            True if the job has misfired.
        """
        if not job.next_run_time:
            return False

        now = now or datetime.utcnow()
        grace_deadline = job.next_run_time + timedelta(
            seconds=self.config.misfire_grace_time
        )
        return now > grace_deadline

    async def handle_misfire(self, job: JobData) -> JobData:
        """Handle a misfired job according to policy.

        Args:
            job: The misfired job.

        Returns:
            Updated job data.
        """
        logger.warning(
            f"Job {job.id} ({job.name}) misfired. "
            f"Scheduled: {job.next_run_time}, Grace: {self.config.misfire_grace_time}s"
        )

        # Mark as misfired
        job.state = JobState.MISFIRED
        job.updated_at = datetime.utcnow()
        job.metadata["misfire_count"] = job.metadata.get("misfire_count", 0) + 1
        job.metadata["last_misfire_at"] = datetime.utcnow().isoformat()

        # Calculate new next_run_time based on trigger
        if job.trigger_type == "interval":
            interval_seconds = job.trigger_args.get("seconds", 60)
            job.next_run_time = datetime.utcnow() + timedelta(seconds=interval_seconds)
            job.state = JobState.PENDING

        await self.update_job(job)
        return job

    def get_status(self) -> dict[str, Any]:
        """Get backend status information.

        Returns:
            Status dictionary with backend state and metrics.
        """
        return {
            "backend_type": self.backend_type.value,
            "initialized": self._initialized,
            "shutdown": self._shutdown,
            "running_jobs": len(self._running_jobs),
            "config": {
                "misfire_grace_time": self.config.misfire_grace_time,
                "coalesce": self.config.coalesce,
                "max_retries": self.config.max_retries,
                "max_instances": self.config.max_instances,
            },
        }


# =============================================================================
# In-Memory Backend
# =============================================================================


class InMemorySchedulerBackend(SchedulerBackend):
    """In-memory scheduler backend for ephemeral job storage.

    Jobs are stored in memory and lost on process restart.
    Best for development, testing, or non-critical workloads.

    Features:
    - Fast access without database overhead
    - Thread-safe with asyncio locks
    - Supports all job lifecycle operations

    Limitations:
    - Jobs lost on restart
    - Not suitable for multi-process deployments
    - Memory grows with job count
    """

    def __init__(self, config: SchedulerBackendConfig | None = None) -> None:
        """Initialize in-memory backend."""
        if config is None:
            config = SchedulerBackendConfig(backend_type=BackendType.MEMORY)
        super().__init__(config)
        self._jobs: dict[str, JobData] = {}

    @property
    def backend_type(self) -> BackendType:
        return BackendType.MEMORY

    async def initialize(self) -> None:
        """Initialize the in-memory backend."""
        if self._initialized:
            return

        logger.info("Initializing in-memory scheduler backend")
        self._jobs.clear()
        self._initialized = True
        self._shutdown = False
        logger.info("In-memory scheduler backend initialized")

    async def shutdown(self) -> None:
        """Shutdown the in-memory backend."""
        if self._shutdown:
            return

        logger.info("Shutting down in-memory scheduler backend")

        # Wait for running jobs
        if self._running_jobs:
            logger.info(f"Waiting for {len(self._running_jobs)} running jobs...")
            wait_until = datetime.utcnow() + timedelta(
                seconds=self.config.shutdown_timeout
            )
            while self._running_jobs and datetime.utcnow() < wait_until:
                await asyncio.sleep(0.5)

            if self._running_jobs:
                logger.warning(
                    f"Timeout waiting for jobs: {self._running_jobs}"
                )

        self._shutdown = True
        self._initialized = False
        logger.info("In-memory scheduler backend shut down")

    async def add_job(self, job: JobData) -> JobData:
        """Add a job to memory."""
        async with self._lock:
            if job.id in self._jobs:
                raise ValueError(f"Job {job.id} already exists")

            job.created_at = datetime.utcnow()
            job.updated_at = datetime.utcnow()
            self._jobs[job.id] = job
            logger.debug(f"Added job {job.id} ({job.name})")
            return job

    async def update_job(self, job: JobData) -> JobData:
        """Update a job in memory."""
        async with self._lock:
            if job.id not in self._jobs:
                raise KeyError(f"Job {job.id} not found")

            job.updated_at = datetime.utcnow()
            self._jobs[job.id] = job
            logger.debug(f"Updated job {job.id} ({job.name})")
            return job

    async def remove_job(self, job_id: str) -> bool:
        """Remove a job from memory."""
        async with self._lock:
            if job_id in self._jobs:
                del self._jobs[job_id]
                logger.debug(f"Removed job {job_id}")
                return True
            return False

    async def get_job(self, job_id: str) -> JobData | None:
        """Get a job by ID."""
        return self._jobs.get(job_id)

    async def get_jobs(
        self,
        state: JobState | None = None,
        limit: int | None = None,
    ) -> list[JobData]:
        """Get jobs, optionally filtered by state."""
        jobs = list(self._jobs.values())

        if state:
            jobs = [j for j in jobs if j.state == state]

        # Sort by next_run_time
        jobs.sort(key=lambda j: j.next_run_time or datetime.max)

        if limit:
            jobs = jobs[:limit]

        return jobs

    async def get_due_jobs(self, now: datetime | None = None) -> list[JobData]:
        """Get jobs due for execution."""
        now = now or datetime.utcnow()
        due_jobs = []

        for job in self._jobs.values():
            if job.state not in (JobState.PENDING, JobState.MISFIRED):
                continue
            if job.id in self._running_jobs:
                continue
            if job.next_run_time and job.next_run_time <= now:
                due_jobs.append(job)

        # Sort by next_run_time (earliest first)
        due_jobs.sort(key=lambda j: j.next_run_time or datetime.min)
        return due_jobs

    async def mark_job_running(self, job_id: str) -> bool:
        """Mark a job as running."""
        async with self._lock:
            job = self._jobs.get(job_id)
            if not job:
                return False

            if job_id in self._running_jobs:
                return False

            # Check max instances
            if len(self._running_jobs) >= self.config.max_instances:
                logger.debug(f"Max instances reached, cannot run {job_id}")
                return False

            job.state = JobState.RUNNING
            job.last_run_time = datetime.utcnow()
            job.updated_at = datetime.utcnow()
            self._running_jobs.add(job_id)
            logger.debug(f"Job {job_id} marked as running")
            return True

    async def mark_job_completed(
        self,
        job_id: str,
        next_run_time: datetime | None = None,
    ) -> bool:
        """Mark a job as completed."""
        async with self._lock:
            job = self._jobs.get(job_id)
            if not job:
                return False

            job.state = JobState.COMPLETED if not next_run_time else JobState.PENDING
            job.next_run_time = next_run_time
            job.retry_count = 0
            job.last_error = None
            job.updated_at = datetime.utcnow()
            self._running_jobs.discard(job_id)
            logger.debug(f"Job {job_id} marked as completed")
            return True

    async def mark_job_failed(
        self,
        job_id: str,
        error: str,
        schedule_retry: bool = True,
    ) -> bool:
        """Mark a job as failed."""
        async with self._lock:
            job = self._jobs.get(job_id)
            if not job:
                return False

            job.last_error = error
            job.updated_at = datetime.utcnow()
            self._running_jobs.discard(job_id)

            if schedule_retry and job.retry_count < self.config.max_retries:
                job.retry_count += 1
                delay = self.calculate_retry_delay(job.retry_count)
                job.next_run_time = datetime.utcnow() + timedelta(seconds=delay)
                job.state = JobState.PENDING
                logger.info(
                    f"Job {job_id} failed, retry {job.retry_count}/{self.config.max_retries} "
                    f"scheduled in {delay:.1f}s"
                )
            else:
                job.state = JobState.FAILED
                logger.error(f"Job {job_id} failed permanently: {error}")

            return True

    async def cleanup_old_jobs(self, older_than: datetime) -> int:
        """Remove old completed/failed jobs."""
        async with self._lock:
            to_remove = []
            for job_id, job in self._jobs.items():
                if job.state in (JobState.COMPLETED, JobState.FAILED):
                    if job.updated_at < older_than:
                        to_remove.append(job_id)

            for job_id in to_remove:
                del self._jobs[job_id]

            if to_remove:
                logger.info(f"Cleaned up {len(to_remove)} old jobs")
            return len(to_remove)


# =============================================================================
# SQLAlchemy Backend
# =============================================================================


class SQLAlchemySchedulerBackend(SchedulerBackend):
    """SQLAlchemy-based scheduler backend for persistent job storage.

    Jobs are stored in SQLite database and survive process restarts.
    Suitable for production workloads requiring durability.

    Features:
    - Persistent storage in SQLite
    - Automatic table creation
    - Thread-safe with row-level locking
    - Supports all job lifecycle operations
    - Automatic cleanup of old jobs

    Usage:
        backend = SQLAlchemySchedulerBackend(config)
        await backend.initialize()

        job = JobData(name="my_job", func_ref="module:function")
        await backend.add_job(job)
    """

    def __init__(self, config: SchedulerBackendConfig | None = None) -> None:
        """Initialize SQLAlchemy backend."""
        if config is None:
            config = SchedulerBackendConfig(backend_type=BackendType.SQLALCHEMY)
        super().__init__(config)
        self._cleanup_task: asyncio.Task | None = None

    @property
    def backend_type(self) -> BackendType:
        return BackendType.SQLALCHEMY

    async def initialize(self) -> None:
        """Initialize the SQLAlchemy backend and ensure table exists."""
        if self._initialized:
            return

        logger.info("Initializing SQLAlchemy scheduler backend")

        try:
            # Import here to avoid circular imports
            from ....db import get_session, init_db

            # Ensure database tables are created
            await init_db()

            # Recover misfired jobs on startup
            await self._recover_misfired_jobs()

            # Start cleanup task
            self._cleanup_task = asyncio.create_task(self._periodic_cleanup())

            self._initialized = True
            self._shutdown = False
            logger.info("SQLAlchemy scheduler backend initialized")

        except Exception as e:
            logger.error(f"Failed to initialize SQLAlchemy backend: {e}")
            raise

    async def shutdown(self) -> None:
        """Shutdown the SQLAlchemy backend."""
        if self._shutdown:
            return

        logger.info("Shutting down SQLAlchemy scheduler backend")

        # Cancel cleanup task
        if self._cleanup_task:
            self._cleanup_task.cancel()
            try:
                await self._cleanup_task
            except asyncio.CancelledError:
                pass

        # Wait for running jobs
        if self._running_jobs:
            logger.info(f"Waiting for {len(self._running_jobs)} running jobs...")
            wait_until = datetime.utcnow() + timedelta(
                seconds=self.config.shutdown_timeout
            )
            while self._running_jobs and datetime.utcnow() < wait_until:
                await asyncio.sleep(0.5)

        # Mark remaining running jobs as pending for recovery on restart
        if self._running_jobs:
            logger.warning(
                f"Marking {len(self._running_jobs)} jobs as pending for recovery"
            )
            for job_id in list(self._running_jobs):
                try:
                    job = await self.get_job(job_id)
                    if job:
                        job.state = JobState.PENDING
                        await self.update_job(job)
                except Exception as e:
                    logger.error(f"Error recovering job {job_id}: {e}")

        self._shutdown = True
        self._initialized = False
        logger.info("SQLAlchemy scheduler backend shut down")

    async def _recover_misfired_jobs(self) -> None:
        """Recover jobs that were running during previous shutdown."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import select, update

                # Find jobs that were left in running state
                result = await session.execute(
                    select(SchedulerJob).where(
                        SchedulerJob.state == JobState.RUNNING.value
                    )
                )
                running_jobs = result.scalars().all()

                for db_job in running_jobs:
                    logger.info(f"Recovering job {db_job.id} from running state")
                    db_job.state = JobState.PENDING.value
                    db_job.updated_at = datetime.utcnow()

                await session.commit()

                if running_jobs:
                    logger.info(f"Recovered {len(running_jobs)} jobs from running state")

        except Exception as e:
            logger.error(f"Error recovering misfired jobs: {e}")

    async def _periodic_cleanup(self) -> None:
        """Periodically clean up old jobs."""
        while not self._shutdown:
            try:
                await asyncio.sleep(self.config.cleanup_interval)
                if self._shutdown:
                    break

                older_than = datetime.utcnow() - timedelta(
                    days=self.config.job_retention_days
                )
                removed = await self.cleanup_old_jobs(older_than)
                if removed > 0:
                    logger.debug(f"Periodic cleanup removed {removed} old jobs")

            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in periodic cleanup: {e}")

    def _job_to_model(self, job: JobData) -> "SchedulerJob":
        """Convert JobData to database model."""
        from ....db.models import SchedulerJob

        return SchedulerJob(
            id=job.id,
            name=job.name,
            func_ref=job.func_ref,
            trigger_type=job.trigger_type,
            trigger_args=job.trigger_args,
            args=list(job.args),
            kwargs=job.kwargs,
            next_run_time=job.next_run_time,
            state=job.state.value,
            retry_count=job.retry_count,
            last_run_time=job.last_run_time,
            last_error=job.last_error,
            job_metadata=job.metadata,
            created_at=job.created_at,
            updated_at=job.updated_at,
        )

    def _model_to_job(self, model: "SchedulerJob") -> JobData:
        """Convert database model to JobData."""
        return JobData(
            id=model.id,
            name=model.name,
            func_ref=model.func_ref,
            trigger_type=model.trigger_type,
            trigger_args=model.trigger_args or {},
            args=tuple(model.args) if model.args else (),
            kwargs=model.kwargs or {},
            next_run_time=model.next_run_time,
            state=JobState(model.state),
            retry_count=model.retry_count,
            last_run_time=model.last_run_time,
            last_error=model.last_error,
            metadata=model.job_metadata or {},
            created_at=model.created_at,
            updated_at=model.updated_at,
        )

    async def add_job(self, job: JobData) -> JobData:
        """Add a job to the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    # Check if job exists
                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job.id)
                    )
                    if result.scalar_one_or_none():
                        raise ValueError(f"Job {job.id} already exists")

                    job.created_at = datetime.utcnow()
                    job.updated_at = datetime.utcnow()

                    db_job = self._job_to_model(job)
                    session.add(db_job)
                    await session.commit()

                    logger.debug(f"Added job {job.id} ({job.name})")
                    return job

            except ValueError:
                raise
            except Exception as e:
                logger.error(f"Error adding job {job.id}: {e}")
                raise

    async def update_job(self, job: JobData) -> JobData:
        """Update a job in the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job.id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        raise KeyError(f"Job {job.id} not found")

                    job.updated_at = datetime.utcnow()

                    db_job.name = job.name
                    db_job.func_ref = job.func_ref
                    db_job.trigger_type = job.trigger_type
                    db_job.trigger_args = job.trigger_args
                    db_job.args = list(job.args)
                    db_job.kwargs = job.kwargs
                    db_job.next_run_time = job.next_run_time
                    db_job.state = job.state.value
                    db_job.retry_count = job.retry_count
                    db_job.last_run_time = job.last_run_time
                    db_job.last_error = job.last_error
                    db_job.job_metadata = job.metadata
                    db_job.updated_at = job.updated_at

                    await session.commit()
                    logger.debug(f"Updated job {job.id} ({job.name})")
                    return job

            except KeyError:
                raise
            except Exception as e:
                logger.error(f"Error updating job {job.id}: {e}")
                raise

    async def remove_job(self, job_id: str) -> bool:
        """Remove a job from the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import delete

                    result = await session.execute(
                        delete(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    await session.commit()

                    removed = result.rowcount > 0
                    if removed:
                        logger.debug(f"Removed job {job_id}")
                    return removed

            except Exception as e:
                logger.error(f"Error removing job {job_id}: {e}")
                return False

    async def get_job(self, job_id: str) -> JobData | None:
        """Get a job by ID from the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import select

                result = await session.execute(
                    select(SchedulerJob).where(SchedulerJob.id == job_id)
                )
                db_job = result.scalar_one_or_none()

                if db_job:
                    return self._model_to_job(db_job)
                return None

        except Exception as e:
            logger.error(f"Error getting job {job_id}: {e}")
            return None

    async def get_jobs(
        self,
        state: JobState | None = None,
        limit: int | None = None,
    ) -> list[JobData]:
        """Get jobs from the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import select

                query = select(SchedulerJob)

                if state:
                    query = query.where(SchedulerJob.state == state.value)

                query = query.order_by(SchedulerJob.next_run_time)

                if limit:
                    query = query.limit(limit)

                result = await session.execute(query)
                db_jobs = result.scalars().all()

                return [self._model_to_job(j) for j in db_jobs]

        except Exception as e:
            logger.error(f"Error getting jobs: {e}")
            return []

    async def get_due_jobs(self, now: datetime | None = None) -> list[JobData]:
        """Get jobs due for execution from the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        now = now or datetime.utcnow()

        try:
            async with get_session() as session:
                from sqlalchemy import select

                query = (
                    select(SchedulerJob)
                    .where(
                        SchedulerJob.state.in_([
                            JobState.PENDING.value,
                            JobState.MISFIRED.value,
                        ])
                    )
                    .where(SchedulerJob.next_run_time <= now)
                    .order_by(SchedulerJob.next_run_time)
                )

                result = await session.execute(query)
                db_jobs = result.scalars().all()

                jobs = []
                for db_job in db_jobs:
                    if db_job.id not in self._running_jobs:
                        jobs.append(self._model_to_job(db_job))

                return jobs

        except Exception as e:
            logger.error(f"Error getting due jobs: {e}")
            return []

    async def mark_job_running(self, job_id: str) -> bool:
        """Mark a job as running in the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            if job_id in self._running_jobs:
                return False

            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        return False

                    if db_job.state == JobState.RUNNING.value:
                        return False

                    db_job.state = JobState.RUNNING.value
                    db_job.last_run_time = datetime.utcnow()
                    db_job.updated_at = datetime.utcnow()

                    await session.commit()
                    self._running_jobs.add(job_id)
                    logger.debug(f"Job {job_id} marked as running")
                    return True

            except Exception as e:
                logger.error(f"Error marking job {job_id} as running: {e}")
                return False

    async def mark_job_completed(
        self,
        job_id: str,
        next_run_time: datetime | None = None,
    ) -> bool:
        """Mark a job as completed in the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        return False

                    if next_run_time:
                        db_job.state = JobState.PENDING.value
                        db_job.next_run_time = next_run_time
                    else:
                        db_job.state = JobState.COMPLETED.value

                    db_job.retry_count = 0
                    db_job.last_error = None
                    db_job.updated_at = datetime.utcnow()

                    await session.commit()
                    self._running_jobs.discard(job_id)
                    logger.debug(f"Job {job_id} marked as completed")
                    return True

            except Exception as e:
                logger.error(f"Error marking job {job_id} as completed: {e}")
                return False

    async def mark_job_failed(
        self,
        job_id: str,
        error: str,
        schedule_retry: bool = True,
    ) -> bool:
        """Mark a job as failed in the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        return False

                    db_job.last_error = error
                    db_job.updated_at = datetime.utcnow()
                    self._running_jobs.discard(job_id)

                    if schedule_retry and db_job.retry_count < self.config.max_retries:
                        db_job.retry_count += 1
                        delay = self.calculate_retry_delay(db_job.retry_count)
                        db_job.next_run_time = datetime.utcnow() + timedelta(seconds=delay)
                        db_job.state = JobState.PENDING.value
                        logger.info(
                            f"Job {job_id} failed, retry {db_job.retry_count}/"
                            f"{self.config.max_retries} scheduled in {delay:.1f}s"
                        )
                    else:
                        db_job.state = JobState.FAILED.value
                        logger.error(f"Job {job_id} failed permanently: {error}")

                    await session.commit()
                    return True

            except Exception as e:
                logger.error(f"Error marking job {job_id} as failed: {e}")
                return False

    async def cleanup_old_jobs(self, older_than: datetime) -> int:
        """Remove old completed/failed jobs from the database."""
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import delete

                result = await session.execute(
                    delete(SchedulerJob)
                    .where(
                        SchedulerJob.state.in_([
                            JobState.COMPLETED.value,
                            JobState.FAILED.value,
                        ])
                    )
                    .where(SchedulerJob.updated_at < older_than)
                )
                await session.commit()

                removed = result.rowcount
                if removed > 0:
                    logger.info(f"Cleaned up {removed} old jobs")
                return removed

        except Exception as e:
            logger.error(f"Error cleaning up old jobs: {e}")
            return 0

    def get_status(self) -> dict[str, Any]:
        """Get backend status with database-specific information."""
        status = super().get_status()
        status["cleanup_interval"] = self.config.cleanup_interval
        status["job_retention_days"] = self.config.job_retention_days
        return status


# =============================================================================
# Factory Function
# =============================================================================


def create_scheduler_backend(
    config: SchedulerBackendConfig | None = None,
) -> SchedulerBackend:
    """Create a scheduler backend based on configuration.

    Args:
        config: Backend configuration. Uses environment config if None.

    Returns:
        Configured scheduler backend instance.

    Example:
        # Create from environment
        backend = create_scheduler_backend()

        # Create with specific config
        config = SchedulerBackendConfig(backend_type=BackendType.MEMORY)
        backend = create_scheduler_backend(config)
    """
    if config is None:
        config = SchedulerBackendConfig.from_env()

    if config.backend_type == BackendType.MEMORY:
        return InMemorySchedulerBackend(config)
    elif config.backend_type == BackendType.SQLALCHEMY:
        return SQLAlchemySchedulerBackend(config)
    else:
        raise ValueError(f"Unknown backend type: {config.backend_type}")


__all__ = [
    # Enums
    "BackendType",
    "JobState",
    "MisfirePolicy",
    # Configuration
    "SchedulerBackendConfig",
    "JobData",
    "JobExecutionResult",
    # Abstract base
    "SchedulerBackend",
    # Implementations
    "InMemorySchedulerBackend",
    "SQLAlchemySchedulerBackend",
    # Factory
    "create_scheduler_backend",
]
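For reference, a minimal end-to-end sketch of how a caller might drive the new backend API, pieced together from the docstrings in the diff above. The poll/execute loop and the "demo:notify" func_ref are illustrative assumptions, not part of the package; only the imported names come from the module itself, and the in-memory backend is used so the sketch needs no database.

    import asyncio
    from datetime import datetime, timedelta

    from truthound_dashboard.core.notifications.escalation.backends import (
        BackendType,
        JobData,
        SchedulerBackendConfig,
        create_scheduler_backend,
    )

    async def main() -> None:
        # In-memory backend: no database dependency, jobs are ephemeral.
        config = SchedulerBackendConfig(backend_type=BackendType.MEMORY, max_retries=2)
        backend = create_scheduler_backend(config)
        await backend.initialize()

        # Schedule a recurring job that is due immediately. "demo:notify"
        # is a hypothetical func_ref; the backend stores it as an opaque string.
        job = await backend.add_job(
            JobData(
                name="escalation-ping",
                func_ref="demo:notify",
                trigger_type="interval",
                trigger_args={"seconds": 30},
                next_run_time=datetime.utcnow(),
            )
        )

        # One poll/execute cycle, as an external executor might run it.
        for due in await backend.get_due_jobs():
            if await backend.mark_job_running(due.id):
                try:
                    # ... resolve func_ref and invoke the target here ...
                    # Passing next_run_time keeps a recurring job in PENDING.
                    await backend.mark_job_completed(
                        due.id,
                        next_run_time=datetime.utcnow() + timedelta(seconds=30),
                    )
                except Exception as exc:
                    # Failure schedules a retry with exponential backoff:
                    # retry_base_delay * 2**retry_count, capped at retry_max_delay.
                    await backend.mark_job_failed(due.id, str(exc))

        refreshed = await backend.get_job(job.id)
        print(refreshed.state)  # JobState.PENDING for a recurring job
        await backend.shutdown()

    asyncio.run(main())

Swapping BackendType.MEMORY for BackendType.SQLALCHEMY (or simply calling create_scheduler_backend() with no argument, which reads the TRUTHOUND_SCHEDULER_* environment variables) would exercise the persistent path instead, assuming the application's database is reachable.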