truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
  164. truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
  166. truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1384 @@
1
+ """Scheduler backend implementations for persistent and in-memory job storage.
2
+
3
+ This module provides abstract and concrete implementations for scheduler backends,
4
+ enabling different storage strategies for APScheduler jobs.
5
+
6
+ Backends:
7
+ - InMemorySchedulerBackend: Fast, ephemeral storage (lost on restart)
8
+ - SQLAlchemySchedulerBackend: Persistent SQLite storage (survives restarts)
9
+
10
+ Features:
11
+ - Abstract base class for custom implementations
12
+ - Configurable misfire handling with grace time
13
+ - Exponential backoff for error recovery
14
+ - Job coalescing to prevent duplicate executions
15
+ - Thread-safe operations with proper locking
16
+ - Graceful shutdown with pending job handling
17
+
18
+ Usage:
19
+ from truthound_dashboard.core.notifications.escalation.backends import (
20
+ SQLAlchemySchedulerBackend,
21
+ SchedulerBackendConfig,
22
+ )
23
+
24
+ config = SchedulerBackendConfig(
25
+ backend_type="sqlalchemy",
26
+ misfire_grace_time=60,
27
+ coalesce=True,
28
+ )
29
+
30
+ backend = SQLAlchemySchedulerBackend(config)
31
+ await backend.initialize()
32
+ await backend.add_job(job_data)
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ import asyncio
38
+ import json
39
+ import logging
40
+ import os
41
+ from abc import ABC, abstractmethod
42
+ from dataclasses import dataclass, field
43
+ from datetime import datetime, timedelta
44
+ from enum import Enum
45
+ from typing import Any, Callable
46
+ from uuid import uuid4
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
+ # =============================================================================
52
+ # Enums and Configuration
53
+ # =============================================================================
54
+
55
+
56
class BackendType(str, Enum):
    """Type of scheduler backend.

    Inherits from ``str`` so members compare equal to (and serialize as)
    their plain string values, e.g. ``BackendType.MEMORY == "memory"``.
    """

    MEMORY = "memory"  # ephemeral in-process storage; jobs are lost on restart
    SQLALCHEMY = "sqlalchemy"  # persistent database-backed storage
    # Future: REDIS = "redis"
62
+
63
+
64
class JobState(str, Enum):
    """State of a scheduled job.

    Lifecycle (as implemented by the backends in this module):
    PENDING -> RUNNING -> COMPLETED | FAILED, with MISFIRED set when a
    job's grace window is exceeded and PAUSED for manually suspended jobs.
    """

    PENDING = "pending"  # waiting for its next_run_time
    RUNNING = "running"  # currently executing
    COMPLETED = "completed"  # finished successfully (one-shot jobs)
    FAILED = "failed"  # exhausted retries or failed without retry
    MISFIRED = "misfired"  # missed its scheduled time beyond the grace window
    PAUSED = "paused"  # suspended; not considered for execution
73
+
74
+
75
class MisfirePolicy(str, Enum):
    """Policy for handling misfired jobs.

    - SKIP: Skip the misfired execution entirely
    - RUN_ONCE: Run once if misfired (coalesce multiple misfires)
    - RUN_ALL: Run all misfired executions (catch up)

    NOTE(review): this enum is declared but ``SchedulerBackend.handle_misfire``
    does not currently consult it — confirm whether policy selection is
    wired up elsewhere.
    """

    SKIP = "skip"
    RUN_ONCE = "run_once"
    RUN_ALL = "run_all"
86
+
87
+
88
@dataclass
class SchedulerBackendConfig:
    """Configuration options shared by every scheduler backend.

    Attributes:
        backend_type: Which backend implementation to use (memory, sqlalchemy).
        misfire_grace_time: Seconds a job may run late before it counts as misfired.
        coalesce: Collapse multiple pending executions of a job into one run.
        max_instances: Maximum concurrent instances of the same job.
        max_retries: Maximum retry attempts after a failed execution.
        retry_base_delay: Base delay in seconds for exponential backoff.
        retry_max_delay: Upper bound in seconds on the backoff delay.
        shutdown_timeout: Seconds to wait for in-flight jobs during shutdown.
        database_url: Database URL for the SQLAlchemy backend.
        job_table_name: Table name used for persisted job rows.
        cleanup_interval: Seconds between periodic cleanup passes.
        job_retention_days: How many days completed jobs are kept.
    """

    backend_type: BackendType = BackendType.SQLALCHEMY
    misfire_grace_time: int = 60
    coalesce: bool = True
    max_instances: int = 1
    max_retries: int = 3
    retry_base_delay: float = 5.0
    retry_max_delay: float = 300.0
    shutdown_timeout: float = 30.0
    database_url: str | None = None
    job_table_name: str = "scheduler_jobs"
    cleanup_interval: int = 3600  # 1 hour
    job_retention_days: int = 7

    @classmethod
    def from_env(cls) -> SchedulerBackendConfig:
        """Build a configuration from ``TRUTHOUND_SCHEDULER_*`` environment variables.

        Environment variables:
            TRUTHOUND_SCHEDULER_BACKEND: Backend type (memory, sqlalchemy)
            TRUTHOUND_SCHEDULER_MISFIRE_GRACE_TIME: Seconds for misfire grace
            TRUTHOUND_SCHEDULER_COALESCE: Whether to coalesce jobs (true/false)
            TRUTHOUND_SCHEDULER_MAX_RETRIES: Maximum retry attempts
            TRUTHOUND_SCHEDULER_SHUTDOWN_TIMEOUT: Shutdown timeout seconds

        Unset variables fall back to the dataclass defaults; an invalid
        backend name raises ``ValueError`` from the enum constructor.
        """
        env = os.getenv
        raw_coalesce = env("TRUTHOUND_SCHEDULER_COALESCE", "true")
        return cls(
            backend_type=BackendType(env("TRUTHOUND_SCHEDULER_BACKEND", "sqlalchemy")),
            misfire_grace_time=int(env("TRUTHOUND_SCHEDULER_MISFIRE_GRACE_TIME", "60")),
            coalesce=raw_coalesce.lower() == "true",
            max_retries=int(env("TRUTHOUND_SCHEDULER_MAX_RETRIES", "3")),
            shutdown_timeout=float(env("TRUTHOUND_SCHEDULER_SHUTDOWN_TIMEOUT", "30")),
            job_retention_days=int(env("TRUTHOUND_SCHEDULER_JOB_RETENTION_DAYS", "7")),
        )
147
+
148
+
149
@dataclass
class JobData:
    """Serializable record describing a single scheduled job.

    Attributes:
        id: Unique job identifier (random UUID by default).
        name: Human-readable job name.
        func_ref: Reference string for the function to execute.
        trigger_type: Trigger kind (interval, cron, date).
        trigger_args: Arguments for the trigger.
        args: Positional arguments for the function.
        kwargs: Keyword arguments for the function.
        next_run_time: Next scheduled execution time (naive UTC).
        state: Current job state.
        retry_count: Number of retry attempts so far.
        last_run_time: Last execution time.
        last_error: Last error message, if any.
        metadata: Additional job metadata.
        created_at: Creation timestamp.
        updated_at: Last-modified timestamp.
    """

    id: str = field(default_factory=lambda: str(uuid4()))
    name: str = ""
    func_ref: str = ""
    trigger_type: str = "interval"
    trigger_args: dict[str, Any] = field(default_factory=dict)
    args: tuple[Any, ...] = field(default_factory=tuple)
    kwargs: dict[str, Any] = field(default_factory=dict)
    next_run_time: datetime | None = None
    state: JobState = JobState.PENDING
    retry_count: int = 0
    last_run_time: datetime | None = None
    last_error: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    def to_dict(self) -> dict[str, Any]:
        """Serialize this job to a JSON-compatible dictionary."""

        def iso(dt: datetime | None) -> str | None:
            # Optional datetimes become ISO strings; absent ones stay None.
            return dt.isoformat() if dt else None

        return {
            "id": self.id,
            "name": self.name,
            "func_ref": self.func_ref,
            "trigger_type": self.trigger_type,
            "trigger_args": self.trigger_args,
            "args": list(self.args),
            "kwargs": self.kwargs,
            "next_run_time": iso(self.next_run_time),
            "state": self.state.value,
            "retry_count": self.retry_count,
            "last_run_time": iso(self.last_run_time),
            "last_error": self.last_error,
            "metadata": self.metadata,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> JobData:
        """Build a :class:`JobData` from a dictionary produced by :meth:`to_dict`."""

        def parse(key: str) -> datetime | None:
            # Missing/empty timestamps deserialize to None.
            raw = data.get(key)
            return datetime.fromisoformat(raw) if raw else None

        return cls(
            id=data.get("id", str(uuid4())),
            name=data.get("name", ""),
            func_ref=data.get("func_ref", ""),
            trigger_type=data.get("trigger_type", "interval"),
            trigger_args=data.get("trigger_args", {}),
            args=tuple(data.get("args", [])),
            kwargs=data.get("kwargs", {}),
            next_run_time=parse("next_run_time"),
            state=JobState(data.get("state", "pending")),
            retry_count=data.get("retry_count", 0),
            last_run_time=parse("last_run_time"),
            last_error=data.get("last_error"),
            metadata=data.get("metadata", {}),
            # datetime instances are always truthy, so `or` only fires
            # when the field is absent — same fallback as the original.
            created_at=parse("created_at") or datetime.utcnow(),
            updated_at=parse("updated_at") or datetime.utcnow(),
        )
243
+
244
+
245
@dataclass
class JobExecutionResult:
    """Result of a job execution.

    Attributes:
        success: Whether the execution succeeded.
        result: Return value from the job function.
        error: Error message if failed.
        duration_ms: Execution duration in milliseconds.
        retry_scheduled: Whether a retry was scheduled.
    """

    success: bool
    result: Any = None  # opaque return value; shape depends on the job function
    error: str | None = None  # populated only when success is False
    duration_ms: int = 0
    retry_scheduled: bool = False
262
+
263
+
264
+ # =============================================================================
265
+ # Abstract Base Class
266
+ # =============================================================================
267
+
268
+
269
class SchedulerBackend(ABC):
    """Abstract base class for scheduler backends.

    Subclasses must implement all abstract methods to provide
    custom storage strategies for scheduled jobs.

    This class defines the contract for:
    - Job lifecycle management (add, update, remove)
    - Job retrieval and querying
    - Misfire handling
    - Error recovery
    - Cleanup and maintenance

    All timestamps in this module are naive ``datetime.utcnow()`` values;
    implementations should not mix in timezone-aware datetimes.
    """

    def __init__(self, config: SchedulerBackendConfig | None = None) -> None:
        """Initialize the backend.

        Args:
            config: Backend configuration. Uses defaults if None.
        """
        self.config = config or SchedulerBackendConfig()
        self._initialized = False  # flipped by initialize()
        self._shutdown = False  # flipped by shutdown()
        self._lock = asyncio.Lock()  # guards job mutations in subclasses
        self._running_jobs: set[str] = set()  # IDs of jobs currently executing

    @property
    @abstractmethod
    def backend_type(self) -> BackendType:
        """Return the backend type identifier."""
        ...

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the backend (create tables, connections, etc.).

        This method is called before any other operations and should
        set up any required infrastructure.
        """
        ...

    @abstractmethod
    async def shutdown(self) -> None:
        """Shutdown the backend gracefully.

        Should wait for running jobs and clean up resources.
        """
        ...

    @abstractmethod
    async def add_job(self, job: JobData) -> JobData:
        """Add a new job to the scheduler.

        Args:
            job: Job data to add.

        Returns:
            The added job with any modifications (e.g., assigned ID).

        Raises:
            ValueError: If job with same ID already exists.
        """
        ...

    @abstractmethod
    async def update_job(self, job: JobData) -> JobData:
        """Update an existing job.

        Args:
            job: Updated job data.

        Returns:
            The updated job.

        Raises:
            KeyError: If job not found.
        """
        ...

    @abstractmethod
    async def remove_job(self, job_id: str) -> bool:
        """Remove a job from the scheduler.

        Args:
            job_id: ID of job to remove.

        Returns:
            True if job was removed, False if not found.
        """
        ...

    @abstractmethod
    async def get_job(self, job_id: str) -> JobData | None:
        """Get a job by ID.

        Args:
            job_id: Job ID to retrieve.

        Returns:
            Job data or None if not found.
        """
        ...

    @abstractmethod
    async def get_jobs(
        self,
        state: JobState | None = None,
        limit: int | None = None,
    ) -> list[JobData]:
        """Get jobs, optionally filtered by state.

        Args:
            state: Optional state filter.
            limit: Maximum number of jobs to return.

        Returns:
            List of matching jobs.
        """
        ...

    @abstractmethod
    async def get_due_jobs(self, now: datetime | None = None) -> list[JobData]:
        """Get jobs that are due for execution.

        Args:
            now: Current time (defaults to utcnow).

        Returns:
            List of jobs ready to run.
        """
        ...

    @abstractmethod
    async def mark_job_running(self, job_id: str) -> bool:
        """Mark a job as running.

        Args:
            job_id: Job ID to mark.

        Returns:
            True if marked successfully, False if already running or not found.
        """
        ...

    @abstractmethod
    async def mark_job_completed(
        self,
        job_id: str,
        next_run_time: datetime | None = None,
    ) -> bool:
        """Mark a job as completed.

        Args:
            job_id: Job ID to mark.
            next_run_time: Next scheduled run time (for recurring jobs).

        Returns:
            True if marked successfully.
        """
        ...

    @abstractmethod
    async def mark_job_failed(
        self,
        job_id: str,
        error: str,
        schedule_retry: bool = True,
    ) -> bool:
        """Mark a job as failed.

        Args:
            job_id: Job ID to mark.
            error: Error message.
            schedule_retry: Whether to schedule a retry.

        Returns:
            True if marked successfully.
        """
        ...

    @abstractmethod
    async def cleanup_old_jobs(self, older_than: datetime) -> int:
        """Remove completed/failed jobs older than specified time.

        Args:
            older_than: Remove jobs updated before this time.

        Returns:
            Number of jobs removed.
        """
        ...

    # -------------------------------------------------------------------------
    # Default implementations
    # -------------------------------------------------------------------------

    def calculate_retry_delay(self, retry_count: int) -> float:
        """Calculate exponential backoff delay for retry.

        Args:
            retry_count: Current retry attempt number.

        Returns:
            Delay in seconds before next retry.
        """
        # delay = base * 2^retry_count, capped at retry_max_delay.
        delay = self.config.retry_base_delay * (2 ** retry_count)
        return min(delay, self.config.retry_max_delay)

    def is_misfired(self, job: JobData, now: datetime | None = None) -> bool:
        """Check if a job has misfired.

        A job is considered misfired if its next_run_time plus the
        misfire grace time is before the current time.

        Args:
            job: Job to check.
            now: Current time (defaults to utcnow).

        Returns:
            True if the job has misfired.
        """
        # Jobs with no scheduled time can never misfire.
        if not job.next_run_time:
            return False

        now = now or datetime.utcnow()
        grace_deadline = job.next_run_time + timedelta(
            seconds=self.config.misfire_grace_time
        )
        return now > grace_deadline

    async def handle_misfire(self, job: JobData) -> JobData:
        """Handle a misfired job according to policy.

        Args:
            job: The misfired job.

        Returns:
            Updated job data.

        NOTE(review): despite the docstring, the ``MisfirePolicy`` enum is
        not consulted here — the behavior is effectively RUN_ONCE for
        interval triggers only; confirm whether that is intended.
        """
        logger.warning(
            f"Job {job.id} ({job.name}) misfired. "
            f"Scheduled: {job.next_run_time}, Grace: {self.config.misfire_grace_time}s"
        )

        # Mark as misfired
        job.state = JobState.MISFIRED
        job.updated_at = datetime.utcnow()
        job.metadata["misfire_count"] = job.metadata.get("misfire_count", 0) + 1
        job.metadata["last_misfire_at"] = datetime.utcnow().isoformat()

        # Calculate new next_run_time based on trigger
        # Only interval triggers are rescheduled; cron/date jobs remain in
        # the MISFIRED state with no next_run_time adjustment.
        if job.trigger_type == "interval":
            interval_seconds = job.trigger_args.get("seconds", 60)
            job.next_run_time = datetime.utcnow() + timedelta(seconds=interval_seconds)
            job.state = JobState.PENDING

        await self.update_job(job)
        return job

    def get_status(self) -> dict[str, Any]:
        """Get backend status information.

        Returns:
            Status dictionary with backend state and metrics.
        """
        return {
            "backend_type": self.backend_type.value,
            "initialized": self._initialized,
            "shutdown": self._shutdown,
            "running_jobs": len(self._running_jobs),
            "config": {
                "misfire_grace_time": self.config.misfire_grace_time,
                "coalesce": self.config.coalesce,
                "max_retries": self.config.max_retries,
                "max_instances": self.config.max_instances,
            },
        }
546
+
547
+
548
+ # =============================================================================
549
+ # In-Memory Backend
550
+ # =============================================================================
551
+
552
+
553
+ class InMemorySchedulerBackend(SchedulerBackend):
554
+ """In-memory scheduler backend for ephemeral job storage.
555
+
556
+ Jobs are stored in memory and lost on process restart.
557
+ Best for development, testing, or non-critical workloads.
558
+
559
+ Features:
560
+ - Fast access without database overhead
561
+ - Thread-safe with asyncio locks
562
+ - Supports all job lifecycle operations
563
+
564
+ Limitations:
565
+ - Jobs lost on restart
566
+ - Not suitable for multi-process deployments
567
+ - Memory grows with job count
568
+ """
569
+
570
+ def __init__(self, config: SchedulerBackendConfig | None = None) -> None:
571
+ """Initialize in-memory backend."""
572
+ if config is None:
573
+ config = SchedulerBackendConfig(backend_type=BackendType.MEMORY)
574
+ super().__init__(config)
575
+ self._jobs: dict[str, JobData] = {}
576
+
577
    @property
    def backend_type(self) -> BackendType:
        # Identifies this implementation as the ephemeral in-memory backend.
        return BackendType.MEMORY
580
+
581
+ async def initialize(self) -> None:
582
+ """Initialize the in-memory backend."""
583
+ if self._initialized:
584
+ return
585
+
586
+ logger.info("Initializing in-memory scheduler backend")
587
+ self._jobs.clear()
588
+ self._initialized = True
589
+ self._shutdown = False
590
+ logger.info("In-memory scheduler backend initialized")
591
+
592
+ async def shutdown(self) -> None:
593
+ """Shutdown the in-memory backend."""
594
+ if self._shutdown:
595
+ return
596
+
597
+ logger.info("Shutting down in-memory scheduler backend")
598
+
599
+ # Wait for running jobs
600
+ if self._running_jobs:
601
+ logger.info(f"Waiting for {len(self._running_jobs)} running jobs...")
602
+ wait_until = datetime.utcnow() + timedelta(
603
+ seconds=self.config.shutdown_timeout
604
+ )
605
+ while self._running_jobs and datetime.utcnow() < wait_until:
606
+ await asyncio.sleep(0.5)
607
+
608
+ if self._running_jobs:
609
+ logger.warning(
610
+ f"Timeout waiting for jobs: {self._running_jobs}"
611
+ )
612
+
613
+ self._shutdown = True
614
+ self._initialized = False
615
+ logger.info("In-memory scheduler backend shut down")
616
+
617
+ async def add_job(self, job: JobData) -> JobData:
618
+ """Add a job to memory."""
619
+ async with self._lock:
620
+ if job.id in self._jobs:
621
+ raise ValueError(f"Job {job.id} already exists")
622
+
623
+ job.created_at = datetime.utcnow()
624
+ job.updated_at = datetime.utcnow()
625
+ self._jobs[job.id] = job
626
+ logger.debug(f"Added job {job.id} ({job.name})")
627
+ return job
628
+
629
+ async def update_job(self, job: JobData) -> JobData:
630
+ """Update a job in memory."""
631
+ async with self._lock:
632
+ if job.id not in self._jobs:
633
+ raise KeyError(f"Job {job.id} not found")
634
+
635
+ job.updated_at = datetime.utcnow()
636
+ self._jobs[job.id] = job
637
+ logger.debug(f"Updated job {job.id} ({job.name})")
638
+ return job
639
+
640
+ async def remove_job(self, job_id: str) -> bool:
641
+ """Remove a job from memory."""
642
+ async with self._lock:
643
+ if job_id in self._jobs:
644
+ del self._jobs[job_id]
645
+ logger.debug(f"Removed job {job_id}")
646
+ return True
647
+ return False
648
+
649
    async def get_job(self, job_id: str) -> JobData | None:
        """Get a job by ID."""
        # Plain dict lookup; no lock taken since a single read cannot
        # observe a partially-applied mutation under the event loop.
        return self._jobs.get(job_id)
652
+
653
+ async def get_jobs(
654
+ self,
655
+ state: JobState | None = None,
656
+ limit: int | None = None,
657
+ ) -> list[JobData]:
658
+ """Get jobs, optionally filtered by state."""
659
+ jobs = list(self._jobs.values())
660
+
661
+ if state:
662
+ jobs = [j for j in jobs if j.state == state]
663
+
664
+ # Sort by next_run_time
665
+ jobs.sort(key=lambda j: j.next_run_time or datetime.max)
666
+
667
+ if limit:
668
+ jobs = jobs[:limit]
669
+
670
+ return jobs
671
+
672
+ async def get_due_jobs(self, now: datetime | None = None) -> list[JobData]:
673
+ """Get jobs due for execution."""
674
+ now = now or datetime.utcnow()
675
+ due_jobs = []
676
+
677
+ for job in self._jobs.values():
678
+ if job.state not in (JobState.PENDING, JobState.MISFIRED):
679
+ continue
680
+ if job.id in self._running_jobs:
681
+ continue
682
+ if job.next_run_time and job.next_run_time <= now:
683
+ due_jobs.append(job)
684
+
685
+ # Sort by next_run_time (earliest first)
686
+ due_jobs.sort(key=lambda j: j.next_run_time or datetime.min)
687
+ return due_jobs
688
+
689
+ async def mark_job_running(self, job_id: str) -> bool:
690
+ """Mark a job as running."""
691
+ async with self._lock:
692
+ job = self._jobs.get(job_id)
693
+ if not job:
694
+ return False
695
+
696
+ if job_id in self._running_jobs:
697
+ return False
698
+
699
+ # Check max instances
700
+ if len(self._running_jobs) >= self.config.max_instances:
701
+ logger.debug(f"Max instances reached, cannot run {job_id}")
702
+ return False
703
+
704
+ job.state = JobState.RUNNING
705
+ job.last_run_time = datetime.utcnow()
706
+ job.updated_at = datetime.utcnow()
707
+ self._running_jobs.add(job_id)
708
+ logger.debug(f"Job {job_id} marked as running")
709
+ return True
710
+
711
+ async def mark_job_completed(
712
+ self,
713
+ job_id: str,
714
+ next_run_time: datetime | None = None,
715
+ ) -> bool:
716
+ """Mark a job as completed."""
717
+ async with self._lock:
718
+ job = self._jobs.get(job_id)
719
+ if not job:
720
+ return False
721
+
722
+ job.state = JobState.COMPLETED if not next_run_time else JobState.PENDING
723
+ job.next_run_time = next_run_time
724
+ job.retry_count = 0
725
+ job.last_error = None
726
+ job.updated_at = datetime.utcnow()
727
+ self._running_jobs.discard(job_id)
728
+ logger.debug(f"Job {job_id} marked as completed")
729
+ return True
730
+
731
+ async def mark_job_failed(
732
+ self,
733
+ job_id: str,
734
+ error: str,
735
+ schedule_retry: bool = True,
736
+ ) -> bool:
737
+ """Mark a job as failed."""
738
+ async with self._lock:
739
+ job = self._jobs.get(job_id)
740
+ if not job:
741
+ return False
742
+
743
+ job.last_error = error
744
+ job.updated_at = datetime.utcnow()
745
+ self._running_jobs.discard(job_id)
746
+
747
+ if schedule_retry and job.retry_count < self.config.max_retries:
748
+ job.retry_count += 1
749
+ delay = self.calculate_retry_delay(job.retry_count)
750
+ job.next_run_time = datetime.utcnow() + timedelta(seconds=delay)
751
+ job.state = JobState.PENDING
752
+ logger.info(
753
+ f"Job {job_id} failed, retry {job.retry_count}/{self.config.max_retries} "
754
+ f"scheduled in {delay:.1f}s"
755
+ )
756
+ else:
757
+ job.state = JobState.FAILED
758
+ logger.error(f"Job {job_id} failed permanently: {error}")
759
+
760
+ return True
761
+
762
+ async def cleanup_old_jobs(self, older_than: datetime) -> int:
763
+ """Remove old completed/failed jobs."""
764
+ async with self._lock:
765
+ to_remove = []
766
+ for job_id, job in self._jobs.items():
767
+ if job.state in (JobState.COMPLETED, JobState.FAILED):
768
+ if job.updated_at < older_than:
769
+ to_remove.append(job_id)
770
+
771
+ for job_id in to_remove:
772
+ del self._jobs[job_id]
773
+
774
+ if to_remove:
775
+ logger.info(f"Cleaned up {len(to_remove)} old jobs")
776
+ return len(to_remove)
777
+
778
+
779
+ # =============================================================================
780
+ # SQLAlchemy Backend
781
+ # =============================================================================
782
+
783
+
784
class SQLAlchemySchedulerBackend(SchedulerBackend):
    """SQLAlchemy-based scheduler backend for persistent job storage.

    Jobs are stored in SQLite database and survive process restarts.
    Suitable for production workloads requiring durability.

    Features:
        - Persistent storage in SQLite
        - Automatic table creation
        - Thread-safe with row-level locking
        - Supports all job lifecycle operations
        - Automatic cleanup of old jobs

    Usage:
        backend = SQLAlchemySchedulerBackend(config)
        await backend.initialize()

        job = JobData(name="my_job", func_ref="module:function")
        await backend.add_job(job)
    """

    def __init__(self, config: SchedulerBackendConfig | None = None) -> None:
        """Initialize SQLAlchemy backend."""
        if config is None:
            config = SchedulerBackendConfig(backend_type=BackendType.SQLALCHEMY)
        super().__init__(config)
        # Handle for the background cleanup loop started in initialize().
        self._cleanup_task: asyncio.Task | None = None

    @property
    def backend_type(self) -> BackendType:
        # Identifies this implementation to the factory/status reporting.
        return BackendType.SQLALCHEMY

    async def initialize(self) -> None:
        """Initialize the SQLAlchemy backend and ensure table exists.

        Idempotent: returns immediately if already initialized. Creates DB
        tables, recovers jobs stranded in RUNNING state from a previous
        crash, and starts the periodic-cleanup background task.
        """
        if self._initialized:
            return

        logger.info("Initializing SQLAlchemy scheduler backend")

        try:
            # Import here to avoid circular imports
            # NOTE(review): get_session is imported but unused in this method;
            # only init_db is called here.
            from ....db import get_session, init_db

            # Ensure database tables are created
            await init_db()

            # Recover misfired jobs on startup
            await self._recover_misfired_jobs()

            # Start cleanup task
            self._cleanup_task = asyncio.create_task(self._periodic_cleanup())

            self._initialized = True
            self._shutdown = False
            logger.info("SQLAlchemy scheduler backend initialized")

        except Exception as e:
            logger.error(f"Failed to initialize SQLAlchemy backend: {e}")
            raise

    async def shutdown(self) -> None:
        """Shutdown the SQLAlchemy backend.

        Cancels the cleanup task, waits up to ``config.shutdown_timeout``
        seconds for running jobs to drain, then flips any still-running jobs
        back to PENDING so they are recovered on the next startup.
        """
        if self._shutdown:
            return

        logger.info("Shutting down SQLAlchemy scheduler backend")

        # Cancel cleanup task
        if self._cleanup_task:
            self._cleanup_task.cancel()
            try:
                await self._cleanup_task
            except asyncio.CancelledError:
                pass

        # Wait for running jobs
        if self._running_jobs:
            logger.info(f"Waiting for {len(self._running_jobs)} running jobs...")
            wait_until = datetime.utcnow() + timedelta(
                seconds=self.config.shutdown_timeout
            )
            # Poll every 0.5s until drained or the deadline passes.
            while self._running_jobs and datetime.utcnow() < wait_until:
                await asyncio.sleep(0.5)

        # Mark remaining running jobs as pending for recovery on restart
        if self._running_jobs:
            logger.warning(
                f"Marking {len(self._running_jobs)} jobs as pending for recovery"
            )
            # Copy the set: update_job mutates DB state while we iterate.
            for job_id in list(self._running_jobs):
                try:
                    job = await self.get_job(job_id)
                    if job:
                        job.state = JobState.PENDING
                        await self.update_job(job)
                except Exception as e:
                    logger.error(f"Error recovering job {job_id}: {e}")

        self._shutdown = True
        self._initialized = False
        logger.info("SQLAlchemy scheduler backend shut down")

    async def _recover_misfired_jobs(self) -> None:
        """Recover jobs that were running during previous shutdown.

        Any row left in RUNNING state (e.g. after a crash) is reset to
        PENDING so the scheduler picks it up again. Errors are logged, not
        raised, so startup can proceed.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import select, update

                # Find jobs that were left in running state
                result = await session.execute(
                    select(SchedulerJob).where(
                        SchedulerJob.state == JobState.RUNNING.value
                    )
                )
                running_jobs = result.scalars().all()

                for db_job in running_jobs:
                    logger.info(f"Recovering job {db_job.id} from running state")
                    db_job.state = JobState.PENDING.value
                    db_job.updated_at = datetime.utcnow()

                await session.commit()

                if running_jobs:
                    logger.info(f"Recovered {len(running_jobs)} jobs from running state")

        except Exception as e:
            logger.error(f"Error recovering misfired jobs: {e}")

    async def _periodic_cleanup(self) -> None:
        """Periodically clean up old jobs.

        Loops until shutdown, sleeping ``config.cleanup_interval`` seconds
        between passes; each pass deletes terminal jobs older than
        ``config.job_retention_days``.
        """
        while not self._shutdown:
            try:
                await asyncio.sleep(self.config.cleanup_interval)
                # Re-check after the sleep: shutdown may have started meanwhile.
                if self._shutdown:
                    break

                older_than = datetime.utcnow() - timedelta(
                    days=self.config.job_retention_days
                )
                removed = await self.cleanup_old_jobs(older_than)
                if removed > 0:
                    logger.debug(f"Periodic cleanup removed {removed} old jobs")

            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Error in periodic cleanup: {e}")

    def _job_to_model(self, job: JobData) -> "SchedulerJob":
        """Convert JobData to database model.

        Enum state is stored as its string value; args tuple becomes a list
        for JSON-friendly storage; ``metadata`` maps to ``job_metadata``
        (``metadata`` is reserved by SQLAlchemy's declarative base).
        """
        from ....db.models import SchedulerJob

        return SchedulerJob(
            id=job.id,
            name=job.name,
            func_ref=job.func_ref,
            trigger_type=job.trigger_type,
            trigger_args=job.trigger_args,
            args=list(job.args),
            kwargs=job.kwargs,
            next_run_time=job.next_run_time,
            state=job.state.value,
            retry_count=job.retry_count,
            last_run_time=job.last_run_time,
            last_error=job.last_error,
            job_metadata=job.metadata,
            created_at=job.created_at,
            updated_at=job.updated_at,
        )

    def _model_to_job(self, model: "SchedulerJob") -> JobData:
        """Convert database model to JobData.

        Inverse of :meth:`_job_to_model`; NULL collections are normalized to
        empty containers.
        """
        return JobData(
            id=model.id,
            name=model.name,
            func_ref=model.func_ref,
            trigger_type=model.trigger_type,
            trigger_args=model.trigger_args or {},
            args=tuple(model.args) if model.args else (),
            kwargs=model.kwargs or {},
            next_run_time=model.next_run_time,
            state=JobState(model.state),
            retry_count=model.retry_count,
            last_run_time=model.last_run_time,
            last_error=model.last_error,
            metadata=model.job_metadata or {},
            created_at=model.created_at,
            updated_at=model.updated_at,
        )

    async def add_job(self, job: JobData) -> JobData:
        """Add a job to the database.

        Raises:
            ValueError: If a row with the same job id already exists.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    # Check if job exists
                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job.id)
                    )
                    if result.scalar_one_or_none():
                        raise ValueError(f"Job {job.id} already exists")

                    job.created_at = datetime.utcnow()
                    job.updated_at = datetime.utcnow()

                    db_job = self._job_to_model(job)
                    session.add(db_job)
                    await session.commit()

                    logger.debug(f"Added job {job.id} ({job.name})")
                    return job

            except ValueError:
                # Duplicate-id error is part of the contract; re-raise as-is.
                raise
            except Exception as e:
                logger.error(f"Error adding job {job.id}: {e}")
                raise

    async def update_job(self, job: JobData) -> JobData:
        """Update a job in the database.

        Overwrites every mutable column from ``job``.

        Raises:
            KeyError: If no row with ``job.id`` exists.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job.id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        raise KeyError(f"Job {job.id} not found")

                    job.updated_at = datetime.utcnow()

                    # Field-by-field copy onto the managed row.
                    db_job.name = job.name
                    db_job.func_ref = job.func_ref
                    db_job.trigger_type = job.trigger_type
                    db_job.trigger_args = job.trigger_args
                    db_job.args = list(job.args)
                    db_job.kwargs = job.kwargs
                    db_job.next_run_time = job.next_run_time
                    db_job.state = job.state.value
                    db_job.retry_count = job.retry_count
                    db_job.last_run_time = job.last_run_time
                    db_job.last_error = job.last_error
                    db_job.job_metadata = job.metadata
                    db_job.updated_at = job.updated_at

                    await session.commit()
                    logger.debug(f"Updated job {job.id} ({job.name})")
                    return job

            except KeyError:
                # Missing-job error is part of the contract; re-raise as-is.
                raise
            except Exception as e:
                logger.error(f"Error updating job {job.id}: {e}")
                raise

    async def remove_job(self, job_id: str) -> bool:
        """Remove a job from the database.

        Returns True when a row was deleted; False if absent or on error
        (errors are logged, not raised).
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import delete

                    result = await session.execute(
                        delete(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    await session.commit()

                    removed = result.rowcount > 0
                    if removed:
                        logger.debug(f"Removed job {job_id}")
                    return removed

            except Exception as e:
                logger.error(f"Error removing job {job_id}: {e}")
                return False

    async def get_job(self, job_id: str) -> JobData | None:
        """Get a job by ID from the database.

        Returns None when the job is absent or a DB error occurs (errors are
        logged, not raised).
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import select

                result = await session.execute(
                    select(SchedulerJob).where(SchedulerJob.id == job_id)
                )
                db_job = result.scalar_one_or_none()

                if db_job:
                    return self._model_to_job(db_job)
                return None

        except Exception as e:
            logger.error(f"Error getting job {job_id}: {e}")
            return None

    async def get_jobs(
        self,
        state: JobState | None = None,
        limit: int | None = None,
    ) -> list[JobData]:
        """Get jobs from the database.

        Ordered by ``next_run_time``; optionally filtered by state and
        truncated to ``limit`` rows. Returns [] on error.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import select

                query = select(SchedulerJob)

                if state:
                    query = query.where(SchedulerJob.state == state.value)

                query = query.order_by(SchedulerJob.next_run_time)

                if limit:
                    query = query.limit(limit)

                result = await session.execute(query)
                db_jobs = result.scalars().all()

                return [self._model_to_job(j) for j in db_jobs]

        except Exception as e:
            logger.error(f"Error getting jobs: {e}")
            return []

    async def get_due_jobs(self, now: datetime | None = None) -> list[JobData]:
        """Get jobs due for execution from the database.

        Selects PENDING/MISFIRED rows with ``next_run_time <= now`` (earliest
        first), then filters out ids already tracked as running in this
        process. Returns [] on error.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        now = now or datetime.utcnow()

        try:
            async with get_session() as session:
                from sqlalchemy import select

                query = (
                    select(SchedulerJob)
                    .where(
                        SchedulerJob.state.in_([
                            JobState.PENDING.value,
                            JobState.MISFIRED.value,
                        ])
                    )
                    .where(SchedulerJob.next_run_time <= now)
                    .order_by(SchedulerJob.next_run_time)
                )

                result = await session.execute(query)
                db_jobs = result.scalars().all()

                jobs = []
                for db_job in db_jobs:
                    # In-process running set filters duplicates; the DB query
                    # alone cannot see this process's in-flight jobs.
                    if db_job.id not in self._running_jobs:
                        jobs.append(self._model_to_job(db_job))

                return jobs

        except Exception as e:
            logger.error(f"Error getting due jobs: {e}")
            return []

    async def mark_job_running(self, job_id: str) -> bool:
        """Mark a job as running in the database.

        Returns False when the job is unknown, already RUNNING in the DB, or
        already tracked in the local running set.

        NOTE(review): unlike the in-memory backend, this does not enforce
        ``config.max_instances`` — confirm whether the cap should apply here.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            if job_id in self._running_jobs:
                return False

            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        return False

                    if db_job.state == JobState.RUNNING.value:
                        return False

                    db_job.state = JobState.RUNNING.value
                    db_job.last_run_time = datetime.utcnow()
                    db_job.updated_at = datetime.utcnow()

                    await session.commit()
                    # Track locally only after the commit succeeds.
                    self._running_jobs.add(job_id)
                    logger.debug(f"Job {job_id} marked as running")
                    return True

            except Exception as e:
                logger.error(f"Error marking job {job_id} as running: {e}")
                return False

    async def mark_job_completed(
        self,
        job_id: str,
        next_run_time: datetime | None = None,
    ) -> bool:
        """Mark a job as completed in the database.

        With ``next_run_time`` the row is re-armed as PENDING; otherwise it
        becomes COMPLETED. Resets retry count and clears the last error.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        return False

                    if next_run_time:
                        db_job.state = JobState.PENDING.value
                        db_job.next_run_time = next_run_time
                    else:
                        db_job.state = JobState.COMPLETED.value

                    db_job.retry_count = 0
                    db_job.last_error = None
                    db_job.updated_at = datetime.utcnow()

                    await session.commit()
                    self._running_jobs.discard(job_id)
                    logger.debug(f"Job {job_id} marked as completed")
                    return True

            except Exception as e:
                logger.error(f"Error marking job {job_id} as completed: {e}")
                return False

    async def mark_job_failed(
        self,
        job_id: str,
        error: str,
        schedule_retry: bool = True,
    ) -> bool:
        """Mark a job as failed in the database.

        While retries remain (and ``schedule_retry`` is True) the row is
        re-queued as PENDING with a backoff from ``calculate_retry_delay``;
        otherwise it becomes FAILED permanently.
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        async with self._lock:
            try:
                async with get_session() as session:
                    from sqlalchemy import select

                    result = await session.execute(
                        select(SchedulerJob).where(SchedulerJob.id == job_id)
                    )
                    db_job = result.scalar_one_or_none()

                    if not db_job:
                        return False

                    db_job.last_error = error
                    db_job.updated_at = datetime.utcnow()
                    # NOTE(review): local running set is cleared before the
                    # commit; a failed commit leaves DB and set out of sync.
                    self._running_jobs.discard(job_id)

                    if schedule_retry and db_job.retry_count < self.config.max_retries:
                        db_job.retry_count += 1
                        delay = self.calculate_retry_delay(db_job.retry_count)
                        db_job.next_run_time = datetime.utcnow() + timedelta(seconds=delay)
                        db_job.state = JobState.PENDING.value
                        logger.info(
                            f"Job {job_id} failed, retry {db_job.retry_count}/"
                            f"{self.config.max_retries} scheduled in {delay:.1f}s"
                        )
                    else:
                        db_job.state = JobState.FAILED.value
                        logger.error(f"Job {job_id} failed permanently: {error}")

                    await session.commit()
                    return True

            except Exception as e:
                logger.error(f"Error marking job {job_id} as failed: {e}")
                return False

    async def cleanup_old_jobs(self, older_than: datetime) -> int:
        """Remove old completed/failed jobs from the database.

        Deletes terminal rows last updated before *older_than*; returns the
        number removed (0 on error).
        """
        from ....db import get_session
        from ....db.models import SchedulerJob

        try:
            async with get_session() as session:
                from sqlalchemy import delete

                result = await session.execute(
                    delete(SchedulerJob)
                    .where(
                        SchedulerJob.state.in_([
                            JobState.COMPLETED.value,
                            JobState.FAILED.value,
                        ])
                    )
                    .where(SchedulerJob.updated_at < older_than)
                )
                await session.commit()

                removed = result.rowcount
                if removed > 0:
                    logger.info(f"Cleaned up {removed} old jobs")
                return removed

        except Exception as e:
            logger.error(f"Error cleaning up old jobs: {e}")
            return 0

    def get_status(self) -> dict[str, Any]:
        """Get backend status with database-specific information."""
        status = super().get_status()
        # Expose the persistence-specific knobs alongside the base status.
        status["cleanup_interval"] = self.config.cleanup_interval
        status["job_retention_days"] = self.config.job_retention_days
        return status
+
1332
+
1333
+ # =============================================================================
1334
+ # Factory Function
1335
+ # =============================================================================
1336
+
1337
+
1338
def create_scheduler_backend(
    config: SchedulerBackendConfig | None = None,
) -> SchedulerBackend:
    """Create a scheduler backend based on configuration.

    Args:
        config: Backend configuration. Uses environment config if None.

    Returns:
        Configured scheduler backend instance.

    Raises:
        ValueError: If the configured backend type is not recognized.

    Example:
        # Create from environment
        backend = create_scheduler_backend()

        # Create with specific config
        config = SchedulerBackendConfig(backend_type=BackendType.MEMORY)
        backend = create_scheduler_backend(config)
    """
    if config is None:
        config = SchedulerBackendConfig.from_env()

    # Dispatch table instead of an if/elif chain.
    backend_classes = {
        BackendType.MEMORY: InMemorySchedulerBackend,
        BackendType.SQLALCHEMY: SQLAlchemySchedulerBackend,
    }
    try:
        backend_cls = backend_classes[config.backend_type]
    except KeyError:
        raise ValueError(f"Unknown backend type: {config.backend_type}") from None
    return backend_cls(config)
+
1367
+
1368
# Public API of this module, grouped by role.
__all__ = [
    # Enums
    "BackendType",
    "JobState",
    "MisfirePolicy",
    # Configuration
    "SchedulerBackendConfig",
    "JobData",
    "JobExecutionResult",
    # Abstract base
    "SchedulerBackend",
    # Implementations
    "InMemorySchedulerBackend",
    "SQLAlchemySchedulerBackend",
    # Factory
    "create_scheduler_backend",
]