stabilize 0.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. stabilize/__init__.py +255 -0
  2. stabilize/assertions.py +465 -0
  3. stabilize/cli.py +1878 -0
  4. stabilize/conditions.py +351 -0
  5. stabilize/config_validation.py +568 -0
  6. stabilize/context/__init__.py +7 -0
  7. stabilize/context/stage_context.py +170 -0
  8. stabilize/dag/__init__.py +15 -0
  9. stabilize/dag/graph.py +215 -0
  10. stabilize/dag/topological.py +199 -0
  11. stabilize/errors.py +383 -0
  12. stabilize/examples/__init__.py +1 -0
  13. stabilize/examples/docker-example.py +480 -0
  14. stabilize/examples/golden-standard-expected-result.txt +1 -0
  15. stabilize/examples/golden-standard.py +490 -0
  16. stabilize/examples/highway-integration-example.py +412 -0
  17. stabilize/examples/http-example.py +499 -0
  18. stabilize/examples/llama-example.py +683 -0
  19. stabilize/examples/python-example.py +593 -0
  20. stabilize/examples/shell-example.py +347 -0
  21. stabilize/examples/ssh-example.py +468 -0
  22. stabilize/handlers/__init__.py +53 -0
  23. stabilize/handlers/base.py +226 -0
  24. stabilize/handlers/complete_stage.py +219 -0
  25. stabilize/handlers/complete_task.py +81 -0
  26. stabilize/handlers/complete_workflow.py +150 -0
  27. stabilize/handlers/run_task.py +518 -0
  28. stabilize/handlers/start_stage.py +320 -0
  29. stabilize/handlers/start_task.py +74 -0
  30. stabilize/handlers/start_workflow.py +136 -0
  31. stabilize/launcher.py +307 -0
  32. stabilize/logging.py +228 -0
  33. stabilize/migrations/01KDQ4N9QPJ6Q4MCV3V9GHWPV4_initial_schema.sql +97 -0
  34. stabilize/migrations/01KE39HEAN221S7AWNS4A0H750_add_performance_indexes.sql +40 -0
  35. stabilize/migrations/01KE4YJT2W8A6NBX5R7CQMJK84_add_dlq_table.sql +28 -0
  36. stabilize/migrations/01KE5ZQT3X9B7PCS8R8DRMLN85_add_processed_messages.sql +26 -0
  37. stabilize/migrations/__init__.py +1 -0
  38. stabilize/models/__init__.py +15 -0
  39. stabilize/models/stage.py +389 -0
  40. stabilize/models/status.py +266 -0
  41. stabilize/models/task.py +125 -0
  42. stabilize/models/workflow.py +317 -0
  43. stabilize/monitor/__init__.py +63 -0
  44. stabilize/monitor/data.py +482 -0
  45. stabilize/monitor/display.py +800 -0
  46. stabilize/orchestrator.py +113 -0
  47. stabilize/persistence/__init__.py +28 -0
  48. stabilize/persistence/connection.py +185 -0
  49. stabilize/persistence/factory.py +140 -0
  50. stabilize/persistence/memory.py +347 -0
  51. stabilize/persistence/postgres.py +862 -0
  52. stabilize/persistence/sqlite.py +1253 -0
  53. stabilize/persistence/store.py +498 -0
  54. stabilize/queue/__init__.py +59 -0
  55. stabilize/queue/messages.py +377 -0
  56. stabilize/queue/processor.py +379 -0
  57. stabilize/queue/queue.py +741 -0
  58. stabilize/queue/sqlite_queue.py +595 -0
  59. stabilize/recovery.py +353 -0
  60. stabilize/resilience/__init__.py +19 -0
  61. stabilize/resilience/bulkheads.py +148 -0
  62. stabilize/resilience/circuits.py +164 -0
  63. stabilize/resilience/config.py +84 -0
  64. stabilize/resilience/executor.py +168 -0
  65. stabilize/stages/__init__.py +11 -0
  66. stabilize/stages/builder.py +253 -0
  67. stabilize/tasks/__init__.py +19 -0
  68. stabilize/tasks/docker.py +463 -0
  69. stabilize/tasks/highway/__init__.py +32 -0
  70. stabilize/tasks/highway/config.py +87 -0
  71. stabilize/tasks/highway/task.py +465 -0
  72. stabilize/tasks/http.py +592 -0
  73. stabilize/tasks/interface.py +335 -0
  74. stabilize/tasks/python.py +351 -0
  75. stabilize/tasks/registry.py +255 -0
  76. stabilize/tasks/result.py +283 -0
  77. stabilize/tasks/shell.py +246 -0
  78. stabilize/tasks/ssh.py +171 -0
  79. stabilize/tracing.py +327 -0
  80. stabilize/verification.py +316 -0
  81. stabilize-0.12.2.dist-info/METADATA +287 -0
  82. stabilize-0.12.2.dist-info/RECORD +85 -0
  83. stabilize-0.12.2.dist-info/WHEEL +4 -0
  84. stabilize-0.12.2.dist-info/entry_points.txt +2 -0
  85. stabilize-0.12.2.dist-info/licenses/LICENSE +201 -0
stabilize/cli.py ADDED
@@ -0,0 +1,1878 @@
1
+ """Stabilize CLI for database migrations and developer tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import hashlib
7
+ import os
8
+ import re
9
+ import sys
10
+ from importlib.resources import files
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ from typing import Any
16
+
17
+ from stabilize.persistence.store import WorkflowStore
18
+ from stabilize.queue.queue import Queue
19
+
20
+
21
+ # =============================================================================
22
+ # PROMPT - Comprehensive documentation for AI coding agents
23
+ # =============================================================================
24
+
25
+ PROMPT_TEXT = r'''
26
+ ===============================================================================
27
+ STABILIZE WORKFLOW ENGINE - COMPLETE REFERENCE FOR CODE GENERATION
28
+ ===============================================================================
29
+
30
+ Stabilize is a Python DAG-based workflow orchestration engine. Workflows consist
31
+ of Stages (nodes in the DAG) containing Tasks (atomic work units). Stages can
32
+ run sequentially or in parallel based on their dependencies.
33
+
34
+ CRITICAL RULE FOR SHELL WORKFLOWS:
35
+ For shell/command workflows, import the BUILT-IN ShellTask:
36
+ from stabilize import ShellTask
37
+ registry.register("shell", ShellTask)
38
+
39
+ DO NOT define your own ShellTask class - use the built-in one! It automatically
40
+ substitutes {key} placeholders with upstream outputs (e.g., {stdout} becomes actual output).
41
+
42
+ ===============================================================================
43
+ 1. COMPLETE WORKING EXAMPLE - COPY THIS AS YOUR STARTING TEMPLATE
44
+ ===============================================================================
45
+
46
+ #!/usr/bin/env python3
47
+ """Minimal working Stabilize workflow example."""
48
+
49
+ from stabilize import (
50
+ # Core models
51
+ Workflow, StageExecution, TaskExecution, WorkflowStatus,
52
+ # Infrastructure (use SqliteWorkflowStore/SqliteQueue or Postgres variants)
53
+ Orchestrator, QueueProcessor,
54
+ Queue, SqliteQueue, # Queue abstract + SQLite impl
55
+ WorkflowStore, SqliteWorkflowStore, # Store abstract + SQLite impl
56
+ # Tasks - use built-in tasks, do NOT define your own
57
+ Task, TaskResult, TaskRegistry,
58
+ ShellTask, # For shell/command execution
59
+ PythonTask, # For Python code execution (uses script/INPUT/RESULT)
60
+ DockerTask, # For Docker container execution
61
+ HTTPTask, # For HTTP requests
62
+ # Handlers
63
+ StartWorkflowHandler, StartStageHandler, StartTaskHandler,
64
+ RunTaskHandler, CompleteTaskHandler, CompleteStageHandler,
65
+ CompleteWorkflowHandler,
66
+ )
67
+
68
+
69
+ # Step 1: USE BUILT-IN TASKS - Do NOT define your own Task classes!
70
+ # Available built-in tasks:
71
+ # - ShellTask: For shell/command execution
72
+ # - PythonTask: For Python code execution (uses script/INPUT/RESULT)
73
+ # - DockerTask: For Docker container execution
74
+ # - HTTPTask: For HTTP API requests
75
+ #
76
+ # Only define custom Task classes if the built-in tasks don't meet your needs.
77
+ # Example custom task (rarely needed):
78
+ #
79
+ # class MyCustomTask(Task):
80
+ # def execute(self, stage: StageExecution) -> TaskResult:
81
+ # value = stage.context.get("key")
82
+ # return TaskResult.success(outputs={"result": value})
83
+
84
+
85
+ # Step 2: Setup infrastructure
86
+ def setup_pipeline_runner(store: WorkflowStore, queue: Queue) -> tuple[QueueProcessor, Orchestrator]:
87
+ """Create the processor and orchestrator with the built-in tasks registered."""
88
+ task_registry = TaskRegistry()
89
+ # Register built-in tasks
90
+ task_registry.register("shell", ShellTask)
91
+ task_registry.register("python", PythonTask)
92
+ task_registry.register("docker", DockerTask)
93
+ task_registry.register("http", HTTPTask)
94
+
95
+ processor = QueueProcessor(queue)
96
+
97
+ # Register all handlers in order
98
+ handlers = [
99
+ StartWorkflowHandler(queue, store),
100
+ StartStageHandler(queue, store),
101
+ StartTaskHandler(queue, store),
102
+ RunTaskHandler(queue, store, task_registry),
103
+ CompleteTaskHandler(queue, store),
104
+ CompleteStageHandler(queue, store),
105
+ CompleteWorkflowHandler(queue, store),
106
+ ]
107
+ for handler in handlers:
108
+ processor.register_handler(handler)
109
+
110
+ orchestrator = Orchestrator(queue)
111
+ return processor, orchestrator
112
+
113
+
114
+ # Step 3: Create and run workflow
115
+ def main():
116
+ # Initialize storage (in-memory SQLite for development)
117
+ store = SqliteWorkflowStore("sqlite:///:memory:", create_tables=True)
118
+ queue = SqliteQueue("sqlite:///:memory:", table_name="queue_messages")
119
+ queue._create_table()
120
+ processor, orchestrator = setup_pipeline_runner(store, queue)
121
+
122
+ # Create workflow with stages using built-in tasks
123
+ workflow = Workflow.create(
124
+ application="my-app",
125
+ name="My Pipeline",
126
+ stages=[
127
+ StageExecution(
128
+ ref_id="1",
129
+ type="shell", # Use built-in ShellTask
130
+ name="First Stage",
131
+ context={"command": "echo 'Hello World'"},
132
+ tasks=[
133
+ TaskExecution.create(
134
+ name="Run Shell",
135
+ implementing_class="shell", # Must match registered name
136
+ stage_start=True, # REQUIRED for first task
137
+ stage_end=True, # REQUIRED for last task
138
+ ),
139
+ ],
140
+ ),
141
+ ],
142
+ )
143
+
144
+ # Execute workflow
145
+ store.store(workflow)
146
+ orchestrator.start(workflow)
147
+ processor.process_all(timeout=30.0)
148
+
149
+ # Check result
150
+ result = store.retrieve(workflow.id)
151
+ print(f"Status: {result.status}")
152
+ print(f"Output: {result.stages[0].outputs}")
153
+
154
+
155
+ if __name__ == "__main__":
156
+ main()
157
+
158
+ ===============================================================================
159
+ 1.1 SHELL PIPELINE TEMPLATE - USE FOR ANY SHELL/COMMAND WORKFLOWS
160
+ ===============================================================================
161
+ For shell commands, IMPORT the built-in ShellTask (do NOT define your own):
162
+
163
+ from stabilize import ShellTask, TaskRegistry
164
+
165
+ registry = TaskRegistry()
166
+ registry.register("shell", ShellTask)
167
+
168
+ ShellTask Context Parameters:
169
+ command (str) - The shell command to execute (required)
170
+ timeout (int) - Timeout in seconds (default: 60)
171
+ cwd (str) - Working directory
172
+ env (dict) - Additional environment variables
173
+ shell (bool|str) - True for default shell, or path like "/bin/bash"
174
+ stdin (str) - Input to send to command stdin
175
+ max_output_size (int) - Max bytes for output (default: 10MB)
176
+ expected_codes (list) - Exit codes treated as success (default: [0])
177
+ secrets (list) - Context keys to mask in logs
178
+ binary (bool) - Capture output as bytes (default: False)
179
+ continue_on_failure - Return failed_continue instead of terminal
180
+
181
+ ShellTask Outputs:
182
+ stdout, stderr, returncode, truncated (bool), stdout_b64 (if binary)
183
+
184
+ # Example: Pipeline with upstream output substitution
185
+ stages=[
186
+ StageExecution(
187
+ ref_id="1", type="shell", name="Get Data",
188
+ context={"command": "git status"},
189
+ tasks=[TaskExecution.create("Run", "shell", stage_start=True, stage_end=True)],
190
+ ),
191
+ StageExecution(
192
+ ref_id="2", type="shell", name="Save Data",
193
+ requisite_stage_ref_ids={"1"},
194
+ context={"command": "echo '{stdout}' > /tmp/output.txt"}, # {stdout} auto-replaced
195
+ tasks=[TaskExecution.create("Save", "shell", stage_start=True, stage_end=True)],
196
+ ),
197
+ ]
198
+
199
+ # Example: With environment and working directory
200
+ context={"command": "npm install", "cwd": "/app", "env": {"NODE_ENV": "production"}}
201
+
202
+ # Example: With secrets masking
203
+ context={"command": "curl -H 'Auth: {token}' api.com", "token": "secret", "secrets": ["token"]}
204
+
205
+ # Example: Allow grep's exit code 1 (no match found)
206
+ context={"command": "grep pattern file.txt", "expected_codes": [0, 1]}
207
+
208
+ ===============================================================================
209
+ 1.2 HTTP PIPELINE TEMPLATE - USE FOR ANY HTTP/API WORKFLOWS
210
+ ===============================================================================
211
+ For HTTP requests, IMPORT the built-in HTTPTask (do NOT define your own):
212
+
213
+ from stabilize import HTTPTask, TaskRegistry
214
+
215
+ registry = TaskRegistry()
216
+ registry.register("http", HTTPTask)
217
+
218
+ HTTPTask Context Parameters:
219
+ url (str) - Request URL (required)
220
+ method (str) - GET, POST, PUT, DELETE, PATCH, HEAD, OPTIONS (default: GET)
221
+
222
+ Request Body (mutually exclusive):
223
+ body (str|bytes) - Raw request body
224
+ json (dict) - JSON body (auto-serialized, sets Content-Type)
225
+ form (dict) - Form-encoded body (application/x-www-form-urlencoded)
226
+
227
+ Headers & Auth:
228
+ headers (dict) - Custom request headers
229
+ auth (list) - Basic auth as [username, password]
230
+ bearer_token (str) - Bearer token for Authorization header
231
+
232
+ File Upload:
233
+ upload_file (str) - Path to file to upload (multipart/form-data)
234
+ upload_field (str) - Form field name (default: "file")
235
+ upload_form (dict) - Additional form fields with upload
236
+
237
+ File Download:
238
+ download_to (str) - Path to save response body
239
+
240
+ Timeouts & Retries:
241
+ timeout (int) - Request timeout in seconds (default: 30)
242
+ retries (int) - Number of retries (default: 0)
243
+ retry_delay (float) - Delay between retries (default: 1.0)
244
+ retry_on_status (list) - Status codes to retry (default: [502, 503, 504])
245
+
246
+ Response Handling:
247
+ expected_status (int|list) - Expected status code(s)
248
+ parse_json (bool) - Auto-parse JSON response (default: False)
249
+ max_response_size (int) - Max bytes (default: 10MB)
250
+
251
+ Other:
252
+ verify_ssl (bool) - Verify SSL certs (default: True)
253
+ continue_on_failure - Return failed_continue instead of terminal
254
+
255
+ HTTPTask Outputs:
256
+ status_code, headers, body, body_json (if parse_json), elapsed_ms, url, content_type, content_length
257
+
258
+ # Example: Simple GET with JSON parsing
259
+ context={"url": "https://api.example.com/users", "parse_json": True}
260
+
261
+ # Example: POST with JSON body
262
+ context={"url": "https://api.example.com/users", "method": "POST", "json": {"name": "John"}}
263
+
264
+ # Example: With Bearer token authentication
265
+ context={"url": "https://api.example.com/private", "bearer_token": "my-token"}
266
+
267
+ # Example: File upload
268
+ context={"url": "https://api.example.com/upload", "method": "POST", "upload_file": "/path/to/file.pdf"}
269
+
270
+ # Example: Download file
271
+ context={"url": "https://example.com/report.pdf", "download_to": "/tmp/report.pdf"}
272
+
273
+ # Example: With retries for unreliable endpoints
274
+ context={"url": "https://api.example.com/data", "retries": 3, "retry_delay": 2.0}
275
+
276
+ ===============================================================================
277
+ 1.3 PYTHON PIPELINE TEMPLATE - USE FOR PYTHON CODE EXECUTION
278
+ ===============================================================================
279
+ For Python code execution, IMPORT the built-in PythonTask (do NOT define your own):
280
+
281
+ from stabilize import PythonTask, TaskRegistry
282
+
283
+ registry = TaskRegistry()
284
+ registry.register("python", PythonTask)
285
+
286
+ PythonTask Context Parameters:
287
+ # Execution Mode (choose one):
288
+ script (str) - Inline Python code to execute
289
+ script_file (str) - Path to Python script file
290
+ module (str) - Module path (e.g., "myapp.tasks")
291
+ function (str) - Function name (required with module)
292
+
293
+ # Inputs:
294
+ inputs (dict) - Input data, available as INPUT in script
295
+ args (list) - Command line arguments
296
+
297
+ # Execution:
298
+ python_path (str) - Python interpreter (default: current)
299
+ timeout (int) - Timeout in seconds (default: 60)
300
+ cwd (str) - Working directory
301
+ env (dict) - Additional environment variables
302
+ continue_on_failure - Return failed_continue instead of terminal
303
+
304
+ PythonTask Outputs:
305
+ stdout, stderr, exit_code, result (value of RESULT variable if set)
306
+
307
+ Script Convention:
308
+ - Access inputs via INPUT dict (includes upstream outputs + explicit inputs)
309
+ - Set return value via RESULT variable (must be JSON-serializable)
310
+
311
+ # Example: Inline script with INPUT and RESULT
312
+ stages=[
313
+ StageExecution(
314
+ ref_id="1", type="python", name="Calculate",
315
+ context={
316
+ "script": """
317
+ numbers = INPUT["values"]
318
+ RESULT = {"sum": sum(numbers), "avg": sum(numbers) / len(numbers)}
319
+ """,
320
+ "inputs": {"values": [1, 2, 3, 4, 5]}
321
+ },
322
+ tasks=[TaskExecution.create("Run", "python", stage_start=True, stage_end=True)],
323
+ ),
324
+ ]
325
+
326
+ # Example: Module + function mode (calls myapp.validators.validate(INPUT))
327
+ context={"module": "myapp.validators", "function": "validate", "inputs": {"data": {...}}}
328
+
329
+ # Example: Script file
330
+ context={"script_file": "/path/to/script.py", "inputs": {"config": {...}}}
331
+
332
+ ===============================================================================
333
+ 1.4 DOCKER PIPELINE TEMPLATE - USE FOR CONTAINER EXECUTION
334
+ ===============================================================================
335
+ For Docker container execution, IMPORT the built-in DockerTask (do NOT define your own):
336
+
337
+ from stabilize import DockerTask, TaskRegistry
338
+
339
+ registry = TaskRegistry()
340
+ registry.register("docker", DockerTask)
341
+
342
+ DockerTask Actions:
343
+ run - Run a container (default)
344
+ exec - Execute command in running container
345
+ build - Build image from Dockerfile
346
+ pull - Pull image from registry
347
+ ps - List containers
348
+ images - List images
349
+ logs - Get container logs
350
+ stop - Stop container
351
+ rm - Remove container
352
+
353
+ DockerTask Context Parameters (run action):
354
+ image (str) - Docker image (required)
355
+ command (str|list) - Command to run in container
356
+ entrypoint (str|list) - Override container entrypoint
357
+ name (str) - Container name
358
+ user (str) - Run as user (e.g., "1000:1000")
359
+ hostname (str) - Container hostname
360
+
361
+ # Mounts & Network:
362
+ volumes (list) - Volume mounts as "host:container"
363
+ ports (list) - Port mappings as "host:container"
364
+ network (str) - Docker network name
365
+ dns (list) - Custom DNS servers
366
+ extra_hosts (list) - Add host mappings as "host:ip"
367
+
368
+ # Environment:
369
+ environment (dict) - Environment variables
370
+ workdir (str) - Working directory
371
+
372
+ # Resources:
373
+ memory (str) - Memory limit (e.g., "512m", "2g")
374
+ memory_swap (str) - Memory + swap limit
375
+ cpus (str) - CPU limit (e.g., "0.5", "2")
376
+ gpus (str) - GPU access (e.g., "all", "device=0")
377
+ shm_size (str) - Shared memory size
378
+
379
+ # Security:
380
+ privileged (bool) - Privileged mode
381
+ cap_add (list) - Add Linux capabilities
382
+ cap_drop (list) - Drop Linux capabilities
383
+ security_opt (list) - Security options
384
+ read_only (bool) - Read-only root filesystem
385
+
386
+ # Other:
387
+ remove (bool) - Remove after exit (default: True)
388
+ detach (bool) - Run in background
389
+ init (bool) - Run init inside container
390
+ platform (str) - Target platform (e.g., "linux/amd64")
391
+ pull (str) - Pull policy: "always", "never", "missing"
392
+ labels (dict) - Container labels
393
+ timeout (int) - Command timeout (default: 300)
394
+ continue_on_failure - Return failed_continue instead of terminal
395
+
396
+ DockerTask Outputs:
397
+ stdout, stderr, exit_code, container_id (if detach), image_id (if build)
398
+
399
+ # Example: Simple container run
400
+ context={"action": "run", "image": "python:3.11", "command": "python -c 'print(1+1)'"}
401
+
402
+ # Example: With volumes and environment
403
+ context={
404
+ "action": "run",
405
+ "image": "node:18",
406
+ "volumes": ["/app:/app"],
407
+ "environment": {"NODE_ENV": "production"},
408
+ "workdir": "/app",
409
+ "command": "npm test"
410
+ }
411
+
412
+ # Example: GPU container with resource limits
413
+ context={
414
+ "action": "run",
415
+ "image": "pytorch/pytorch:latest",
416
+ "gpus": "all",
417
+ "memory": "8g",
418
+ "shm_size": "2g",
419
+ "volumes": ["/data:/data"],
420
+ "command": "python train.py"
421
+ }
422
+
423
+ # Example: Build and tag image
424
+ context={"action": "build", "tag": "myapp:latest", "context": "./docker"}
425
+
426
+ ===============================================================================
427
+ 2. CORE CLASSES API
428
+ ===============================================================================
429
+
430
+ 2.1 Workflow
431
+ -------------
432
+ Factory: Workflow.create(application, name, stages, trigger=None, pipeline_config_id=None)
433
+
434
+ Fields:
435
+ id: str - Unique ULID identifier (auto-generated)
436
+ status: WorkflowStatus - Current execution status
437
+ stages: list[StageExecution] - All stages in the workflow
438
+ application: str - Application name
439
+ name: str - Pipeline name
440
+
441
+ Methods:
442
+ stage_by_id(stage_id) -> StageExecution - Get stage by internal ID
443
+ stage_by_ref_id(ref_id) -> StageExecution - Get stage by reference ID
444
+ get_context() -> dict - Get merged outputs from all stages
445
+
446
+
447
+ 2.2 StageExecution
448
+ -------------------
449
+ Constructor: StageExecution(ref_id, type, name, context, tasks, requisite_stage_ref_ids=set())
450
+
451
+ Fields:
452
+ ref_id: str - UNIQUE reference ID for DAG (e.g., "1", "deploy", "build")
453
+ type: str - Stage type (usually matches task name)
454
+ name: str - Human-readable name
455
+ context: dict[str, Any] - INPUT parameters for this stage
456
+ outputs: dict[str, Any] - OUTPUT values for downstream stages (populated by tasks)
457
+ tasks: list[TaskExecution] - Tasks to execute (sequentially)
458
+ requisite_stage_ref_ids: set[str] - Dependencies (ref_ids of upstream stages)
459
+ status: WorkflowStatus - Current status
460
+
461
+ DAG Dependencies:
462
+ - Empty set: Stage runs immediately (initial stage)
463
+ - {"A"}: Stage runs after stage with ref_id="A" completes
464
+ - {"A", "B"}: Stage waits for BOTH A and B to complete (join point)
465
+
466
+
467
+ 2.3 TaskExecution
468
+ ------------------
469
+ Factory: TaskExecution.create(name, implementing_class, stage_start=False, stage_end=False)
470
+
471
+ Fields:
472
+ name: str - Human-readable task name
473
+ implementing_class: str - MUST match the name used in TaskRegistry.register()
474
+ stage_start: bool - MUST be True for first task in stage
475
+ stage_end: bool - MUST be True for last task in stage
476
+ status: WorkflowStatus - Current status
477
+
478
+ CRITICAL: If a stage has only one task, set BOTH stage_start=True AND stage_end=True
479
+
480
+
481
+ 2.4 WorkflowStatus
482
+ -------------------
483
+ All status values:
484
+ NOT_STARTED - Not yet started
485
+ RUNNING - Currently executing
486
+ PAUSED - Paused, can be resumed
487
+ SUSPENDED - Waiting for external trigger
488
+ SUCCEEDED - Completed successfully
489
+ FAILED_CONTINUE - Failed but pipeline continues
490
+ TERMINAL - Failed, pipeline halts
491
+ CANCELED - Execution was canceled
492
+ STOPPED - Execution was stopped
493
+ SKIPPED - Stage/task was skipped
494
+ REDIRECT - Decision branch redirect
495
+ BUFFERED - Buffered, waiting
496
+
497
+ Properties:
498
+ .is_complete: bool - Has finished executing
499
+ .is_halt: bool - Blocks downstream stages
500
+ .is_successful: bool - SUCCEEDED, STOPPED, or SKIPPED
501
+ .is_failure: bool - TERMINAL, STOPPED, or FAILED_CONTINUE
502
+
503
+ ===============================================================================
504
+ 3. TASK IMPLEMENTATION
505
+ ===============================================================================
506
+
507
+ 3.1 Task Interface (Abstract Base Class)
508
+ -----------------------------------------
509
+ from stabilize import Task
510
+
511
+ class MyTask(Task):
512
+ def execute(self, stage: StageExecution) -> TaskResult:
513
+ # Read from stage.context (includes upstream outputs)
514
+ value = stage.context.get("key")
515
+
516
+ # Return TaskResult
517
+ return TaskResult.success(outputs={"output_key": "value"})
518
+
519
+ # Optional: Handle timeout (for RetryableTask)
520
+ def on_timeout(self, stage: StageExecution) -> TaskResult | None:
521
+ return TaskResult.terminal(error="Task timed out")
522
+
523
+ # Optional: Handle cancellation
524
+ def on_cancel(self, stage: StageExecution) -> TaskResult | None:
525
+ return TaskResult.canceled()
526
+
527
+
528
+ 3.2 TaskResult Factory Methods - CRITICAL REFERENCE
529
+ ----------------------------------------------------
530
+ from stabilize import TaskResult
531
+
532
+ SUCCESS - Task completed successfully, pipeline continues:
533
+ TaskResult.success(outputs=None, context=None)
534
+ Parameters:
535
+ outputs: dict - Values available to downstream stages
536
+ context: dict - Values stored in stage.context (stage-scoped)
537
+
538
+ RUNNING - Task needs to poll again (for RetryableTask):
539
+ TaskResult.running(context=None)
540
+ Parameters:
541
+ context: dict - Updated state for next poll iteration
542
+
543
+ TERMINAL - Task failed, pipeline HALTS:
544
+ TaskResult.terminal(error, context=None)
545
+ Parameters:
546
+ error: str - Error message (REQUIRED)
547
+ context: dict - Additional context data
548
+ WARNING: Does NOT accept 'outputs' parameter!
549
+
550
+ FAILED_CONTINUE - Task failed but pipeline continues:
551
+ TaskResult.failed_continue(error, outputs=None, context=None)
552
+ Parameters:
553
+ error: str - Error message (REQUIRED)
554
+ outputs: dict - Values still available downstream
555
+ context: dict - Additional context data
556
+
557
+ SKIPPED - Task was skipped:
558
+ TaskResult.skipped()
559
+
560
+ CANCELED - Task was canceled:
561
+ TaskResult.canceled(outputs=None)
562
+
563
+ STOPPED - Task was stopped:
564
+ TaskResult.stopped(outputs=None)
565
+
566
+ REDIRECT - Indicates decision branch redirect:
567
+ TaskResult.redirect(context=None)
568
+ Parameters:
569
+ context: dict - Context for the redirect
570
+
571
+ Builder Pattern (for complex results):
572
+ TaskResult.builder(status).context({...}).outputs({...}).build()
573
+
574
+ Methods:
575
+ .context(dict) - Set the full context
576
+ .outputs(dict) - Set the full outputs
577
+ .add_context(key, value) - Add a single context value
578
+ .add_output(key, value) - Add a single output value
579
+ .build() - Build and return the TaskResult
580
+
581
+
582
+ 3.3 RetryableTask - For Polling Operations
583
+ -------------------------------------------
584
+ import time
+ from datetime import timedelta
585
+ from stabilize import RetryableTask
586
+
587
+ class PollTask(RetryableTask):
588
+ def get_timeout(self) -> timedelta:
589
+ """Maximum time before task times out."""
590
+ return timedelta(minutes=30)
591
+
592
+ def get_backoff_period(self, stage: StageExecution, duration: timedelta) -> timedelta:
593
+ """Time to wait between poll attempts."""
594
+ return timedelta(seconds=10)
595
+
596
+ def execute(self, stage: StageExecution) -> TaskResult:
597
+ status = check_external_system()
598
+
599
+ if status == "complete":
600
+ return TaskResult.success(outputs={"status": "done"})
601
+ elif status == "failed":
602
+ return TaskResult.terminal(error="External system failed")
603
+ else:
604
+ # Keep polling - will be called again after backoff
605
+ return TaskResult.running(context={"last_check": time.time()})
606
+
607
+
608
+ 3.4 SkippableTask - Conditional Execution
609
+ ------------------------------------------
610
+ from stabilize.tasks.interface import SkippableTask # Advanced, not in main exports
611
+
612
+ class ConditionalTask(SkippableTask):
613
+ def is_enabled(self, stage: StageExecution) -> bool:
614
+ """Return False to skip this task."""
615
+ return stage.context.get("should_run", True)
616
+
617
+ def do_execute(self, stage: StageExecution) -> TaskResult:
618
+ """Actual task logic (only called if is_enabled returns True)."""
619
+ return TaskResult.success()
620
+
621
+
622
+ 3.5 Additional Built-in Tasks
623
+ ------------------------------
624
+ from stabilize.tasks.interface import CallableTask, NoOpTask, WaitTask
625
+
626
+ OverridableTimeoutRetryableTask:
627
+ A RetryableTask that allows the stage to override timeout via 'stageTimeoutMs'
628
+ context value. Useful when timeout should be configurable per-stage.
629
+
630
+ CallableTask:
631
+ Wraps a callable function as a task without creating a class.
632
+
633
+ def my_task(stage: StageExecution) -> TaskResult:
634
+ return TaskResult.success(outputs={"result": "done"})
635
+
636
+ task = CallableTask(my_task)
637
+ registry.register("my_task", task)
638
+
639
+ NoOpTask:
640
+ A task that does nothing and returns success immediately.
641
+ Useful for testing, placeholder stages, or synchronization points.
642
+
643
+ registry.register("noop", NoOpTask)
644
+
645
+ WaitTask:
646
+ Built-in RetryableTask that waits for a specified duration.
647
+ Reads 'waitTime' (seconds) from stage.context.
648
+
649
+ StageExecution(
650
+ ref_id="wait",
651
+ type="wait",
652
+ name="Wait 30 seconds",
653
+ context={"waitTime": 30},
654
+ tasks=[TaskExecution.create("Wait", "wait", stage_start=True, stage_end=True)],
655
+ )
656
+
657
+ ===============================================================================
658
+ 4. TASK REGISTRY
659
+ ===============================================================================
660
+
661
+ from stabilize import TaskRegistry
662
+
663
+ registry = TaskRegistry()
664
+
665
+ # Register a task class
666
+ registry.register("my_task", MyTask)
667
+
668
+ # Register with aliases
669
+ registry.register("http", HTTPTask, aliases=["http_request", "web_request"])
670
+
671
+ # The implementing_class in TaskExecution MUST match the registered name:
672
+ TaskExecution.create(
673
+ name="Do something",
674
+ implementing_class="my_task", # Must match registry.register() name
675
+ stage_start=True,
676
+ stage_end=True,
677
+ )
678
+
679
+ ===============================================================================
680
+ 5. DAG PATTERNS
681
+ ===============================================================================
682
+
683
+ 5.1 Sequential Stages (A -> B -> C)
684
+ ------------------------------------
685
+ stages=[
686
+ StageExecution(ref_id="A", ..., requisite_stage_ref_ids=set()), # Initial
687
+ StageExecution(ref_id="B", ..., requisite_stage_ref_ids={"A"}), # After A
688
+ StageExecution(ref_id="C", ..., requisite_stage_ref_ids={"B"}), # After B
689
+ ]
690
+
691
+
692
+ 5.2 Parallel Stages
693
+ --------------------
694
+ A
695
+ / \
696
+ B C <- B and C run in parallel after A
697
+ \ /
698
+ D
699
+
700
+ stages=[
701
+ StageExecution(ref_id="A", ..., requisite_stage_ref_ids=set()),
702
+ StageExecution(ref_id="B", ..., requisite_stage_ref_ids={"A"}), # Parallel
703
+ StageExecution(ref_id="C", ..., requisite_stage_ref_ids={"A"}), # Parallel
704
+ StageExecution(ref_id="D", ..., requisite_stage_ref_ids={"B", "C"}), # Join
705
+ ]
706
+
707
+
708
+ 5.3 Complex DAG
709
+ ----------------
710
+ A
711
+ /|\
712
+ B C D <- All parallel after A
713
+ |/ \|
714
+ E F <- E waits for B,C; F waits for C,D
715
+ \ /
716
+ G <- G waits for E and F
717
+
718
+ stages=[
719
+ StageExecution(ref_id="A", ..., requisite_stage_ref_ids=set()),
720
+ StageExecution(ref_id="B", ..., requisite_stage_ref_ids={"A"}),
721
+ StageExecution(ref_id="C", ..., requisite_stage_ref_ids={"A"}),
722
+ StageExecution(ref_id="D", ..., requisite_stage_ref_ids={"A"}),
723
+ StageExecution(ref_id="E", ..., requisite_stage_ref_ids={"B", "C"}),
724
+ StageExecution(ref_id="F", ..., requisite_stage_ref_ids={"C", "D"}),
725
+ StageExecution(ref_id="G", ..., requisite_stage_ref_ids={"E", "F"}),
726
+ ]
727
+
728
+ ===============================================================================
729
+ 6. CONTEXT AND OUTPUTS DATA FLOW
730
+ ===============================================================================
731
+
732
+ stage.context - INPUT: Parameters passed when creating the stage
733
+ Also includes outputs from upstream stages (automatic lookup)
734
+
735
+ stage.outputs - OUTPUT: Values produced by tasks for downstream stages
736
+ Set via TaskResult.success(outputs={...})
737
+
738
+ Example flow:
739
+ Stage A context: {"input": "hello"}
740
+ Stage A task returns: TaskResult.success(outputs={"result": "processed"})
741
+ Stage B context: {"input": "hello", "result": "processed"} <- Includes A's output
742
+
743
+ Accessing in tasks:
744
+ def execute(self, stage):
745
+ # Read from context (includes upstream outputs)
746
+ upstream_result = stage.context.get("result") # From upstream stage
747
+
748
+ # Write to outputs (available downstream)
749
+ return TaskResult.success(outputs={"my_output": "value"})
750
+
751
+ IMPORTANT - Shell Tasks with Upstream Outputs:
752
+ Use the BUILT-IN ShellTask which automatically substitutes {key} placeholders:
753
+
754
+ from stabilize import ShellTask
755
+ registry.register("shell", ShellTask)
756
+
757
+ The built-in ShellTask handles: cwd, env, stdin, timeout, expected_codes, secrets, binary mode.
758
+ See section 1.1 for full parameter documentation.
759
+
760
+ ===============================================================================
761
+ 7. COMMON MISTAKES AND HOW TO FIX THEM
762
+ ===============================================================================
763
+
764
+ MISTAKE 1: Using 'outputs' parameter with TaskResult.terminal()
765
+ ---------------------------------------------------------------
766
+ WRONG:
767
+ return TaskResult.terminal(error="Failed", outputs={"data": value})
768
+
769
+ RIGHT:
770
+ return TaskResult.terminal(error="Failed", context={"data": value})
771
+
772
+ terminal() only accepts: error (required), context (optional)
773
+
774
+
775
+ MISTAKE 2: Forgetting stage_start and stage_end on tasks
776
+ ---------------------------------------------------------
777
+ WRONG:
778
+ TaskExecution.create(name="X", implementing_class="y")
779
+
780
+ RIGHT:
781
+ TaskExecution.create(name="X", implementing_class="y", stage_start=True, stage_end=True)
782
+
783
+
784
+ MISTAKE 3: implementing_class doesn't match registered name
785
+ ------------------------------------------------------------
786
+ WRONG:
787
+ registry.register("http_task", HTTPTask)
788
+ TaskExecution.create(..., implementing_class="HTTPTask") # Class name, not registered name
789
+
790
+ RIGHT:
791
+ registry.register("http_task", HTTPTask)
792
+ TaskExecution.create(..., implementing_class="http_task") # Matches registered name
793
+
794
+
795
+ MISTAKE 4: Duplicate ref_id values
796
+ -----------------------------------
797
+ WRONG:
798
+ StageExecution(ref_id="1", name="Stage A", ...)
799
+ StageExecution(ref_id="1", name="Stage B", ...) # Same ref_id!
800
+
801
+ RIGHT:
802
+ StageExecution(ref_id="1", name="Stage A", ...)
803
+ StageExecution(ref_id="2", name="Stage B", ...) # Unique ref_ids
804
+
805
+
806
+ MISTAKE 5: Missing handlers
807
+ ----------------------------
808
+ All 7 handlers are REQUIRED for the engine to work:
809
+ StartWorkflowHandler, StartStageHandler, StartTaskHandler,
810
+ RunTaskHandler, CompleteTaskHandler, CompleteStageHandler, CompleteWorkflowHandler
811
+
812
+
813
+ MISTAKE 6: Forgetting requisite_stage_ref_ids for sequential stages
814
+ --------------------------------------------------------------------
815
+ WRONG - Stages may run in parallel, stage 2 won't have stage 1 outputs:
816
+ StageExecution(ref_id="1", context={"command": "git status"}, ...),
817
+ StageExecution(ref_id="2", context={"command": "echo {stdout}"}, ...), # No dependency!
818
+
819
+ RIGHT - Stage 2 waits for stage 1 and receives its outputs:
820
+ StageExecution(ref_id="1", context={"command": "git status"}, ...),
821
+ StageExecution(ref_id="2", requisite_stage_ref_ids={"1"}, context={"command": "echo {stdout}"}, ...),
822
+
823
+ Without requisite_stage_ref_ids, stages run in parallel and upstream outputs are NOT available.
824
+
825
+
826
+ MISTAKE 7: Using $variable instead of {variable} for upstream outputs
827
+ ----------------------------------------------------------------------
828
+ WRONG - Shell variable syntax doesn't work:
829
+ context={"command": "echo $stdout > file.txt"} # $stdout is a shell variable, not a context placeholder
830
+
831
+ RIGHT - Use {key} placeholders that ShellTask substitutes:
832
+ context={"command": "echo '{stdout}' > file.txt"} # {stdout} replaced by task
833
+
834
+
835
+ MISTAKE 8: Defining your own ShellTask instead of using built-in
836
+ -----------------------------------------------------------------
837
+ WRONG - Defining custom ShellTask that may lack features:
838
+ class ShellTask(Task):
839
+ def execute(self, stage):
840
+ command = stage.context.get("command")
841
+ result = subprocess.run(command, shell=True, ...)
842
+
843
+ RIGHT - Use the built-in ShellTask which handles everything:
844
+ from stabilize import ShellTask
845
+ registry.register("shell", ShellTask)
846
+
847
+ ===============================================================================
848
+ 8. COMPLETE EXAMPLE: SEQUENTIAL PIPELINE WITH ERROR HANDLING
849
+ ===============================================================================
850
+
851
+ #!/usr/bin/env python3
852
+ from stabilize import (
853
+ Workflow, StageExecution, TaskExecution, WorkflowStatus,
854
+ Orchestrator, QueueProcessor, SqliteQueue, SqliteWorkflowStore,
855
+ Task, TaskResult, TaskRegistry,
856
+ StartWorkflowHandler, StartStageHandler, StartTaskHandler,
857
+ RunTaskHandler, CompleteTaskHandler, CompleteStageHandler,
858
+ CompleteWorkflowHandler,
859
+ )
860
+
861
+
862
+ class ValidateTask(Task):
863
+ def execute(self, stage: StageExecution) -> TaskResult:
864
+ data = stage.context.get("data")
865
+ if not data:
866
+ return TaskResult.terminal(error="No data provided")
867
+ return TaskResult.success(outputs={"validated": True, "data": data})
868
+
869
+
870
+ class ProcessTask(Task):
871
+ def execute(self, stage: StageExecution) -> TaskResult:
872
+ data = stage.context.get("data")
873
+ validated = stage.context.get("validated")
874
+ if not validated:
875
+ return TaskResult.terminal(error="Data not validated")
876
+ result = data.upper()
877
+ return TaskResult.success(outputs={"processed_data": result})
878
+
879
+
880
+ class NotifyTask(Task):
881
+ def execute(self, stage: StageExecution) -> TaskResult:
882
+ processed = stage.context.get("processed_data")
883
+ # send_notification() stands in for your own delivery code; even if it fails, we don't want to fail the pipeline
884
+ try:
885
+ send_notification(processed)
886
+ return TaskResult.success(outputs={"notified": True})
887
+ except Exception as e:
888
+ # Use failed_continue to not halt the pipeline
889
+ return TaskResult.failed_continue(
890
+ error=f"Notification failed: {e}",
891
+ outputs={"notified": False}
892
+ )
893
+
894
+
895
+ def setup_pipeline_runner(store, queue):
896
+ registry = TaskRegistry()
897
+ registry.register("validate", ValidateTask)
898
+ registry.register("process", ProcessTask)
899
+ registry.register("notify", NotifyTask)
900
+
901
+ processor = QueueProcessor(queue)
902
+ handlers = [
903
+ StartWorkflowHandler(queue, store),
904
+ StartStageHandler(queue, store),
905
+ StartTaskHandler(queue, store),
906
+ RunTaskHandler(queue, store, registry),
907
+ CompleteTaskHandler(queue, store),
908
+ CompleteStageHandler(queue, store),
909
+ CompleteWorkflowHandler(queue, store),
910
+ ]
911
+ for h in handlers:
912
+ processor.register_handler(h)
913
+
914
+ return processor, Orchestrator(queue)
915
+
916
+
917
+ def main():
918
+ store = SqliteWorkflowStore("sqlite:///:memory:", create_tables=True)
919
+ queue = SqliteQueue("sqlite:///:memory:", table_name="queue_messages")
920
+ queue._create_table()
921
+ processor, orchestrator = setup_pipeline_runner(store, queue)
922
+
923
+ workflow = Workflow.create(
924
+ application="data-pipeline",
925
+ name="Process Data",
926
+ stages=[
927
+ StageExecution(
928
+ ref_id="validate",
929
+ type="validate",
930
+ name="Validate Input",
931
+ context={"data": "hello world"},
932
+ tasks=[TaskExecution.create("Validate", "validate", stage_start=True, stage_end=True)],
933
+ ),
934
+ StageExecution(
935
+ ref_id="process",
936
+ type="process",
937
+ name="Process Data",
938
+ requisite_stage_ref_ids={"validate"},
939
+ context={}, # Will receive 'data' from upstream
940
+ tasks=[TaskExecution.create("Process", "process", stage_start=True, stage_end=True)],
941
+ ),
942
+ StageExecution(
943
+ ref_id="notify",
944
+ type="notify",
945
+ name="Send Notification",
946
+ requisite_stage_ref_ids={"process"},
947
+ context={},
948
+ tasks=[TaskExecution.create("Notify", "notify", stage_start=True, stage_end=True)],
949
+ ),
950
+ ],
951
+ )
952
+
953
+ store.store(workflow)
954
+ orchestrator.start(workflow)
955
+ processor.process_all(timeout=30.0)
956
+
957
+ result = store.retrieve(workflow.id)
958
+ print(f"Final status: {result.status}")
959
+ for stage in result.stages:
960
+ print(f" {stage.name}: {stage.status} - {stage.outputs}")
961
+
962
+
963
+ if __name__ == "__main__":
964
+ main()
965
+
966
+ ===============================================================================
967
+ 9. COMPLETE EXAMPLE: PARALLEL STAGES WITH JOIN
968
+ ===============================================================================
969
+
970
+ #!/usr/bin/env python3
971
+ from stabilize import (
972
+ Workflow, StageExecution, TaskExecution,
973
+ Orchestrator, QueueProcessor, SqliteQueue, SqliteWorkflowStore,
974
+ Task, TaskResult, TaskRegistry,
975
+ StartWorkflowHandler, StartStageHandler, StartTaskHandler,
976
+ RunTaskHandler, CompleteTaskHandler, CompleteStageHandler,
977
+ CompleteWorkflowHandler,
978
+ )
979
+
980
+
981
+ class FetchDataTask(Task):
982
+ def execute(self, stage: StageExecution) -> TaskResult:
983
+ source = stage.context.get("source")
984
+ # Simulate fetching data from different sources
985
+ data = f"data_from_{source}"
986
+ return TaskResult.success(outputs={f"{source}_data": data})
987
+
988
+
989
+ class AggregateTask(Task):
990
+ def execute(self, stage: StageExecution) -> TaskResult:
991
+ # Collect data from all upstream parallel stages
992
+ api_data = stage.context.get("api_data")
993
+ db_data = stage.context.get("db_data")
994
+ cache_data = stage.context.get("cache_data")
995
+ combined = f"{api_data} + {db_data} + {cache_data}"
996
+ return TaskResult.success(outputs={"combined_data": combined})
997
+
998
+
999
+ def setup_pipeline_runner(store, queue):
1000
+ registry = TaskRegistry()
1001
+ registry.register("fetch", FetchDataTask)
1002
+ registry.register("aggregate", AggregateTask)
1003
+
1004
+ processor = QueueProcessor(queue)
1005
+ for h in [
1006
+ StartWorkflowHandler(queue, store),
1007
+ StartStageHandler(queue, store),
1008
+ StartTaskHandler(queue, store),
1009
+ RunTaskHandler(queue, store, registry),
1010
+ CompleteTaskHandler(queue, store),
1011
+ CompleteStageHandler(queue, store),
1012
+ CompleteWorkflowHandler(queue, store),
1013
+ ]:
1014
+ processor.register_handler(h)
1015
+
1016
+ return processor, Orchestrator(queue)
1017
+
1018
+
1019
+ def main():
1020
+ store = SqliteWorkflowStore("sqlite:///:memory:", create_tables=True)
1021
+ queue = SqliteQueue("sqlite:///:memory:", table_name="queue_messages")
1022
+ queue._create_table()
1023
+ processor, orchestrator = setup_pipeline_runner(store, queue)
1024
+
1025
+ # Start
1026
+ # / | \
1027
+ # API DB Cache <- Run in parallel
1028
+ # \ | /
1029
+ # Aggregate <- Join point
1030
+
1031
+ workflow = Workflow.create(
1032
+ application="parallel-fetch",
1033
+ name="Parallel Data Fetch",
1034
+ stages=[
1035
+ StageExecution(
1036
+ ref_id="api",
1037
+ type="fetch",
1038
+ name="Fetch from API",
1039
+ context={"source": "api"},
1040
+ tasks=[TaskExecution.create("Fetch API", "fetch", stage_start=True, stage_end=True)],
1041
+ ),
1042
+ StageExecution(
1043
+ ref_id="db",
1044
+ type="fetch",
1045
+ name="Fetch from Database",
1046
+ context={"source": "db"},
1047
+ tasks=[TaskExecution.create("Fetch DB", "fetch", stage_start=True, stage_end=True)],
1048
+ ),
1049
+ StageExecution(
1050
+ ref_id="cache",
1051
+ type="fetch",
1052
+ name="Fetch from Cache",
1053
+ context={"source": "cache"},
1054
+ tasks=[TaskExecution.create("Fetch Cache", "fetch", stage_start=True, stage_end=True)],
1055
+ ),
1056
+ StageExecution(
1057
+ ref_id="aggregate",
1058
+ type="aggregate",
1059
+ name="Aggregate Results",
1060
+ requisite_stage_ref_ids={"api", "db", "cache"}, # Wait for ALL three
1061
+ context={},
1062
+ tasks=[TaskExecution.create("Aggregate", "aggregate", stage_start=True, stage_end=True)],
1063
+ ),
1064
+ ],
1065
+ )
1066
+
1067
+ store.store(workflow)
1068
+ orchestrator.start(workflow)
1069
+ processor.process_all(timeout=30.0)
1070
+
1071
+ result = store.retrieve(workflow.id)
1072
+ print(f"Final status: {result.status}")
1073
+ print(f"Combined data: {result.stages[-1].outputs.get('combined_data')}")
1074
+
1075
+
1076
+ if __name__ == "__main__":
1077
+ main()
1078
+
1079
+ ===============================================================================
1080
+ 10. COMPLETE IMPORTS REFERENCE
1081
+ ===============================================================================
1082
+
1083
+ # RECOMMENDED: Single consolidated import (most common classes)
1084
+ from stabilize import (
1085
+ # Core models
1086
+ Workflow, StageExecution, TaskExecution, WorkflowStatus,
1087
+ # Infrastructure
1088
+ Orchestrator, QueueProcessor, SqliteQueue, SqliteWorkflowStore,
1089
+ # Tasks
1090
+ Task, RetryableTask, TaskResult, TaskRegistry,
1091
+ ShellTask, HTTPTask, DockerTask, SSHTask, HighwayTask,
1092
+ # Handlers (all 7 required)
1093
+ StartWorkflowHandler, StartStageHandler, StartTaskHandler,
1094
+ RunTaskHandler, CompleteTaskHandler, CompleteStageHandler,
1095
+ CompleteWorkflowHandler,
1096
+ )
1097
+
1098
+ # Advanced imports (for specialized use cases)
1099
+ from stabilize.persistence.store import WorkflowStore # Abstract base for custom stores
1100
+ from stabilize.queue.queue import Queue # Abstract base for custom queues
1101
+ from stabilize.tasks.interface import ( # Advanced task types
1102
+ SkippableTask, OverridableTimeoutRetryableTask,
1103
+ CallableTask, NoOpTask, WaitTask,
1104
+ )
1105
+ from stabilize.tasks.result import TaskResultBuilder # For complex result building
1106
+
1107
+ # Verification System (NEW)
1108
+ from stabilize.verification import (
1109
+ VerifyResult, VerifyStatus, Verifier, OutputVerifier, CallableVerifier,
1110
+ )
1111
+
1112
+ # Structured Conditions (NEW)
1113
+ from stabilize.conditions import (
1114
+ Condition, ConditionSet, ConditionType, ConditionReason,
1115
+ )
1116
+
1117
+ # Assertion Helpers (NEW)
1118
+ from stabilize.assertions import (
1119
+ assert_context, assert_context_type, assert_context_in,
1120
+ assert_output, assert_output_type,
1121
+ assert_config, assert_verified, assert_true,
1122
+ assert_stage_ready, assert_not_none, assert_non_empty,
1123
+ ContextError, OutputError, ConfigError, VerificationError,
1124
+ PreconditionError, StageNotReadyError,
1125
+ )
1126
+
1127
+ # Configuration Validation (NEW)
1128
+ from stabilize.config_validation import (
1129
+ validate_context, validate_outputs, is_valid,
1130
+ SchemaValidator, ValidationError,
1131
+ SHELL_TASK_SCHEMA, WAIT_TASK_SCHEMA,
1132
+ )
1133
+
1134
+ # Error Handling & Reliability (NEW)
1135
+ from stabilize.errors import (
1136
+ TransientError, PermanentError, TaskTimeoutError,
1137
+ is_transient, is_permanent,
1138
+ )
1139
+ from stabilize.models.status import (
1140
+ can_transition, validate_transition, InvalidStateTransitionError,
1141
+ )
1142
+ from stabilize.recovery import WorkflowRecovery, recover_on_startup
1143
+
1144
+ ===============================================================================
1145
+ 11. VERIFICATION SYSTEM (NEW)
1146
+ ===============================================================================
1147
+
1148
+ The verification system validates stage outputs after task completion,
1149
+ before downstream stages start. This ensures data integrity in pipelines.
1150
+
1151
+ 11.1 VerifyResult - Verification Result Type
1152
+ ---------------------------------------------
1153
+ from stabilize.verification import VerifyResult, VerifyStatus
1154
+
1155
+ # Create results using factory methods:
1156
+ VerifyResult.ok(message="All checks passed") # Verification passed
1157
+ VerifyResult.retry(message="Still waiting", details={}) # Will retry
1158
+ VerifyResult.failed(message="Check failed", details={}) # Terminal failure
1159
+ VerifyResult.skipped(message="Not applicable") # Skipped
1160
+
1161
+ # Check result status:
1162
+ result.is_ok # True if verification passed
1163
+ result.is_retry # True if should retry
1164
+ result.is_failed # True if terminal failure
1165
+ result.is_terminal # True if OK, FAILED, or SKIPPED (won't retry)
1166
+
1167
+ 11.2 OutputVerifier - Check Required Outputs
1168
+ --------------------------------------------
1169
+ from stabilize.verification import OutputVerifier
1170
+
1171
+ # Verify that specific outputs exist with correct types
1172
+ verifier = OutputVerifier(
1173
+ required_keys=["url", "status_code"],
1174
+ type_checks={"status_code": int},
1175
+ )
1176
+
1177
+ class MyTask(Task):
1178
+ def execute(self, stage: StageExecution) -> TaskResult:
1179
+ # ... task logic ...
1180
+ result = verifier.verify(stage)
1181
+ if not result.is_ok:
1182
+ return TaskResult.terminal(result.message)
1183
+ return TaskResult.success(outputs={"url": url, "status_code": 200})
1184
+
1185
+ 11.3 Custom Verifier
1186
+ ---------------------
1187
+ from stabilize.verification import Verifier, VerifyResult
1188
+
1189
+ class URLVerifier(Verifier):
1190
+ def verify(self, stage: StageExecution) -> VerifyResult:
1191
+ url = stage.outputs.get("url")
1192
+ if not url:
1193
+ return VerifyResult.failed("No URL in outputs")
1194
+
1195
+ # Check if URL is reachable
1196
+ try:
1197
+ response = requests.head(url, timeout=5)
1198
+ if response.ok:
1199
+ return VerifyResult.ok(f"URL {url} is reachable")
1200
+ return VerifyResult.retry(f"URL returned {response.status_code}")
1201
+ except Exception as e:
1202
+ return VerifyResult.retry(f"URL check failed: {e}")
1203
+
1204
+ @property
1205
+ def max_retries(self) -> int:
1206
+ return 5 # Override default of 3
1207
+
1208
+ @property
1209
+ def retry_delay_seconds(self) -> float:
1210
+ return 2.0 # Override default of 1.0
1211
+
1212
+ ===============================================================================
1213
+ 12. STRUCTURED CONDITIONS (NEW)
1214
+ ===============================================================================
1215
+
1216
+ Conditions provide detailed status information with reasons and timestamps,
1217
+ inspired by Kubernetes conditions.
1218
+
1219
+ 12.1 Condition - Status with Context
1220
+ ------------------------------------
1221
+ from stabilize.conditions import Condition, ConditionType, ConditionReason
1222
+
1223
+ # Create conditions using factory methods:
1224
+ Condition.ready(status=True, reason=ConditionReason.TASKS_SUCCEEDED, message="Done")
1225
+ Condition.progressing(status=True, reason=ConditionReason.IN_PROGRESS)
1226
+ Condition.verified(status=True, reason=ConditionReason.VERIFICATION_PASSED)
1227
+ Condition.failed(reason=ConditionReason.TASK_FAILED, message="Task timed out")
1228
+ Condition.config_valid(status=True)
1229
+
1230
+ # Update a condition (immutable - returns new instance)
1231
+ updated = condition.update(status=False, reason=ConditionReason.IN_PROGRESS)
1232
+
1233
+ # Serialize for storage
1234
+ data = condition.to_dict() # {"type": "Ready", "status": true, ...}
1235
+ condition = Condition.from_dict(data)
1236
+
1237
+ 12.2 ConditionSet - Manage Multiple Conditions
1238
+ ----------------------------------------------
1239
+ from stabilize.conditions import ConditionSet
1240
+
1241
+ conditions = ConditionSet()
1242
+
1243
+ # Set/update conditions
1244
+ conditions.set(Condition.ready(True, ConditionReason.TASKS_SUCCEEDED))
1245
+ conditions.set(Condition.progressing(False, ConditionReason.STAGE_COMPLETED))
1246
+
1247
+ # Quick status checks
1248
+ conditions.is_ready # True if Ready condition is True
1249
+ conditions.is_progressing # True if Progressing condition is True
1250
+ conditions.is_verified # True if Verified condition is True
1251
+ conditions.has_failed # True if Failed condition exists
1252
+ conditions.is_config_valid # True if ConfigValid is True (default: True)
1253
+
1254
+ # Get specific condition
1255
+ ready = conditions.get(ConditionType.READY)
1256
+ if ready:
1257
+ print(f"Ready: {ready.status}, Reason: {ready.reason}")
1258
+
1259
+ # Serialize
1260
+ data_list = conditions.to_list()
1261
+ conditions = ConditionSet.from_list(data_list)
1262
+
1263
+ ===============================================================================
1264
+ 13. ASSERTION HELPERS (NEW)
1265
+ ===============================================================================
1266
+
1267
+ Assertion helpers provide clean error handling with descriptive exceptions.
1268
+
1269
+ 13.1 Context Assertions
1270
+ -----------------------
1271
+ from stabilize.assertions import (
1272
+ assert_context, assert_context_type, assert_context_in,
1273
+ ContextError,
1274
+ )
1275
+
1276
+ class MyTask(Task):
1277
+ def execute(self, stage: StageExecution) -> TaskResult:
1278
+ # Assert key exists and get value (raises ContextError if missing)
1279
+ api_key = assert_context(stage, "api_key", "API key is required")
1280
+
1281
+ # Assert key exists with specific type
1282
+ timeout = assert_context_type(stage, "timeout", int, "Timeout must be int")
1283
+
1284
+ # Assert value is in allowed list
1285
+ env = assert_context_in(stage, "env", ["dev", "staging", "prod"])
1286
+
1287
+ # ... rest of task logic
1288
+ return TaskResult.success()
1289
+
1290
+ 13.2 Output Assertions
1291
+ ----------------------
1292
+ from stabilize.assertions import assert_output, assert_output_type, OutputError
1293
+
1294
+ # Assert output exists
1295
+ result = assert_output(stage, "deployment_id")
1296
+
1297
+ # Assert output with type
1298
+ count = assert_output_type(stage, "item_count", int)
1299
+
1300
+ 13.3 Configuration & Verification Assertions
1301
+ --------------------------------------------
1302
+ from stabilize.assertions import assert_config, assert_verified, ConfigError
1303
+
1304
+ # Assert configuration is valid
1305
+ assert_config(timeout > 0, "Timeout must be positive", field="timeout")
1306
+
1307
+ # Assert verification condition
1308
+ assert_verified(response.ok, "API check failed", details={"status": response.status_code})
1309
+
1310
+ 13.4 Stage Readiness Assertions
1311
+ -------------------------------
1312
+ from stabilize.assertions import assert_stage_ready, assert_no_upstream_failures
1313
+
1314
+ # Assert all upstream stages complete
1315
+ assert_stage_ready(stage, "Cannot start: upstream incomplete")
1316
+
1317
+ # Assert no upstream failures
1318
+ assert_no_upstream_failures(stage)
1319
+
1320
+ 13.5 General Assertions
1321
+ -----------------------
1322
+ from stabilize.assertions import assert_true, assert_not_none, assert_non_empty
1323
+
1324
+ assert_true(condition, "Condition not met")
1325
+ user = assert_not_none(get_user(id), f"User {id} not found")
1326
+ items = assert_non_empty(stage.context.get("items", []), "Items required")
1327
+
1328
+ 13.6 Exception Hierarchy
1329
+ ------------------------
1330
+ StabilizeError (base)
1331
+ ├── StabilizeFatalError (unrecoverable - halts pipeline)
1332
+ │ ├── ContextError (missing/invalid context)
1333
+ │ └── ConfigError (invalid configuration)
1334
+ └── StabilizeExpectedError (may allow retry)
1335
+ ├── PreconditionError (general precondition)
1336
+ ├── OutputError (missing/invalid output)
1337
+ ├── VerificationError (verification failed)
1338
+ └── StageNotReadyError (upstream incomplete)
1339
+
1340
+ ===============================================================================
1341
+ 14. CONFIGURATION VALIDATION (NEW)
1342
+ ===============================================================================
1343
+
1344
+ JSON Schema-based validation for stage contexts and configurations.
1345
+
1346
+ 14.1 Validate Context
1347
+ ---------------------
1348
+ from stabilize.config_validation import validate_context, ValidationError
1349
+
1350
+ DEPLOY_SCHEMA = {
1351
+ "type": "object",
1352
+ "required": ["cluster", "image"],
1353
+ "properties": {
1354
+ "cluster": {"type": "string", "minLength": 1},
1355
+ "image": {"type": "string", "pattern": r"^[a-z0-9./-]+:[a-z0-9.-]+$"},
1356
+ "replicas": {"type": "integer", "minimum": 1, "default": 1},
1357
+ "timeout": {"type": "integer", "minimum": 0},
1358
+ },
1359
+ }
1360
+
1361
+ class DeployTask(Task):
1362
+ def execute(self, stage: StageExecution) -> TaskResult:
1363
+ errors = validate_context(stage.context, DEPLOY_SCHEMA)
1364
+ if errors:
1365
+ return TaskResult.terminal(f"Invalid config: {errors[0]}")
1366
+
1367
+ # Config is valid, proceed
1368
+ cluster = stage.context["cluster"]
1369
+ image = stage.context["image"]
1370
+ # ...
1371
+
1372
+ 14.2 Built-in Schemas
1373
+ ---------------------
1374
+ from stabilize.config_validation import SHELL_TASK_SCHEMA, WAIT_TASK_SCHEMA
1375
+
1376
+ # SHELL_TASK_SCHEMA validates: command (required), timeout, cwd, env, etc.
1377
+ # WAIT_TASK_SCHEMA validates: waitTime (required, >= 0)
1378
+
1379
+ 14.3 Quick Validation Check
1380
+ ---------------------------
1381
+ from stabilize.config_validation import is_valid
1382
+
1383
+ if not is_valid(stage.context, DEPLOY_SCHEMA):
1384
+ return TaskResult.terminal("Invalid configuration")
1385
+
1386
+ 14.4 Supported Validations
1387
+ --------------------------
1388
+ Type: "type": "string" | "integer" | "number" | "boolean" | "array" | "object" | "null"
1389
+ Union: "type": ["string", "integer"]
1390
+ Required: "required": ["field1", "field2"]
1391
+ Enum: "enum": ["value1", "value2"]
1392
+ Const: "const": "fixed_value"
1393
+
1394
+ String: "minLength", "maxLength", "pattern"
1395
+ Number: "minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum", "multipleOf"
1396
+ Array: "minItems", "maxItems", "uniqueItems", "items" (schema for array elements)
1397
+ Object: "properties", "additionalProperties", "minProperties", "maxProperties"
1398
+
1399
+ ===============================================================================
1400
+ 15. ERROR HANDLING & RELIABILITY (NEW)
1401
+ ===============================================================================
1402
+
1403
+ Stabilize has enterprise-grade reliability features for production deployments.
1404
+
1405
+ 15.1 Transient vs Permanent Errors
1406
+ ----------------------------------
1407
+ from stabilize.errors import TransientError, PermanentError, is_transient
1408
+
1409
+ # Transient errors are automatically retried with exponential backoff
1410
+ raise TransientError("Connection timeout") # Will retry
1411
+
1412
+ # Permanent errors immediately fail the task
1413
+ raise PermanentError("Invalid input") # No retry, marks task as terminal
1414
+
1415
+ # Classification helper - checks exception class name for keywords
1416
+ is_transient(ConnectionError("timeout")) # True - has "connection"
1417
+ is_transient(TimeoutError()) # True - has "timeout"
1418
+ is_transient(ValueError("bad input")) # False - standard exception
1419
+
1420
+ Keywords that make an error transient:
1421
+ - "timeout", "temporary", "transient", "connection", "network"
1422
+ - "unavailable", "retry", "throttl", "rate", "limit", "5xx"
1423
+
1424
+ 15.2 Automatic Retry with Exponential Backoff
1425
+ ---------------------------------------------
1426
+ Transient errors are retried with exponential backoff:
1427
+ - Attempt 1: ~1 second delay
1428
+ - Attempt 2: ~2 seconds delay
1429
+ - Attempt 3: ~4 seconds delay
1430
+ - ...continues doubling up to 60 seconds max
1431
+ - ±25% jitter added to prevent thundering herd
1432
+
1433
+ Maximum 10 retry attempts before marking as terminal.
1434
+
1435
+ 15.3 Message Deduplication (Idempotency)
1436
+ ----------------------------------------
1437
+ Messages are deduplicated to prevent duplicate processing:
1438
+
1439
+ # Automatic - no code changes needed
1440
+ # Each message has a unique ID tracked in processed_messages table
1441
+ # Re-processing the same message is skipped
1442
+
1443
+ This ensures:
1444
+ - Crash recovery doesn't cause duplicate side effects
1445
+ - Network retries don't duplicate work
1446
+ - At-least-once delivery becomes effectively-once processing
1447
+
1448
+ 15.4 State Transition Validation
1449
+ --------------------------------
1450
+ from stabilize.models.status import can_transition, validate_transition
1451
+
1452
+ # Check if transition is valid
1453
+ can_transition(WorkflowStatus.NOT_STARTED, WorkflowStatus.RUNNING) # True
1454
+ can_transition(WorkflowStatus.SUCCEEDED, WorkflowStatus.RUNNING) # False
1455
+
1456
+ # Validate with exception
1457
+ validate_transition(
1458
+ WorkflowStatus.SUCCEEDED,
1459
+ WorkflowStatus.RUNNING,
1460
+ entity_type="workflow",
1461
+ entity_id="wf-123",
1462
+ ) # Raises InvalidStateTransitionError
1463
+
1464
+ Valid transitions:
1465
+ NOT_STARTED → RUNNING, CANCELED, SKIPPED
1466
+ RUNNING → SUCCEEDED, FAILED_CONTINUE, TERMINAL, CANCELED, PAUSED, STOPPED
1467
+ PAUSED → RUNNING, CANCELED
1468
+ Terminal states (SUCCEEDED, TERMINAL, CANCELED, STOPPED, SKIPPED) → no transitions
1469
+
1470
+ 15.5 Timeout Enforcement
1471
+ ------------------------
1472
+ Tasks are executed with timeout enforcement using thread interruption:
1473
+
1474
+ # Default timeout: 5 minutes for regular tasks
1475
+ # RetryableTask can override via get_timeout() or, per stage, get_dynamic_timeout()
1476
+
1477
+ class MyRetryableTask(RetryableTask):
1478
+ def get_timeout(self) -> timedelta:
1479
+ return timedelta(minutes=30)
1480
+
1481
+ def get_dynamic_timeout(self, stage: StageExecution) -> timedelta:
1482
+ # Can use stage context to determine timeout
1483
+ return timedelta(milliseconds=stage.context.get("stageTimeoutMs", 300000))
1484
+
1485
+ # When a timeout occurs, task.on_timeout(stage) is called if defined
1486
+ def on_timeout(self, stage: StageExecution) -> TaskResult | None:
1487
+ # Cleanup and return partial result, or None for default behavior
1488
+ return TaskResult.failed_continue(error="Timed out", outputs={"partial": data})
1489
+
1490
+ 15.6 Crash Recovery
1491
+ -------------------
1492
+ from stabilize.recovery import WorkflowRecovery, recover_on_startup
1493
+
1494
+ # At application startup, recover in-progress workflows
1495
+ recovery = WorkflowRecovery(store, queue)
1496
+ results = recovery.recover_pending_workflows()
1497
+
1498
+ # Convenience function
1499
+ recover_on_startup(store, queue) # Returns list of RecoveryResult
1500
+
1501
+ Recovery automatically:
1502
+ - Finds workflows in RUNNING/NOT_STARTED state
1503
+ - Re-queues their current stages for continuation
1504
+ - Uses idempotency to prevent duplicate work
1505
+
1506
+ ===============================================================================
1507
+ END OF REFERENCE
1508
+ ===============================================================================
1509
+ '''
1510
+
1511
# Name of the PostgreSQL table that records which migration files have been
# applied (one row per migration: file name, checksum, applied-at timestamp).
MIGRATION_TABLE = "stabilize_migrations"
1513
+
1514
+
1515
def load_config() -> dict[str, Any]:
    """Load database connection settings from the environment or ``mg.yaml``.

    Sources are tried in this order:

    1. The ``MG_DATABASE_URL`` environment variable, parsed by
       ``parse_db_url``.
    2. The ``database`` mapping of an ``mg.yaml`` file in the current working
       directory.

    Returns:
        A dict of connection parameters. When ``mg.yaml`` exists but has no
        usable ``database`` mapping, an empty dict is returned and callers
        apply their own defaults.

    Exits the process with status 1 when no configuration source is found, or
    when ``mg.yaml`` exists but PyYAML is not installed.
    """
    db_url = os.environ.get("MG_DATABASE_URL")
    if db_url:
        return parse_db_url(db_url)

    # Try to load mg.yaml
    mg_yaml = Path("mg.yaml")
    if mg_yaml.exists():
        # Keep the try body limited to the import so that errors raised while
        # reading the file are not mistaken for a missing dependency.
        try:
            import yaml  # type: ignore[import-untyped]
        except ImportError:
            print("Warning: PyYAML not installed, cannot read mg.yaml")
            print("Set MG_DATABASE_URL environment variable instead")
            sys.exit(1)

        # Read as UTF-8 explicitly instead of relying on the platform locale.
        with open(mg_yaml, encoding="utf-8") as f:
            config = yaml.safe_load(f)

        # An empty file yields None, and a document whose top level (or whose
        # ``database`` key) is not a mapping has no usable settings either.
        if not isinstance(config, dict):
            return {}
        db_config = config.get("database")
        return db_config if isinstance(db_config, dict) else {}

    print("Error: No database configuration found")
    print("Either create mg.yaml or set MG_DATABASE_URL environment variable")
    sys.exit(1)
1539
+
1540
+
1541
def parse_db_url(url: str) -> dict[str, Any]:
    """Parse a PostgreSQL URL into connection parameters.

    Accepted form: ``postgres[ql]://[user[:password]@]host[:port]/dbname``.
    Anything from the first ``?`` onward is ignored.

    Args:
        url: The database URL to parse.

    Returns:
        A dict with ``host``, ``port`` (int, default 5432), ``user``
        (default ``"postgres"``), ``password`` (default ``""``) and
        ``dbname``. Percent-encoded characters in ``user``, ``password`` and
        ``dbname`` are decoded, so credentials containing reserved characters
        such as ``@`` or ``:`` can be supplied as ``%40`` / ``%3A``.

    Exits the process with status 1 when the URL does not match.
    """
    from urllib.parse import unquote

    # postgres://user:pass@host:port/dbname
    pattern = r"postgres(?:ql)?://(?:(?P<user>[^:]+)(?::(?P<password>[^@]+))?@)?(?P<host>[^:/]+)(?::(?P<port>\d+))?/(?P<dbname>[^?]+)"
    match = re.match(pattern, url)
    if not match:
        print(f"Error: Invalid database URL: {url}")
        sys.exit(1)

    return {
        "host": match.group("host"),
        "port": int(match.group("port") or 5432),
        # URL components are percent-encoded on the wire; decode them so the
        # values handed to the driver are the real credentials.
        "user": unquote(match.group("user") or "postgres"),
        "password": unquote(match.group("password") or ""),
        "dbname": unquote(match.group("dbname")),
    }
1557
+
1558
+
1559
def get_migrations() -> list[tuple[str, str]]:
    """Collect the SQL migration files bundled in ``stabilize.migrations``.

    Returns:
        A list of ``(filename, content)`` tuples for every ``*.sql`` resource
        in the package, sorted by filename.
    """
    migrations_pkg = files("stabilize.migrations")

    # Decode as UTF-8 explicitly: with the locale default, the same file could
    # decode differently across platforms and so produce a different checksum.
    migrations = [
        (item.name, item.read_text(encoding="utf-8"))
        for item in migrations_pkg.iterdir()
        if item.name.endswith(".sql")
    ]

    # Sort by filename (ULID prefix ensures chronological order)
    migrations.sort(key=lambda entry: entry[0])
    return migrations
1572
+
1573
+
1574
def extract_up_migration(content: str) -> str:
    """Return the "up" section of a migration file.

    The section is the text after a ``-- migrate: up`` marker line, up to a
    ``-- migrate: down`` marker or the end of the content, with surrounding
    whitespace stripped. Markers are matched case-insensitively. When no
    "up" marker is present the content is returned unchanged.
    """
    up_section = re.compile(
        r"--\s*migrate:\s*up\s*\n(.*?)(?:--\s*migrate:\s*down|$)",
        re.DOTALL | re.IGNORECASE,
    )
    found = up_section.search(content)
    return content if found is None else found.group(1).strip()
1585
+
1586
+
1587
def compute_checksum(content: str) -> str:
    """Compute the MD5 checksum of migration content.

    Args:
        content: Full text of a migration file.

    Returns:
        The 32-character lowercase hex digest of the UTF-8 encoded content.
    """
    # The digest only detects edited migration files; flagging it as
    # non-security keeps it usable on builds that restrict MD5.
    return hashlib.md5(content.encode("utf-8"), usedforsecurity=False).hexdigest()
1590
+
1591
+
1592
def mg_up(db_url: str | None = None) -> None:
    """Apply pending migrations to PostgreSQL database.

    Creates the migration tracking table if needed, then runs the "up"
    section of every bundled migration that is not yet recorded there, in
    filename order. Each migration is committed together with its tracking
    row. An already-applied migration whose stored checksum differs from the
    bundled file only produces a warning.

    Args:
        db_url: Optional ``postgres://`` URL. When omitted, settings come
            from ``load_config``.

    Exits the process with status 1 when psycopg is not installed or a
    database error occurs.
    """
    try:
        import psycopg
    except ImportError:
        print("Error: psycopg not installed")
        print("Install with: pip install stabilize[postgres]")
        sys.exit(1)

    # Load config
    config = parse_db_url(db_url) if db_url else load_config()

    # Pass the parameters as keywords instead of interpolating them into a
    # "key=value" string: an unquoted empty password, or a value containing
    # spaces or quotes, is misparsed in a hand-built conninfo string.
    connect_kwargs = {
        "host": config["host"],
        "port": config.get("port", 5432),
        "user": config.get("user", "postgres"),
        "password": config.get("password", ""),
        "dbname": config["dbname"],
    }

    try:
        with psycopg.connect(**connect_kwargs) as conn:
            with conn.cursor() as cur:
                # Ensure migration tracking table exists
                cur.execute(
                    f"""
                    CREATE TABLE IF NOT EXISTS {MIGRATION_TABLE} (
                        id SERIAL PRIMARY KEY,
                        name VARCHAR(255) NOT NULL UNIQUE,
                        checksum VARCHAR(32) NOT NULL,
                        applied_at TIMESTAMP DEFAULT NOW()
                    )
                    """
                )
                conn.commit()

                # Get applied migrations
                cur.execute(f"SELECT name, checksum FROM {MIGRATION_TABLE}")
                applied = {row[0]: row[1] for row in cur.fetchall()}

                # Get available migrations
                migrations = get_migrations()

                if not migrations:
                    print("No migrations found in package")
                    return

                # Apply pending migrations
                pending = 0
                for name, content in migrations:
                    checksum = compute_checksum(content)

                    if name in applied:
                        # Already applied: only verify the file is unchanged.
                        if applied[name] != checksum:
                            print(f"Warning: Checksum mismatch for {name}")
                        continue

                    pending += 1
                    print(f"Applying: {name}")

                    cur.execute(extract_up_migration(content))
                    cur.execute(
                        f"INSERT INTO {MIGRATION_TABLE} (name, checksum) VALUES (%s, %s)",
                        (name, checksum),
                    )
                    # Commit per migration so earlier ones stay applied if a
                    # later one fails.
                    conn.commit()

                if pending == 0:
                    print("All migrations already applied")
                else:
                    print(f"Applied {pending} migration(s)")

    except psycopg.Error as e:
        print(f"Database error: {e}")
        sys.exit(1)
1672
+
1673
+
1674
def prompt() -> None:
    """Output comprehensive documentation for AI coding agents.

    Prints the module-level ``PROMPT_TEXT`` reference to standard output.
    """
    print(PROMPT_TEXT)
1677
+
1678
+
1679
def _redact_db_url(url: str) -> str:
    """Return *url* with the password in its credentials replaced by ``***``."""
    scheme, sep, rest = url.partition("://")
    location, question, query = rest.partition("?")
    # Split on the last "@" so a password that itself contains "@" is still
    # hidden in full.
    creds, at, tail = location.rpartition("@")
    user, colon, _password = creds.partition(":")
    if not (sep and at and colon):
        return url
    return f"{scheme}://{user}:***@{tail}{question}{query}"


def monitor(
    db_url: str | None,
    app_filter: str | None,
    refresh_interval: int,
    status_filter: str,
) -> None:
    """Launch the real-time monitoring dashboard.

    Args:
        db_url: ``sqlite...`` or ``postgres...`` database URL. When ``None``,
            a postgres URL is assembled from ``load_config``.
        app_filter: Application name filter, passed through to ``run_monitor``.
        refresh_interval: Refresh interval, passed through to ``run_monitor``.
        status_filter: Status filter, passed through to ``run_monitor``.

    Exits the process with status 1 when no configuration is available, the
    URL scheme is unsupported, or the postgres backend cannot be imported.
    """
    from stabilize.monitor import run_monitor

    # Create store based on db_url
    if db_url is None:
        # Try to load from config
        try:
            config = load_config()
            db_url = (
                f"postgres://{config.get('user', 'postgres')}:"
                f"{config.get('password', '')}@"
                f"{config.get('host', 'localhost')}:"
                f"{config.get('port', 5432)}/"
                f"{config.get('dbname', 'stabilize')}"
            )
        except SystemExit:
            print("Error: No database configuration found.")
            print("Provide --db-url or set up mg.yaml / MG_DATABASE_URL")
            sys.exit(1)

    # Determine store type from URL
    store: WorkflowStore
    queue: Queue | None = None
    if db_url.startswith("sqlite"):
        from stabilize.persistence.sqlite import SqliteWorkflowStore
        from stabilize.queue.sqlite_queue import SqliteQueue

        store = SqliteWorkflowStore(db_url, create_tables=False)
        # The queue is only used for stats, so failing to create it is not fatal.
        try:
            queue = SqliteQueue(db_url, table_name="queue_messages")
        except Exception:
            queue = None
    elif db_url.startswith("postgres"):
        try:
            from stabilize.persistence.postgres import PostgresWorkflowStore
            from stabilize.queue.queue import PostgresQueue

            store = PostgresWorkflowStore(db_url)
            # Same best-effort queue creation as the sqlite branch.
            try:
                queue = PostgresQueue(db_url)
            except Exception:
                queue = None
        except ImportError:
            print("Error: psycopg not installed")
            print("Install with: pip install stabilize[postgres]")
            sys.exit(1)
    else:
        print(f"Error: Unsupported database URL: {db_url}")
        print("Use sqlite:///path or postgres://...")
        sys.exit(1)

    # Never echo the password to the terminal (or whatever captures stdout).
    print(f"Connecting to {_redact_db_url(db_url)[:50]}...")
    run_monitor(
        store=store,
        queue=queue,
        app_filter=app_filter,
        refresh_interval=refresh_interval,
        status_filter=status_filter,
    )
1745
+
1746
+
1747
def mg_status(db_url: str | None = None) -> None:
    """Show migration status.

    Prints one row per bundled migration: ``applied`` (stored checksum matches
    the bundled file), ``MISMATCH`` (recorded as applied but the checksum
    differs) or ``pending`` (not recorded). A missing tracking table is
    treated as "nothing applied".

    Args:
        db_url: Optional ``postgres://`` URL. When omitted, settings come
            from ``load_config``.

    Exits the process with status 1 when psycopg is not installed or a
    database error occurs.
    """
    try:
        import psycopg
    except ImportError:
        print("Error: psycopg not installed")
        print("Install with: pip install stabilize[postgres]")
        sys.exit(1)

    # Load config
    config = parse_db_url(db_url) if db_url else load_config()

    # Pass the parameters as keywords instead of interpolating them into a
    # "key=value" string: an unquoted empty password, or a value containing
    # spaces or quotes, is misparsed in a hand-built conninfo string.
    connect_kwargs = {
        "host": config["host"],
        "port": config.get("port", 5432),
        "user": config.get("user", "postgres"),
        "password": config.get("password", ""),
        "dbname": config["dbname"],
    }

    try:
        with psycopg.connect(**connect_kwargs) as conn:
            with conn.cursor() as cur:
                # Check if tracking table exists
                cur.execute(
                    """
                    SELECT EXISTS (
                        SELECT FROM information_schema.tables
                        WHERE table_name = %s
                    )
                    """,
                    (MIGRATION_TABLE,),
                )
                row = cur.fetchone()
                table_exists = row[0] if row else False

                # name -> (checksum, applied_at) for every recorded migration
                applied = {}
                if table_exists:
                    cur.execute(f"SELECT name, checksum, applied_at FROM {MIGRATION_TABLE} ORDER BY applied_at")
                    applied = {row[0]: (row[1], row[2]) for row in cur.fetchall()}

                migrations = get_migrations()

                print(f"{'Status':<10} {'Migration':<50} {'Applied At'}")
                print("-" * 80)

                for name, content in migrations:
                    if name in applied:
                        checksum, applied_at = applied[name]
                        expected = compute_checksum(content)
                        status = "applied" if checksum == expected else "MISMATCH"
                        print(f"{status:<10} {name:<50} {applied_at}")
                    else:
                        print(f"{'pending':<10} {name:<50} -")

    except psycopg.Error as e:
        print(f"Database error: {e}")
        sys.exit(1)
1806
+
1807
+
1808
def main() -> None:
    """Parse command-line arguments and run the selected subcommand.

    Subcommands: ``mg-up``, ``mg-status``, ``prompt`` and ``monitor``. When no
    subcommand is given, the help text is printed and the process exits with
    status 1.
    """
    cli = argparse.ArgumentParser(
        prog="stabilize",
        description="Stabilize - Workflow Engine CLI",
    )
    commands = cli.add_subparsers(dest="command", help="Available commands")

    # mg-up / mg-status: both take the same optional --db-url
    migration_commands = (
        ("mg-up", "Apply pending PostgreSQL migrations"),
        ("mg-status", "Show migration status"),
    )
    for command_name, command_help in migration_commands:
        migration_parser = commands.add_parser(command_name, help=command_help)
        migration_parser.add_argument(
            "--db-url",
            help="Database URL (postgres://user:pass@host:port/dbname)",
        )

    # prompt: no options
    commands.add_parser(
        "prompt",
        help="Output comprehensive documentation for pipeline code generation",
    )

    # monitor: dashboard options
    dashboard = commands.add_parser(
        "monitor",
        help="Real-time workflow monitoring dashboard (htop-like)",
    )
    dashboard.add_argument("--app", help="Filter by application name")
    dashboard.add_argument(
        "--db-url",
        help="Database URL (postgres://... or sqlite:///...)",
    )
    dashboard.add_argument(
        "--refresh",
        type=int,
        default=2,
        help="Refresh interval in seconds (default: 2)",
    )
    dashboard.add_argument(
        "--status",
        choices=["all", "running", "failed", "recent"],
        default="all",
        help="Filter workflows by status (default: all)",
    )

    opts = cli.parse_args()

    # Lambdas defer both the name lookup and the attribute access until the
    # chosen command actually runs.
    dispatch = {
        "mg-up": lambda: mg_up(opts.db_url),
        "mg-status": lambda: mg_status(opts.db_url),
        "prompt": lambda: prompt(),
        "monitor": lambda: monitor(opts.db_url, opts.app, opts.refresh, opts.status),
    }
    run = dispatch.get(opts.command)
    if run is None:
        cli.print_help()
        sys.exit(1)
    run()
1875
+
1876
+
1877
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()