stabilize 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stabilize/__init__.py +29 -0
- stabilize/cli.py +1193 -0
- stabilize/context/__init__.py +7 -0
- stabilize/context/stage_context.py +170 -0
- stabilize/dag/__init__.py +15 -0
- stabilize/dag/graph.py +215 -0
- stabilize/dag/topological.py +199 -0
- stabilize/examples/__init__.py +1 -0
- stabilize/examples/docker-example.py +759 -0
- stabilize/examples/golden-standard-expected-result.txt +1 -0
- stabilize/examples/golden-standard.py +488 -0
- stabilize/examples/http-example.py +606 -0
- stabilize/examples/llama-example.py +662 -0
- stabilize/examples/python-example.py +731 -0
- stabilize/examples/shell-example.py +399 -0
- stabilize/examples/ssh-example.py +603 -0
- stabilize/handlers/__init__.py +53 -0
- stabilize/handlers/base.py +226 -0
- stabilize/handlers/complete_stage.py +209 -0
- stabilize/handlers/complete_task.py +75 -0
- stabilize/handlers/complete_workflow.py +150 -0
- stabilize/handlers/run_task.py +369 -0
- stabilize/handlers/start_stage.py +262 -0
- stabilize/handlers/start_task.py +74 -0
- stabilize/handlers/start_workflow.py +136 -0
- stabilize/launcher.py +307 -0
- stabilize/migrations/01KDQ4N9QPJ6Q4MCV3V9GHWPV4_initial_schema.sql +97 -0
- stabilize/migrations/01KDRK3TXW4R2GERC1WBCQYJGG_rag_embeddings.sql +25 -0
- stabilize/migrations/__init__.py +1 -0
- stabilize/models/__init__.py +15 -0
- stabilize/models/stage.py +389 -0
- stabilize/models/status.py +146 -0
- stabilize/models/task.py +125 -0
- stabilize/models/workflow.py +317 -0
- stabilize/orchestrator.py +113 -0
- stabilize/persistence/__init__.py +28 -0
- stabilize/persistence/connection.py +185 -0
- stabilize/persistence/factory.py +136 -0
- stabilize/persistence/memory.py +214 -0
- stabilize/persistence/postgres.py +655 -0
- stabilize/persistence/sqlite.py +674 -0
- stabilize/persistence/store.py +235 -0
- stabilize/queue/__init__.py +59 -0
- stabilize/queue/messages.py +377 -0
- stabilize/queue/processor.py +312 -0
- stabilize/queue/queue.py +526 -0
- stabilize/queue/sqlite_queue.py +354 -0
- stabilize/rag/__init__.py +19 -0
- stabilize/rag/assistant.py +459 -0
- stabilize/rag/cache.py +294 -0
- stabilize/stages/__init__.py +11 -0
- stabilize/stages/builder.py +253 -0
- stabilize/tasks/__init__.py +19 -0
- stabilize/tasks/interface.py +335 -0
- stabilize/tasks/registry.py +255 -0
- stabilize/tasks/result.py +283 -0
- stabilize-0.9.2.dist-info/METADATA +301 -0
- stabilize-0.9.2.dist-info/RECORD +61 -0
- stabilize-0.9.2.dist-info/WHEEL +4 -0
- stabilize-0.9.2.dist-info/entry_points.txt +2 -0
- stabilize-0.9.2.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stage context with ancestor output lookup.
|
|
3
|
+
|
|
4
|
+
The StageContext class provides access to stage inputs with automatic
|
|
5
|
+
fallback to ancestor stage outputs. This enables data flow between
|
|
6
|
+
stages in a pipeline.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections.abc import Iterator, MutableMapping
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from stabilize.models.stage import StageExecution
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class StageContext(MutableMapping[str, Any]):
    """
    Mapping view of a stage's context with fallback to ancestor outputs.

    Reads (``ctx[key]``, ``key in ctx``, ``ctx.get(key)``) consult the stage's
    own context first and then the ``outputs`` of every stage returned by
    ``stage.ancestors()``, in the order that call returns them. Writes,
    deletes, ``pop()``, iteration and ``len()`` only touch the stage's own
    context, so ancestor outputs are never modified through this object.

    Example:
        # Stage A outputs: {"deploymentId": "abc123"}
        # Stage B (downstream) context lookup:

        context = StageContext(stage_b, stage_b.context)
        deployment_id = context["deploymentId"]  # Returns "abc123" from stage A
    """

    # Sentinel that distinguishes "no default supplied" from an explicit None.
    _MISSING = object()

    def __init__(
        self,
        stage: StageExecution,
        delegate: dict[str, Any],
    ) -> None:
        """
        Initialize the context.

        Args:
            stage: The stage this context belongs to
            delegate: The underlying context dictionary; it is stored by
                reference, so writes through this object mutate it
        """
        self._stage = stage
        self._delegate = delegate

    @property
    def stage(self) -> StageExecution:
        """Get the stage this context belongs to."""
        return self._stage

    def _ancestor_values(self, key: str) -> Iterator[Any]:
        """Lazily yield ``outputs[key]`` from each ancestor that defines ``key``."""
        for ancestor in self._stage.ancestors():
            if key in ancestor.outputs:
                yield ancestor.outputs[key]

    def __getitem__(self, key: str) -> Any:
        """
        Get a value by key.

        The stage's own context wins; otherwise the first ancestor whose
        outputs contain the key supplies the value.

        Raises:
            KeyError: If neither the own context nor any ancestor has the key
        """
        if key in self._delegate:
            return self._delegate[key]

        # Only the first match is needed; returning stops the lazy scan.
        for value in self._ancestor_values(key):
            return value

        raise KeyError(key)

    def __setitem__(self, key: str, value: Any) -> None:
        """Set a value in the stage's own context."""
        self._delegate[key] = value

    def __delitem__(self, key: str) -> None:
        """Delete a value from the stage's own context."""
        del self._delegate[key]

    def __iter__(self) -> Iterator[str]:
        """Iterate over the keys of the stage's own context."""
        return iter(self._delegate)

    def __len__(self) -> int:
        """Get number of keys in own context."""
        return len(self._delegate)

    def __contains__(self, key: object) -> bool:
        """Check if key exists in the own context or in any ancestor's outputs."""
        if key in self._delegate:
            return True
        # Ancestor outputs are only searched for string keys.
        if not isinstance(key, str):
            return False
        return any(key in ancestor.outputs for ancestor in self._stage.ancestors())

    def get(self, key: str, default: Any = None) -> Any:
        """Get a value (own context, then ancestors) or ``default`` if absent."""
        try:
            return self[key]
        except KeyError:
            return default

    def pop(self, key: str, default: Any = _MISSING) -> Any:
        """
        Remove a key from the stage's own context and return its value.

        The mixin ``pop`` inherited from ``MutableMapping`` reads through
        ``__getitem__`` (which sees ancestors) and then deletes from the own
        context, so it raised ``KeyError`` even when a default was supplied if
        the key existed only in an ancestor. This override works on the own
        context alone, matching ``__delitem__``.

        Args:
            key: The key to remove
            default: Value to return when the own context lacks the key

        Raises:
            KeyError: If the own context lacks the key and no default is given
        """
        if default is self._MISSING:
            return self._delegate.pop(key)
        return self._delegate.pop(key, default)

    def get_current_only(self, key: str, default: Any = None) -> Any:
        """
        Get a value only from the current stage's context.

        Does not search ancestor outputs.
        """
        return self._delegate.get(key, default)

    def get_all(self, key: str) -> list[Any]:
        """
        Get all values of a key from ancestors.

        The stage's own context is not consulted. Values appear in the order
        ``stage.ancestors()`` returns the stages (documented by the original
        author as closest ancestor first).
        """
        return list(self._ancestor_values(key))

    def to_dict(self) -> dict[str, Any]:
        """Get a shallow copy of the stage's own context dictionary."""
        return dict(self._delegate)

    def merge_with_ancestors(self) -> dict[str, Any]:
        """
        Get merged context including all ancestor outputs.

        Returns a new dictionary with all keys from this context
        and all ancestor outputs. Own values take precedence.
        """
        merged: dict[str, Any] = {}

        # Apply ancestors in reverse so that stages earlier in ancestors()
        # overwrite later ones on key collisions.
        for ancestor in reversed(self._stage.ancestors()):
            merged.update(ancestor.outputs)

        # Own context is applied last and therefore wins.
        merged.update(self._delegate)

        return merged
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def create_merged_context(stage: StageExecution) -> StageContext:
    """
    Build a StageContext backed by a pre-merged dictionary.

    The backing dictionary is a fresh dict combining every ancestor's
    outputs with the stage's own context; on key collisions the stage's own
    values win. Because the dictionary is new, writes through the returned
    context do not modify ``stage.context``.

    Args:
        stage: The stage to create context for

    Returns:
        A StageContext with merged data
    """
    # ancestors() is walked in reverse, so stages earlier in its result
    # overwrite later ones; the own context goes last and wins every collision.
    layers = [ancestor.outputs for ancestor in reversed(stage.ancestors())]
    layers.append(stage.context)

    combined: dict[str, Any] = {}
    for layer in layers:
        combined.update(layer)

    return StageContext(stage, combined)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""DAG operations for pipeline execution."""
|
|
2
|
+
|
|
3
|
+
from stabilize.dag.graph import StageGraphBuilder
|
|
4
|
+
from stabilize.dag.topological import (
|
|
5
|
+
CircularDependencyError,
|
|
6
|
+
topological_sort,
|
|
7
|
+
topological_sort_all_stages,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"topological_sort",
|
|
12
|
+
"topological_sort_all_stages",
|
|
13
|
+
"CircularDependencyError",
|
|
14
|
+
"StageGraphBuilder",
|
|
15
|
+
]
|
stabilize/dag/graph.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stage graph builder for constructing synthetic stages.
|
|
3
|
+
|
|
4
|
+
This module provides the StageGraphBuilder class for building graphs of
|
|
5
|
+
synthetic stages (before/after stages) that are dynamically injected
|
|
6
|
+
by StageDefinitionBuilders.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
from typing import TYPE_CHECKING
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from stabilize.models.stage import StageExecution, SyntheticStageOwner
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class StageGraphBuilder:
    """
    Builder for constructing graphs of synthetic stages.

    Used by StageDefinitionBuilder to add before/after stages to a parent stage.
    Manages dependencies between synthetic stages and connects them properly.

    Example:
        # In a StageDefinitionBuilder.before_stages():
        def before_stages(self, stage, graph):
            # Add setup stage
            setup = StageExecution.create_synthetic(
                type="setup",
                name="Setup",
                parent=stage,
                owner=SyntheticStageOwner.STAGE_BEFORE,
            )
            graph.add(setup)

            # Add validation stage after setup
            validation = StageExecution.create_synthetic(
                type="validate",
                name="Validate",
                parent=stage,
                owner=SyntheticStageOwner.STAGE_BEFORE,
            )
            graph.add(validation)
            graph.connect(setup, validation)
    """

    def __init__(
        self,
        parent: StageExecution,
        owner: SyntheticStageOwner,
        requisite_stage_ref_ids: set[str] | None = None,
    ) -> None:
        """
        Initialize a StageGraphBuilder.

        Args:
            parent: The parent stage these synthetic stages belong to
            owner: STAGE_BEFORE or STAGE_AFTER
            requisite_stage_ref_ids: Initial requisites for first stage
        """
        self.parent = parent
        self.owner = owner
        self.requisite_stage_ref_ids = requisite_stage_ref_ids or set()
        self._stages: list[StageExecution] = []
        self._last_stage: StageExecution | None = None

    @classmethod
    def before_stages(cls, parent: StageExecution) -> StageGraphBuilder:
        """
        Create a builder for before stages.

        Before stages run before the parent's tasks.
        """
        # Imported here rather than at module level, where the name is only
        # available under TYPE_CHECKING.
        from stabilize.models.stage import SyntheticStageOwner

        return cls(parent, SyntheticStageOwner.STAGE_BEFORE)

    @classmethod
    def after_stages(
        cls,
        parent: StageExecution,
        requisite_stage_ref_ids: set[str] | None = None,
    ) -> StageGraphBuilder:
        """
        Create a builder for after stages.

        After stages run after the parent completes.
        """
        from stabilize.models.stage import SyntheticStageOwner

        return cls(
            parent,
            SyntheticStageOwner.STAGE_AFTER,
            requisite_stage_ref_ids or set(),
        )

    def add(self, stage: StageExecution) -> StageGraphBuilder:
        """
        Add a stage to the graph.

        The stage will be configured as a synthetic stage of the parent.
        If this is the first stage and requisite_stage_ref_ids is set,
        those requisites will be applied.

        Args:
            stage: The stage to add

        Returns:
            self for method chaining
        """
        # Configure as synthetic stage
        stage.parent_stage_id = self.parent.id
        stage.synthetic_stage_owner = self.owner

        # Share the parent's execution reference when it has one
        if self.parent.has_execution():
            stage._execution = self.parent._execution

        # Only the first stage added inherits the builder's initial requisites;
        # union() builds a new set, so the builder's own set is left untouched.
        if not self._stages and self.requisite_stage_ref_ids:
            stage.requisite_stage_ref_ids = stage.requisite_stage_ref_ids.union(self.requisite_stage_ref_ids)

        self._stages.append(stage)
        self._last_stage = stage

        return self

    def append(self, stage: StageExecution) -> StageGraphBuilder:
        """
        Append a stage after the last added stage.

        Creates a sequential dependency from the last stage to this one.

        Args:
            stage: The stage to append

        Returns:
            self for method chaining
        """
        # Compare against None explicitly: a stage object that happens to be
        # falsy must still be linked to its successor.
        if self._last_stage is not None:
            self.connect(self._last_stage, stage)
        return self.add(stage)

    def connect(self, previous: StageExecution, next_stage: StageExecution) -> None:
        """
        Connect two stages with a dependency.

        The next stage will depend on the previous stage.

        Args:
            previous: The stage that must complete first
            next_stage: The stage that depends on previous
        """
        # Assign a fresh set instead of mutating the existing one in place.
        requisites = set(next_stage.requisite_stage_ref_ids)
        requisites.add(previous.ref_id)
        next_stage.requisite_stage_ref_ids = requisites

    def build(self) -> list[StageExecution]:
        """
        Build and return the list of stages.

        Returns:
            A new list of the configured synthetic stages, in insertion order
        """
        return list(self._stages)

    @property
    def stages(self) -> list[StageExecution]:
        """Get a copy of the current list of stages."""
        return list(self._stages)

    @property
    def last_stage(self) -> StageExecution | None:
        """Get the last added stage, or None if nothing has been added."""
        return self._last_stage

    def is_empty(self) -> bool:
        """Check if no stages have been added."""
        return not self._stages
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def connect_stages_linearly(stages: list[StageExecution]) -> None:
    """
    Connect a list of stages in linear sequence.

    Each stage after the first gains the preceding stage's ``ref_id`` as an
    additional requisite. Existing requisites are kept, and the first stage
    is left unchanged. Lists with fewer than two stages are a no-op.

    Args:
        stages: List of stages to connect
    """
    # Pair each stage with its successor instead of indexing by position.
    for previous, current in zip(stages, stages[1:]):
        # Assign a fresh set rather than mutating the existing one in place.
        current.requisite_stage_ref_ids = {*current.requisite_stage_ref_ids, previous.ref_id}
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def add_stages_to_execution(
    stages: list[StageExecution],
    add_stage: Callable[[StageExecution], None],
) -> None:
    """
    Hand every stage to a registration callback.

    The callback is invoked once per stage, in list order; whatever it
    returns is ignored.

    Args:
        stages: List of stages to add
        add_stage: Function to call to add each stage
    """
    for pending_stage in stages:
        add_stage(pending_stage)
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Topological sort for stage execution ordering.
|
|
3
|
+
|
|
4
|
+
This module implements a topological sort algorithm for stages based on their
|
|
5
|
+
requisite_stage_ref_ids (DAG edges).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Callable
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from stabilize.models.stage import StageExecution
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CircularDependencyError(Exception):
    """
    Signals that the stage graph cannot be put into a valid execution order.

    Raised for pipeline configurations in which stages depend on each other
    in a cycle.
    """

    def __init__(self, message: str, stages: list[StageExecution] | None = None):
        """
        Create the error.

        Args:
            message: Human-readable description, used as the exception text
            stages: The stages involved; a missing or empty value is stored
                as a new empty list
        """
        super().__init__(message)
        self.stages = stages if stages else []
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def topological_sort(
    stages: list[StageExecution],
    stage_filter: Callable[[StageExecution], bool] = lambda s: s.parent_stage_id is None,
) -> list[StageExecution]:
    """
    Sort stages into topological order based on their dependencies.

    The algorithm:

    1. Starts with all unsorted stages (filtered by predicate)
    2. Finds stages whose requisites are all in the "processed" set
    3. Adds those stages to result and their ref_ids to processed set
    4. Repeats until all stages are sorted
    5. Raises CircularDependencyError if no progress can be made

    Stages that become ready in the same pass keep their relative input
    order. A requisite ref_id that never appears among the filtered stages
    can never be satisfied, so it is reported the same way as a cycle.

    Args:
        stages: List of stages to sort
        stage_filter: Predicate to filter stages (default: keep only stages
            whose ``parent_stage_id`` is None, i.e. exclude synthetic stages)

    Returns:
        List of stages sorted in execution order

    Raises:
        CircularDependencyError: If stages have circular dependencies

    Example:
        # Linear: A -> B -> C
        stages = [stage_c, stage_a, stage_b]
        sorted_stages = topological_sort(stages)
        # Result: [stage_a, stage_b, stage_c]

        # Parallel with join: A -> [B, C] -> D
        # B and C have requisites [A], D has requisites [B, C]
        sorted_stages = topological_sort(stages)
        # Result: [A, B, C, D] or [A, C, B, D] (B and C can be in any order)
    """
    pending: list[StageExecution] = [s for s in stages if stage_filter(s)]
    sorted_stages: list[StageExecution] = []
    satisfied: set[str] = set()

    while pending:
        # Split the remaining stages in one pass. Readiness is judged against
        # the ref_ids satisfied before this pass, so a stage never unlocks a
        # dependent within the same pass.
        ready: list[StageExecution] = []
        blocked: list[StageExecution] = []
        for stage in pending:
            if satisfied.issuperset(stage.requisite_stage_ref_ids):
                ready.append(stage)
            else:
                blocked.append(stage)

        if not ready:
            # No progress possible - circular (or unsatisfiable) dependency.
            # The message lists every input stage, not only the stuck ones.
            relationships = ", ".join(f"{list(stage.requisite_stage_ref_ids)}->{stage.ref_id}" for stage in stages)
            raise CircularDependencyError(
                f"Invalid stage relationships found: {relationships}",
                stages=pending,
            )

        satisfied.update(stage.ref_id for stage in ready)
        sorted_stages.extend(ready)
        pending = blocked

    return sorted_stages
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def topological_sort_all_stages(stages: list[StageExecution]) -> list[StageExecution]:
    """
    Topologically sort every stage, synthetic ones included.

    Delegates to topological_sort() with a filter that accepts every stage,
    so no stage is excluded from the ordering.

    Args:
        stages: List of stages to sort

    Returns:
        List of all stages sorted in execution order
    """

    def keep_every_stage(_stage: StageExecution) -> bool:
        return True

    return topological_sort(stages, stage_filter=keep_every_stage)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def validate_dag(stages: list[StageExecution]) -> bool:
    """
    Check that the stages can be topologically ordered.

    Runs topological_sort() with its default filter, so only the stages that
    filter accepts take part in the check. The function never returns False:
    it either returns True or lets the sort's error propagate.

    Args:
        stages: List of stages to validate

    Returns:
        True if DAG is valid

    Raises:
        CircularDependencyError: If stages have circular dependencies
    """
    # The ordering itself is discarded; only whether sorting raises matters.
    _ = topological_sort(stages)
    return True
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def find_initial_stages(stages: list[StageExecution]) -> list[StageExecution]:
    """
    Collect the stages that report themselves as initial and non-synthetic.

    A stage is kept when ``is_initial()`` is true and ``is_synthetic()`` is
    false; input order is preserved.

    Args:
        stages: List of stages to search

    Returns:
        List of initial stages
    """
    initial: list[StageExecution] = []
    for candidate in stages:
        if not candidate.is_initial():
            continue
        if candidate.is_synthetic():
            continue
        initial.append(candidate)
    return initial
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def find_terminal_stages(stages: list[StageExecution]) -> list[StageExecution]:
    """
    Collect the non-synthetic stages that no other stage depends on.

    A stage counts as depended-on when its ``ref_id`` appears in the
    ``requisite_stage_ref_ids`` of any stage in the list, synthetic stages
    included. Input order is preserved.

    Args:
        stages: List of stages to search

    Returns:
        List of terminal stages
    """
    # Every ref_id that some stage names as a requisite.
    depended_on: set[str] = set().union(*(stage.requisite_stage_ref_ids for stage in stages))

    terminal: list[StageExecution] = []
    for stage in stages:
        if stage.is_synthetic() or stage.ref_id in depended_on:
            continue
        terminal.append(stage)
    return terminal
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def get_execution_layers(stages: list[StageExecution]) -> list[list[StageExecution]]:
    """
    Group stages into execution layers.

    Stages in the same layer can execute in parallel.
    Each layer depends only on stages in previous layers.

    Only stages whose ``parent_stage_id`` is None are considered. If the
    remaining stages can never become ready (a cycle, or a requisite that
    matches no considered stage), the function stops and returns the layers
    built so far instead of raising; those stages are absent from the result.

    Args:
        stages: List of stages to group

    Returns:
        List of layers, where each layer is a list of stages that can run in parallel

    Example:
        # A -> [B, C] -> D
        # Layer 0: [A]
        # Layer 1: [B, C]
        # Layer 2: [D]
    """
    pending: list[StageExecution] = [s for s in stages if s.parent_stage_id is None]
    layers: list[list[StageExecution]] = []
    satisfied: set[str] = set()

    while pending:
        # Split the remaining stages in one pass. Readiness is judged against
        # the ref_ids satisfied by earlier layers only, so a stage never
        # shares a layer with one of its own requisites.
        layer: list[StageExecution] = []
        blocked: list[StageExecution] = []
        for stage in pending:
            if satisfied.issuperset(stage.requisite_stage_ref_ids):
                layer.append(stage)
            else:
                blocked.append(stage)

        if not layer:
            # This shouldn't happen if topological_sort passes
            break

        layers.append(layer)
        satisfied.update(stage.ref_id for stage in layer)
        pending = blocked

    return layers
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Stabilize example pipelines for RAG context."""
|