synaflow 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synaflow/__init__.py +21 -0
- synaflow/executor.py +416 -0
- synaflow/iterator_utils.py +24 -0
- synaflow/pipeline.py +39 -0
- synaflow/step.py +18 -0
- synaflow/type_compatibility.py +191 -0
- synaflow/types.py +24 -0
- synaflow/validator.py +203 -0
- synaflow-0.1.0.dist-info/METADATA +93 -0
- synaflow-0.1.0.dist-info/RECORD +12 -0
- synaflow-0.1.0.dist-info/WHEEL +4 -0
- synaflow-0.1.0.dist-info/licenses/LICENSE +21 -0
synaflow/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pipeline Engine
|
|
3
|
+
|
|
4
|
+
A lightweight, robust engine for defining and executing typed Directed Acyclic Graphs (DAGs).
|
|
5
|
+
This module defines the public interface for clients.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .executor import run
|
|
9
|
+
from .pipeline import PipelineDef, pipeline
|
|
10
|
+
from .step import Step, step
|
|
11
|
+
from .types import OnError, StepParams
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"PipelineDef",
|
|
15
|
+
"pipeline",
|
|
16
|
+
"Step",
|
|
17
|
+
"step",
|
|
18
|
+
"OnError",
|
|
19
|
+
"StepParams",
|
|
20
|
+
"run",
|
|
21
|
+
]
|
synaflow/executor.py
ADDED
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import itertools
|
|
3
|
+
from collections.abc import Callable, Generator, Iterator
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from .iterator_utils import InterleavedIterator
|
|
7
|
+
from .pipeline import PipelineDef
|
|
8
|
+
from .type_compatibility import is_iterable_type, is_scalar
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PipelineStopException(Exception):
    """Raised to stop the pipeline execution early."""


class PipelineExecutor:
    """Executes a compiled Directed Acyclic Graph (DAG) for a pipeline.

    ``dag`` maps a node name to a node dict.  The keys read by this class are
    ``"fn"`` (callable or ``None``), ``"deps"`` (parameter name -> consumer
    type), ``"output"`` (producer type), ``"on_error"`` (an object whose
    ``.value`` is compared with ``"stop"``) and ``"needs_materialize"``.

    Attributes:
        context: Values produced so far, keyed by node or parameter name.  An
            iterator shared by several consumers is stored as
            ``{"__tees__": {consumer_name: iterator}}``.
        executed_steps: Names of nodes that already ran, or whose per-item
            callback was already created.
        stop_cause: Exception that triggered an early stop, else ``None``.
    """

    def __init__(self, dag: dict[str, dict], materialize_fn: Callable = list):
        self.dag = dag
        self.materialize_fn = materialize_fn
        self.context: dict[str, Any] = {}
        self.executed_steps: set[str] = set()
        self.stop_cause: BaseException | None = None

    def execute(self, params: Any) -> None:
        """Run all nodes level by level, seeded with ``params._asdict()``.

        A ``PipelineStopException`` ends the run early and is not re-raised.
        The exception that caused it is stored in ``self.stop_cause`` and
        logged at WARNING level so the failure is no longer invisible.
        """
        # Function-scope import: keeps the module's import block untouched.
        import logging

        self._initialize_context_with_params(params)

        try:
            for level in self._compute_topological_levels():
                self._execute_level(level)
        except PipelineStopException as stop:
            self.stop_cause = stop.__cause__ or stop.__context__ or stop
            logging.getLogger(__name__).warning(
                "Pipeline stopped early: %r", self.stop_cause
            )

    def _initialize_context_with_params(self, params: Any) -> None:
        """Copy every field of ``params`` into the context.

        Iterator values are materialized when the matching DAG node has
        ``needs_materialize`` set, otherwise they are tee'd per consumer.
        """
        for field, value in params._asdict().items():
            node = self.dag.get(field, {})
            needs_materialization = node.get("needs_materialize", False)

            if needs_materialization and isinstance(value, Iterator):
                value = self.materialize_fn(value)
            elif isinstance(value, Iterator):
                value = self._tee_iterator_for_consumers(field, value)

            self.context[field] = value

    def _tee_iterator_for_consumers(
        self, producer_name: str, iterator_value: Iterator
    ) -> Any:
        """Return one independent iterator per consumer of ``producer_name``.

        With two or more consumers the result is a ``{"__tees__": {...}}``
        dict keyed by consumer name; otherwise the iterator is returned as is.
        """
        consumers = [
            consumer_name
            for consumer_name, node in self.dag.items()
            if producer_name in node.get("deps", {})
        ]
        if len(consumers) > 1:
            tees = itertools.tee(iterator_value, len(consumers))
            return {"__tees__": dict(zip(consumers, tees))}
        return iterator_value

    def _compute_topological_levels(self) -> list[list[str]]:
        """Group node names into levels whose dependencies are all earlier.

        Only dependencies that are themselves DAG nodes are counted.  If no
        node is ready (a cycle), the levels found so far are returned.
        """
        in_degree: dict[str, int] = {name: 0 for name in self.dag}
        for name, node in self.dag.items():
            for dep in node.get("deps", {}):
                if dep in in_degree:
                    in_degree[name] += 1

        levels: list[list[str]] = []
        processed: set[str] = set()

        while len(processed) < len(self.dag):
            level = [
                name
                for name, degree in in_degree.items()
                if degree == 0 and name not in processed
            ]
            if not level:
                break
            levels.append(level)
            processed.update(level)

            for name in level:
                for other_name, node in self.dag.items():
                    if name in node.get("deps", {}):
                        in_degree[other_name] -= 1

        return levels

    def _execute_level(self, level: list[str]) -> None:
        """Run one level: grouped per-dependency work first, then the rest."""
        dep_each_nodes, dep_all_nodes, independent_nodes = (
            self._group_nodes_by_execution_mode(level)
        )

        # Ordered de-duplication: a set would make the execution order depend
        # on string hash randomization and differ between interpreter runs.
        dep_names = list(dict.fromkeys([*dep_each_nodes, *dep_all_nodes]))

        for dep_name in dep_names:
            each_names = dep_each_nodes.get(dep_name, [])
            all_names = dep_all_nodes.get(dep_name, [])
            self._process_grouped_dependencies(
                dep_name, each_names, all_names, independent_nodes
            )

        for name in independent_nodes:
            self._execute_independent_node(name)

    def _group_nodes_by_execution_mode(
        self, level: list[str]
    ) -> tuple[dict, dict, list]:
        """Split a level by how each node consumes its first dependency.

        Returns ``(dep_each_nodes, dep_all_nodes, independent_nodes)``: nodes
        run once per item, nodes taking a lazy iterator, and everything else.
        The first two are keyed by the name of the first dependency.
        """
        dep_each_nodes: dict[str, list[str]] = {}
        dep_all_nodes: dict[str, list[str]] = {}
        independent_nodes: list[str] = []

        for name in level:
            if name in self.executed_steps:
                continue

            node = self.dag.get(name)
            if not node or node.get("fn") is None:
                continue

            deps = node.get("deps", {})
            if not deps:
                independent_nodes.append(name)
                continue

            first_dep_name = next(iter(deps))
            if self._is_each_mode_execution(deps, first_dep_name):
                dep_each_nodes.setdefault(first_dep_name, []).append(name)
            else:
                consumer_type = (
                    inspect.signature(node["fn"]).parameters[first_dep_name].annotation
                )
                if self._is_lazy_iterator_type(consumer_type):
                    dep_all_nodes.setdefault(first_dep_name, []).append(name)
                else:
                    independent_nodes.append(name)

        return dep_each_nodes, dep_all_nodes, independent_nodes

    def _process_grouped_dependencies(
        self,
        dep_name: str,
        each_names: list[str],
        all_names: list[str],
        independent_nodes: list[str],
    ) -> None:
        """Run the eager per-item consumers of ``dep_name``.

        Lazy per-item nodes, and iterator consumers that are not interleaved,
        are appended to ``independent_nodes`` (mutated in place).
        """
        eager_each_names, lazy_each_names = self._split_eager_and_lazy_each_nodes(
            each_names
        )

        eager_callbacks = self._create_eager_callbacks(eager_each_names, dep_name)

        independent_nodes.extend(lazy_each_names)

        if eager_callbacks:
            self._execute_eager_callbacks(
                eager_callbacks, dep_name, all_names, independent_nodes
            )
        else:
            independent_nodes.extend(all_names)

    def _split_eager_and_lazy_each_nodes(
        self, each_names: list[str]
    ) -> tuple[list[str], list[str]]:
        """Split per-item nodes into eager and lazy.

        A node is eager when its name starts with ``"_"`` or no other node
        depends on it; otherwise it is lazy.
        """
        eager = []
        lazy = []
        for name in each_names:
            consumers = [
                cn for cn, cnode in self.dag.items() if name in cnode.get("deps", {})
            ]
            if name.startswith("_") or not consumers:
                eager.append(name)
            else:
                lazy.append(name)
        return eager, lazy

    def _create_eager_callbacks(
        self, eager_names: list[str], dep_name: str
    ) -> list[Callable]:
        """Build one per-item callback for each eager node.

        Each callback calls the node's function with the item bound to
        ``dep_name`` and carries a ``node_name`` attribute naming its node.
        The nodes are marked as executed here, when the callback is created.
        """

        def make_callback(fn, kwargs, dep_name, on_error, node_name):
            def cb(item):
                try:
                    item_kwargs = dict(kwargs)
                    item_kwargs[dep_name] = item
                    fn(**item_kwargs)
                except PipelineStopException:
                    # A stop request must never be downgraded to "continue".
                    raise
                except Exception as e:
                    if on_error and on_error.value == "stop":
                        raise PipelineStopException() from e

            # Lets the lockstep loop pick an iterator branch this node owns.
            cb.node_name = node_name
            return cb

        callbacks = []
        for name in eager_names:
            node = self.dag[name]
            kwargs = self._resolve_node_arguments(name, node)
            callbacks.append(
                make_callback(node["fn"], kwargs, dep_name, node.get("on_error"), name)
            )
            self.executed_steps.add(name)

        return callbacks

    def _execute_eager_callbacks(
        self,
        callbacks: list[Callable],
        dep_name: str,
        all_names: list[str],
        independent_nodes: list[str],
    ) -> None:
        """Drive the callbacks over the items of ``dep_name``.

        If an iterator consumer exists, the callbacks are interleaved with the
        first one and the remaining consumers are queued in
        ``independent_nodes``; otherwise the items are looped over directly.
        """
        items_source = self.context.get(dep_name)

        if all_names:
            first_all = all_names[0]
            self._execute_interleaved_node(first_all, dep_name, callbacks)
            independent_nodes.extend(all_names[1:])
        else:
            self._execute_lockstep_loop(items_source, callbacks, dep_name)

    def _execute_interleaved_node(
        self, node_name: str, dep_name: str, callbacks: list[Callable]
    ) -> None:
        """Run ``node_name`` with ``dep_name`` wrapped in an InterleavedIterator.

        The output is stored in the context unless the name starts with "_".
        """
        node = self.dag[node_name]
        fn = node["fn"]
        kwargs = self._resolve_node_arguments(node_name, node)

        dep_val = kwargs.get(dep_name)
        if isinstance(dep_val, dict) and "__tees__" in dep_val:
            dep_val = dep_val["__tees__"][node_name]

        kwargs[dep_name] = InterleavedIterator(dep_val, callbacks)

        try:
            output = fn(**kwargs)
            if node_name and not node_name.startswith("_"):
                self.context[node_name] = output
        except PipelineStopException:
            # Raised by an interleaved callback: honour that step's policy.
            raise
        except Exception as e:
            if node.get("on_error") and node["on_error"].value == "stop":
                raise PipelineStopException() from e

        self.executed_steps.add(node_name)

    def _execute_lockstep_loop(
        self, items_source: Any, callbacks: list[Callable], dep_name: str
    ) -> None:
        """Call every callback on every item of ``items_source``.

        For a tee'd source, the branch of one of the callback's own nodes is
        consumed, so branches reserved for other consumers stay intact.
        """
        dep_val = items_source
        if isinstance(dep_val, dict) and "__tees__" in dep_val:
            tees = dep_val["__tees__"]
            owners = [getattr(cb, "node_name", None) for cb in callbacks]
            branch = next((owner for owner in owners if owner in tees), None)
            if branch is None:
                # Untagged callbacks: fall back to the first consumer's branch.
                branch = next(
                    cn
                    for cn, cnode in self.dag.items()
                    if dep_name in cnode.get("deps", {}) and cn in tees
                )
            dep_val = tees[branch]

        for item in dep_val:
            for cb in callbacks:
                cb(item)

    def _execute_independent_node(self, name: str) -> None:
        """Run one node that was not handled by the grouped path."""
        if name in self.executed_steps:
            return

        node = self.dag.get(name)
        if not node or node.get("fn") is None:
            return

        fn = node["fn"]
        deps = node.get("deps", {})
        kwargs = self._resolve_node_arguments(name, node)

        if deps and self._is_each_mode_execution(deps, next(iter(deps))):
            self._execute_independent_each_node(name, fn, deps, kwargs, node)
        else:
            self._execute_standard_node(name, fn, kwargs, node)

        self.executed_steps.add(name)

    def _execute_independent_each_node(
        self, name: str, fn: Callable, deps: dict, kwargs: dict, node: dict
    ) -> None:
        """Run ``fn`` once per item of the node's first dependency.

        A sink (name starts with "_" or no consumers) is run immediately.
        Otherwise a lazy generator of results is stored in the context,
        tee'd when more than one node consumes it.
        """
        first_dep = next(iter(deps))
        items = self.context.get(first_dep)

        if isinstance(items, dict) and "__tees__" in items:
            items = items["__tees__"][name]

        consumers = [
            cn for cn, cnode in self.dag.items() if name in cnode.get("deps", {})
        ]
        is_sink = name.startswith("_") or len(consumers) == 0

        if is_sink:
            for item in items:
                try:
                    item_kwargs = dict(kwargs)
                    item_kwargs[first_dep] = item
                    fn(**item_kwargs)
                except PipelineStopException:
                    raise
                except Exception as e:
                    if node.get("on_error") and node["on_error"].value == "stop":
                        raise PipelineStopException() from e
        else:

            def each_generator(items, kwargs, first_dep, fn, on_error):
                for item in items:
                    try:
                        item_kwargs = dict(kwargs)
                        item_kwargs[first_dep] = item
                        yield fn(**item_kwargs)
                    except PipelineStopException:
                        raise
                    except Exception as e:
                        if on_error and on_error.value == "stop":
                            raise PipelineStopException() from e

            output = each_generator(items, kwargs, first_dep, fn, node.get("on_error"))

            if len(consumers) > 1:
                tees = itertools.tee(output, len(consumers))
                output = {"__tees__": dict(zip(consumers, tees))}

            self.context[name] = output

    def _execute_standard_node(
        self, name: str, fn: Callable, kwargs: dict, node: dict
    ) -> None:
        """Call ``fn(**kwargs)`` once and store the result in the context.

        Nothing is stored when the name starts with "_".  Iterator results
        are materialized or tee'd as for pipeline parameters.
        """
        try:
            output = fn(**kwargs)

            if name and not name.startswith("_"):
                if isinstance(output, Iterator) and node.get("needs_materialize"):
                    output = self.materialize_fn(output)
                elif isinstance(output, Iterator):
                    output = self._tee_iterator_for_consumers(name, output)

                self.context[name] = output

        except PipelineStopException:
            # E.g. raised by an upstream lazy generator consumed inside fn.
            raise
        except Exception as e:
            if node.get("on_error") and node["on_error"].value == "stop":
                raise PipelineStopException() from e

    def _resolve_node_arguments(self, consumer_name: str, node: dict) -> dict[str, Any]:
        """Build keyword arguments for ``node["fn"]`` from the context.

        Parameters missing from the context are left out.  Tee'd values are
        replaced by the branch of ``consumer_name``; declared dependencies
        are adapted to their consumer type.
        """
        sig = inspect.signature(node["fn"])
        deps = node.get("deps", {})
        kwargs: dict[str, Any] = {}

        for param_name in sig.parameters:
            if param_name in self.context:
                value = self.context.get(param_name)

                if isinstance(value, dict) and "__tees__" in value:
                    value = value["__tees__"][consumer_name]

                if param_name in deps:
                    consumer_type = deps[param_name]
                    value = self._adapt_argument_to_consumer_type(value, consumer_type)

                kwargs[param_name] = value

        return kwargs

    def _adapt_argument_to_consumer_type(self, value: Any, consumer_type: Any) -> Any:
        """Convert ``value`` to the container shape ``consumer_type`` asks for.

        A single value is wrapped in a list when a collection or iterator is
        expected; iterators are materialized for list/set/tuple consumers.
        """
        is_lazy_iterator = self._is_lazy_iterator_type(consumer_type)
        needs_materialization = self._needs_materialize_for(consumer_type)

        if is_lazy_iterator or needs_materialization:
            if not isinstance(value, (list, set, tuple, Iterator, Generator)):
                value = [value]

        if isinstance(value, Iterator) and needs_materialization:
            value = self.materialize_fn(value)

        origin = getattr(consumer_type, "__origin__", consumer_type)
        if origin is set:
            value = set(value)
        elif origin is tuple:
            value = tuple(value)
        elif origin in (Iterator, Generator):
            value = iter(value)

        return value

    def _is_each_mode_execution(self, deps: dict, first_dep_name: str) -> bool:
        """Return True when an iterable producer feeds a scalar consumer."""
        if not deps:
            return False

        first_type = deps[first_dep_name]
        producer = self.dag.get(first_dep_name)
        if not producer or producer.get("output") is None:
            return False

        producer_output = producer.get("output")
        return is_iterable_type(producer_output) and is_scalar(first_type)

    def _is_lazy_iterator_type(self, tp: Any) -> bool:
        """Return True for ``Iterator``/``Generator``, bare or parameterized."""
        if tp is Iterator:
            return True
        origin = getattr(tp, "__origin__", tp)
        return origin in (Iterator, Generator)

    def _needs_materialize_for(self, tp: Any) -> bool:
        """Return True for ``list``/``set``/``tuple``, bare or parameterized."""
        if tp is None:
            return False
        if tp in (list, set, tuple):
            return True
        origin = getattr(tp, "__origin__", None)
        return origin in (list, set, tuple)
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def run(
    pipeline: PipelineDef,
    params: Any,
    *,
    materialize: Callable = list,
) -> None:
    """Execute a pipeline definition.

    Args:
        pipeline: Definition whose compiled ``_dag`` is executed.
        params: Object exposing ``_asdict()``; its fields seed the run.
        materialize: Callable used to turn iterators into collections.
    """
    PipelineExecutor(pipeline._dag, materialize).execute(params)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from typing import Any, Callable, Iterable
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class InterleavedIterator:
    """Iterator that runs callbacks on each item before handing it out.

    Every call to ``next()`` pulls one item from the source, passes it to
    each callback in order, and then returns it.  Eager consumers therefore
    see an item before the lazy downstream consumer that is iterating.
    """

    def __init__(self, source: Iterable[Any], callbacks: list[Callable[[Any], None]]):
        self.source = iter(source)
        self.callbacks = callbacks

    def __iter__(self):
        return self

    def __next__(self):
        current = next(self.source)
        for notify in self.callbacks:
            notify(current)
        return current
|
synaflow/pipeline.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from .step import Step
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class PipelineDef:
    """
    Defines a Pipeline workflow.

    The steps are validated and compiled into a DAG when the instance is
    created; the result is kept in ``self._dag``.
    """

    name: str
    params: Any
    steps: list[Step]
    description: str = ""

    def __post_init__(self) -> None:
        # NOTE(review): imported at call time rather than at module level;
        # the reason is not visible from this file.
        from .validator import validate_and_build_dag

        self._dag = validate_and_build_dag(self.name, self.steps, self.params)

    def to_dict(self) -> dict:
        """Exports the compiled DAG structure to a JSON-serializable dictionary."""
        from .type_compatibility import get_type_name

        serialized = {}
        for name, node in self._dag.items():
            fn = node["fn"]
            on_error = node["on_error"]
            serialized[name] = {
                "deps": {k: get_type_name(v) for k, v in node["deps"].items()},
                "output": get_type_name(node["output"]),
                # Callables such as functools.partial have no __name__;
                # fall back to the type name instead of raising.
                "fn": getattr(fn, "__name__", type(fn).__name__) if fn else None,
                "on_error": on_error.value if on_error else None,
                "needs_materialize": node["needs_materialize"],
            }
        return serialized


# Lower-case alias exported as part of the public interface.
pipeline = PipelineDef
|
synaflow/step.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any, Callable
|
|
5
|
+
|
|
6
|
+
from .types import OnError, StepParams
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Step:
    """A single named unit of work in a pipeline."""

    # Name under which the step is registered in the DAG.
    name: str
    # Callable run for this step.
    fn: Callable
    # Failure policy; defaults to stopping the pipeline.
    on_error: OnError = OnError.STOP
    # Optional step parameters.
    params: StepParams | None = None
    # Free-form description.
    description: str = ""


# Lower-case alias exported as part of the public interface.
step = Step
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import types
|
|
2
|
+
from collections.abc import Generator, Iterable, Iterator
|
|
3
|
+
from typing import Any, get_args, get_origin
|
|
4
|
+
|
|
5
|
+
# Types that is_scalar() accepts without further inspection.
SCALAR_TYPES = {
    int,
    float,
    str,
    bool,
    bytes,
    type(None),
}

# Types (bare, or as the origin of a generic alias) treated as collections.
COLLECTION_ORIGINS = {
    list,
    set,
    tuple,
    Generator,
    Iterator,
    Iterable,
}
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ListType:
    """Marker for a list whose element type is only known at runtime."""

    def __init__(self, inner_type: Any):
        # Element type of the represented list.
        self.inner_type = inner_type

    def __repr__(self):
        return "ListType({})".format(self.inner_type)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def is_type_compatible(producer_type: Any, consumer_type: Any) -> bool:
    """Checks if a producer output type satisfies a consumer input type.

    An unknown type (``None``) or ``typing.Any`` on either side is always
    compatible.  Unions are checked member by member; an iterable producer is
    compared through its element type; scalars must be equal.
    """
    if producer_type is None or consumer_type is None:
        return True

    # Without this, Any reaches the scalar equality check and is rejected
    # against every type except Any itself.
    if producer_type is Any or consumer_type is Any:
        return True

    producer_origin = get_origin(producer_type)
    consumer_origin = get_origin(consumer_type)

    if _is_union(producer_type, producer_origin) and _is_union(
        consumer_type, consumer_origin
    ):
        return _all_producer_types_match_any_consumer_type(producer_type, consumer_type)

    if _is_union(producer_type, producer_origin):
        return _all_producer_types_match_consumer(producer_type, consumer_type)

    is_producer_iterable = _is_iterable(producer_type, producer_origin)
    is_consumer_iterable = _is_iterable(consumer_type, consumer_origin)

    if is_producer_iterable:
        return _check_iterable_producer_compatibility(
            producer_type, consumer_type, is_consumer_iterable
        )

    if _is_union(consumer_type, consumer_origin):
        return _producer_matches_any_consumer_type(producer_type, consumer_type)

    if is_consumer_iterable:
        return _check_scalar_producer_to_iterable_consumer(producer_type, consumer_type)

    if is_scalar(consumer_type):
        return _check_scalar_compatibility(producer_type, consumer_type)

    return True
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _all_producer_types_match_any_consumer_type(
    producer_type: Any, consumer_type: Any
) -> bool:
    """Return True if each producer union member fits some consumer member."""
    consumer_members = get_args(consumer_type)
    for produced in get_args(producer_type):
        if not any(is_type_compatible(produced, wanted) for wanted in consumer_members):
            return False
    return True
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _all_producer_types_match_consumer(producer_type: Any, consumer_type: Any) -> bool:
    """Return True if every producer union member fits the consumer type."""
    for produced in get_args(producer_type):
        if not is_type_compatible(produced, consumer_type):
            return False
    return True
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _producer_matches_any_consumer_type(producer_type: Any, consumer_type: Any) -> bool:
    """Return True if the producer fits at least one consumer union member."""
    for wanted in get_args(consumer_type):
        if is_type_compatible(producer_type, wanted):
            return True
    return False
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _check_iterable_producer_compatibility(
    producer_type: Any, consumer_type: Any, is_consumer_iterable: bool
) -> bool:
    """Check an iterable producer against a consumer via its element type.

    A producer without a known element type is never compatible.  A union or
    scalar consumer is matched against the element type; an iterable consumer
    is matched element to element, or accepted if its element type is unknown.
    """
    element = get_inner_type(producer_type)
    if element is None:
        return False

    if _is_union(consumer_type, get_origin(consumer_type)):
        return is_type_compatible(element, consumer_type)

    if not is_consumer_iterable:
        if is_scalar(consumer_type):
            return is_type_compatible(element, consumer_type)
        return False

    target = get_inner_type(consumer_type)
    return True if target is None else is_type_compatible(element, target)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _check_scalar_producer_to_iterable_consumer(
    producer_type: Any, consumer_type: Any
) -> bool:
    """Check a non-iterable producer against an iterable consumer.

    The consumer's element type is compared with the producer's first type
    argument if it has one, otherwise with the producer type itself.  A
    consumer without an element type accepts anything.
    """
    target = get_inner_type(consumer_type)
    if target is None:
        return True

    source = get_inner_type(producer_type)
    return is_type_compatible(producer_type if source is None else source, target)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _check_scalar_compatibility(producer_type: Any, consumer_type: Any) -> bool:
    """Check a producer against a scalar consumer.

    A producer with a type argument is compared through that argument;
    otherwise it must be a scalar equal to the consumer type.
    """
    inner = get_inner_type(producer_type)
    if inner is None:
        return is_scalar(producer_type) and producer_type == consumer_type
    return is_type_compatible(inner, consumer_type)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _is_union(tp: Any, origin: Any) -> bool:
|
|
119
|
+
return origin is types.UnionType or origin is __import__("typing").Union
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _is_iterable(tp: Any, origin: Any) -> bool:
    """Return True for a ``ListType`` or a type listed in COLLECTION_ORIGINS.

    The origin is checked when there is one, otherwise the type itself.
    """
    if isinstance(tp, ListType):
        return True
    candidate = tp if origin is None else origin
    return candidate in COLLECTION_ORIGINS
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def is_iterable_type(tp: Any) -> bool:
    """Return True if ``tp`` is a collection type; ``None`` is not."""
    if tp is None:
        return False
    return isinstance(tp, ListType) or _is_iterable(tp, get_origin(tp))
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def is_scalar(tp: Any) -> bool:
    """Return True if ``tp`` is treated as a single value.

    ``None`` and ``ListType`` instances are not scalar.  Members of
    SCALAR_TYPES are.  A union is scalar when all its members are; any other
    generic alias is not.  Remaining types are scalar unless they appear in
    COLLECTION_ORIGINS.
    """
    from typing import Union

    if tp is None:
        return False
    # A ListType is reported as iterable by _is_iterable(); without this
    # guard it would fall through to the last line and also count as scalar.
    if isinstance(tp, ListType):
        return False
    if tp in SCALAR_TYPES:
        return True

    origin = get_origin(tp)
    if origin is not None:
        if origin is types.UnionType or origin is Union:
            return all(is_scalar(a) for a in get_args(tp))
        return False
    return tp not in COLLECTION_ORIGINS
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def get_inner_type(tp: Any) -> Any:
    """Return the element type of ``tp``, or ``None`` if it has none.

    For a ``ListType`` this is its ``inner_type``; otherwise the first type
    argument of ``tp``.
    """
    if isinstance(tp, ListType):
        return tp.inner_type
    type_args = get_args(tp)
    return type_args[0] if type_args else None
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def get_type_name(tp: Any) -> str:
    """Return a readable name for ``tp``, e.g. ``"list[int]"``.

    ``None`` becomes ``"None"``.  Generic aliases are rendered as
    ``origin[arg, ...]``; other objects use ``__name__`` or ``str()``.
    """
    if tp is None:
        return "None"

    origin = get_origin(tp)
    if origin is None:
        return getattr(tp, "__name__", str(tp))

    rendered_args = ", ".join(map(get_type_name, get_args(tp)))
    origin_name = getattr(origin, "__name__", str(origin))
    return f"{origin_name}[{rendered_args}]"
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def is_materialized_consumer(tp: Any) -> bool:
    """Checks if a consumer type requires an eagerly materialized collection.

    True for ``list``/``set``/``tuple`` (bare or parameterized) and for a
    union with at least one such member.  False for ``None``, for
    Iterator/Generator/Iterable aliases, for scalars and for anything else.
    """
    eager_containers = (list, set, tuple)

    if tp is None:
        return False
    if tp in eager_containers:
        return True

    origin = get_origin(tp)
    if origin in eager_containers:
        return True
    if origin in (Iterator, Generator, Iterable) or is_scalar(tp):
        return False
    if not _is_union(tp, origin):
        return False

    return any(is_materialized_consumer(member) for member in get_args(tp))
|
synaflow/types.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class OnError(Enum):
    """Failure policy attached to a step."""

    # Carry on after a step raised.
    CONTINUE = "continue"
    # Stop the pipeline when a step raised.
    STOP = "stop"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class StepParams:
    """Immutable holder for a step's parameter values."""

    # Parameter name -> value; each instance gets its own empty dict by default.
    values: dict[str, Any] = field(default_factory=dict)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class StepResult:
    """Outcome record for one step."""

    # Name of the step this result belongs to.
    step_name: str
    # One of "ok" | "error" | "skipped".
    status: str
    # Value returned by the step, if any.
    output: Any = None
    # Exception raised by the step, if any.
    error: Exception | None = None
    # Integer counters keyed by metric name.
    metrics: dict[str, int] = field(default_factory=dict)
|
synaflow/validator.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from typing import Any, NamedTuple
|
|
3
|
+
|
|
4
|
+
from .step import Step
|
|
5
|
+
from .type_compatibility import (
|
|
6
|
+
ListType,
|
|
7
|
+
get_type_name,
|
|
8
|
+
is_iterable_type,
|
|
9
|
+
is_materialized_consumer,
|
|
10
|
+
is_scalar,
|
|
11
|
+
is_type_compatible,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def validate_and_build_dag(
    name: str, steps: list[Step], params: type[NamedTuple]
) -> dict[str, dict]:
    """
    Validates the pipeline parameters and steps, then compiles them into a
    Directed Acyclic Graph (DAG) represented as a dictionary keyed by name.

    Steps are processed in order, so a step may only depend on parameters and
    on steps listed before it.  Raises ``ValueError`` on invalid input.
    """
    _validate_params_is_namedtuple(params, name)

    dag: dict[str, dict] = {}
    # Everything that can be consumed so far: parameters, then compiled steps.
    produced: dict[str, dict] = _initialize_parameters(params)

    for current in steps:
        _validate_step_is_callable(current, name)
        _validate_unique_step_name(current.name, dag, pipeline_name=name)

        compiled = _validate_and_compile_step(current, produced, pipeline_name=name)
        dag[current.name] = produced[current.name] = compiled

    _add_parameter_nodes_to_dag(dag, produced)
    _compute_needs_materialize(dag)

    return dag
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _validate_params_is_namedtuple(params: Any, pipeline_name: str) -> None:
    """Raise ``ValueError`` unless ``params`` exposes ``_fields``.

    The message names the offending class.  When a class is passed, its own
    name is reported rather than the unhelpful ``type``.
    """
    if hasattr(params, "_fields"):
        return

    got = params.__name__ if isinstance(params, type) else type(params).__name__
    raise ValueError(
        f"Pipeline '{pipeline_name}': 'params' must be a NamedTuple, got {got}"
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _validate_step_is_callable(step: Step, pipeline_name: str) -> None:
    """Raise ``ValueError`` if ``step.fn`` is not callable."""
    if callable(step.fn):
        return
    raise ValueError(
        f"Pipeline '{pipeline_name}': step '{step.name}' must have a callable 'fn', got {type(step.fn).__name__}"
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _initialize_parameters(params: type[NamedTuple]) -> dict[str, dict]:
    """Return a producer entry for each field of ``params``.

    Each entry is ``{"output": annotation}``, where the annotation is taken
    from ``params.__annotations__`` and is ``None`` when absent.
    """
    return {
        field_name: {"output": getattr(params, "__annotations__", {}).get(field_name)}
        for field_name in params._fields
    }
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _validate_unique_step_name(step_name: str, dag: dict, pipeline_name: str) -> None:
    """Raise ``ValueError`` if ``step_name`` is already a key of ``dag``."""
    if step_name not in dag:
        return
    raise ValueError(
        f"Pipeline '{pipeline_name}': duplicate step name '{step_name}'"
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _validate_and_compile_step(
    step: Step, produced: dict[str, dict], pipeline_name: str
) -> dict[str, Any]:
    """Validate one step against ``produced`` and return its DAG node.

    The node has the keys ``"deps"``, ``"output"``, ``"fn"``, ``"on_error"``
    and ``"needs_materialize"``.
    """
    signature = inspect.signature(step.fn)
    annotations = _get_safe_type_hints(step.fn)

    dependencies = _validate_and_resolve_dependencies(
        step, signature, annotations, produced, pipeline_name
    )

    return {
        "deps": dependencies,
        "output": _resolve_step_output_type(signature, annotations, dependencies, produced),
        "fn": step.fn,
        "on_error": step.on_error,
        "needs_materialize": _any_dependency_needs_materialization(dependencies),
    }
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _get_safe_type_hints(fn: Any) -> dict[str, Any]:
    """Return ``fn.__annotations__``, or ``{}`` if it cannot be read."""
    try:
        return fn.__annotations__
    except Exception:
        # Covers a missing attribute as well as any error raised on access.
        return {}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _validate_and_resolve_dependencies(
    step: Step,
    sig: inspect.Signature,
    hints: dict[str, Any],
    produced: dict[str, dict],
    pipeline_name: str,
) -> dict[str, Any]:
    """Resolve every parameter of a step to an already-produced output.

    Each parameter in ``sig`` is looked up by name in ``produced`` and the
    producer's output type is checked against the type the parameter expects.

    Args:
        step: Step being validated; only ``step.name`` is read (for messages).
        sig: Signature of the step's function.
        hints: Annotation mapping for the step's function; takes precedence
            over the annotation stored on the signature parameter.
        produced: Table of available outputs; each entry's ``"output"`` key
            holds the producer's type.
        pipeline_name: Pipeline name, used to prefix error messages.

    Returns:
        A mapping ``parameter name -> expected (consumer) type``, with ``None``
        for parameters that carry no annotation.

    Raises:
        ValueError: If a parameter name is not in ``produced``, if the
            producer's type is exactly ``NoneType`` while the consumer expects
            a type that is neither ``Any`` nor an optional union, or if
            ``is_type_compatible`` rejects the producer/consumer pair.
    """
    deps: dict[str, Any] = {}

    for param_name, param in sig.parameters.items():
        # Prefer the hints mapping; fall back to the signature's annotation.
        consumer_type = hints.get(param_name, param.annotation)
        if consumer_type is inspect.Parameter.empty:
            # Unannotated parameter: represented as None from here on.
            consumer_type = None

        # Dependencies are matched by parameter name.
        if param_name not in produced:
            raise ValueError(
                f"Pipeline '{pipeline_name}': step '{step.name}' depends on '{param_name}' "
                "but no prior step or param produces it"
            )

        producer_type = produced[param_name]["output"]

        # Explicit validation for NoneType to strict types
        if (
            producer_type is type(None)
            and consumer_type is not None
            and not _is_optional_or_any(consumer_type)
        ):
            raise ValueError(
                f"Pipeline '{pipeline_name}': step '{step.name}' param '{param_name}': "
                f"expects {get_type_name(consumer_type)} "
                f"but '{param_name}' produces explicit NoneType"
            )

        # General compatibility check, delegated to is_type_compatible.
        if not is_type_compatible(producer_type, consumer_type):
            raise ValueError(
                f"Pipeline '{pipeline_name}': step '{step.name}' param '{param_name}': "
                f"expects {get_type_name(consumer_type)} "
                f"but '{param_name}' produces {get_type_name(producer_type)}"
            )

        deps[param_name] = consumer_type

    return deps
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _is_optional_or_any(tp: Any) -> bool:
|
|
144
|
+
if tp is Any:
|
|
145
|
+
return True
|
|
146
|
+
# If it's a union containing NoneType
|
|
147
|
+
origin = getattr(tp, "__origin__", None)
|
|
148
|
+
import types
|
|
149
|
+
from typing import Union
|
|
150
|
+
|
|
151
|
+
if origin is types.UnionType or origin is Union:
|
|
152
|
+
return type(None) in getattr(tp, "__args__", [])
|
|
153
|
+
return False
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _resolve_step_output_type(
    sig: inspect.Signature,
    hints: dict[str, Any],
    deps: dict[str, Any],
    produced: dict[str, dict],
) -> Any:
    """Determine the output type recorded for a step.

    The declared return type comes from ``hints["return"]``, falling back to
    the signature's return annotation; a missing annotation becomes ``None``.
    When that type is scalar (per ``is_scalar``), the step has dependencies,
    and the first dependency's produced output type is iterable (per
    ``is_iterable_type``), the result is ``ListType(return_type)``.

    Args:
        sig: Signature of the step's function.
        hints: Annotation mapping for the step's function.
        deps: Resolved dependencies of the step, in parameter order.
        produced: Table of available outputs keyed by name.

    Returns:
        The declared return type, ``None`` if unannotated, or the
        ``ListType`` wrapper described above.
    """
    declared = hints.get("return", sig.return_annotation)
    result_type = None if declared is inspect.Parameter.empty else declared

    if not (is_scalar(result_type) and deps):
        return result_type

    # Only the first dependency (in parameter order) is consulted.
    leading_dep = next(iter(deps))
    upstream_type = produced[leading_dep]["output"]
    if upstream_type is None or not is_iterable_type(upstream_type):
        return result_type

    return ListType(result_type)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _any_dependency_needs_materialization(deps: dict[str, Any]) -> bool:
|
|
176
|
+
return any(is_materialized_consumer(t) for t in deps.values())
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _add_parameter_nodes_to_dag(dag: dict, produced: dict) -> None:
|
|
180
|
+
"""Nodes that are parameters need to be explicitly added to the final DAG so they can be processed."""
|
|
181
|
+
for name, info in list(produced.items()):
|
|
182
|
+
if name not in dag:
|
|
183
|
+
info["fn"] = None
|
|
184
|
+
info["deps"] = {}
|
|
185
|
+
info["on_error"] = None
|
|
186
|
+
info["needs_materialize"] = False
|
|
187
|
+
dag[name] = info
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _compute_needs_materialize(dag: dict) -> None:
|
|
191
|
+
"""Updates each node to indicate if any downstream consumer needs its output materialized."""
|
|
192
|
+
for name, node in dag.items():
|
|
193
|
+
consumers = [
|
|
194
|
+
other_name
|
|
195
|
+
for other_name, other_node in dag.items()
|
|
196
|
+
if name in other_node.get("deps", {})
|
|
197
|
+
]
|
|
198
|
+
|
|
199
|
+
node["needs_materialize"] = any(
|
|
200
|
+
is_materialized_consumer(dag[consumer_name]["deps"][name])
|
|
201
|
+
for consumer_name in consumers
|
|
202
|
+
if consumer_name in dag and name in dag[consumer_name].get("deps", {})
|
|
203
|
+
)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synaflow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight, type-hint driven engine for executing Directed Acyclic Graphs (DAGs) and lockstep pipelines in Python.
|
|
5
|
+
Project-URL: Homepage, https://github.com/humansoftware/synaflow
|
|
6
|
+
Project-URL: Repository, https://github.com/humansoftware/synaflow.git
|
|
7
|
+
Author-email: mvallebr <mvallebr@example.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# SynaFlow 🌊🧠
|
|
17
|
+
|
|
18
|
+
**SynaFlow** is a lightweight, pure-Python pipeline engine that uses **Type Hints** to magically wire and execute Directed Acyclic Graphs (DAGs).
|
|
19
|
+
|
|
20
|
+
It solves the "dependency hell" and boilerplate associated with building data pipelines by automatically inferring the flow of data based exclusively on Python's static type annotations.
|
|
21
|
+
|
|
22
|
+
## The Problem It Solves
|
|
23
|
+
|
|
24
|
+
Building data pipelines usually involves two headaches:
|
|
25
|
+
1. **Explicit Wiring:** You have to manually define which function outputs go to which function inputs (e.g., `A >> B >> C`), creating verbose and fragile architectures.
|
|
26
|
+
2. **Memory Explosions vs. Lazy Evaluation:** Passing large datasets around usually means holding them entirely in memory (Lists) or dealing with complex generator management. If you have multiple consumers for a single generator, you usually have to write clunky `itertools.tee` boilerplate yourself.
|
|
27
|
+
|
|
28
|
+
## The SynaFlow Solution
|
|
29
|
+
|
|
30
|
+
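SynaFlow looks at the signatures and **Type Hints** of your functions and automatically wires everything together for you: each parameter is matched by name to the step (or pipeline param) that produces it, and the type hints are checked for compatibility. If a step named `a` returns an `int` and step `b` declares a parameter `a: int`, SynaFlow connects them instantly.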
|
|
31
|
+
|
|
32
|
+
Furthermore, SynaFlow has a **smart lockstep streaming engine**:
|
|
33
|
+
- If a producer yields a `Generator` and a consumer expects an `Iterator`, SynaFlow streams the data lazily without ever holding it in memory.
|
|
34
|
+
- If multiple consumers want that same generator, SynaFlow automatically forks it (`tee`) and drives them in parallel (lockstep).
|
|
35
|
+
- If one consumer explicitly asks for a `list`, SynaFlow automatically materializes the data only for that specific branch.
|
|
36
|
+
|
|
37
|
+
## How is it different from other frameworks?
|
|
38
|
+
|
|
39
|
+
There are many amazing orchestration frameworks out there, but SynaFlow fills a very specific gap: **In-process Streaming Micro-Orchestration**.
|
|
40
|
+
|
|
41
|
+
### vs. Hamilton
|
|
42
|
+
[Hamilton](https://github.com/DAGWorks-Inc/hamilton) is a fantastic tool that also uses Python function signatures to build DAGs. However, Hamilton is heavily geared towards DataFrames and feature engineering, generally expecting functions to return concrete values (columns/scalars). **SynaFlow**, on the other hand, is built from the ground up to support **Native Generators and Lazy Streaming**. While Hamilton maps functions to columns, SynaFlow maps functions to continuous data streams, automatically interleaving multiple consumers in lockstep without memory spikes.
|
|
43
|
+
|
|
44
|
+
### vs. Airflow / Prefect / Dagster
|
|
45
|
+
These are **Macro-Orchestrators**. They are designed to orchestrate heavy, distributed tasks across clusters, Docker containers, and different machines. They rely on state databases and massive IO overhead. **SynaFlow is a Micro-Orchestrator**. It runs entirely within a single Python process. You would use Airflow to trigger a daily job, but you would use SynaFlow *inside* that job to smartly route and stream millions of rows between your Python functions.
|
|
46
|
+
|
|
47
|
+
## Quickstart
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from typing import NamedTuple
|
|
51
|
+
from collections.abc import Generator, Iterator
|
|
52
|
+
from synaflow import pipeline, step, run
|
|
53
|
+
|
|
54
|
+
# Define the data required to start your pipeline
|
|
55
|
+
class MyParams(NamedTuple):
|
|
56
|
+
count: int
|
|
57
|
+
|
|
58
|
+
# 1. Producer outputs a stream
|
|
59
|
+
def producer(count: int) -> Generator[int, None, None]:
|
|
60
|
+
yield from range(count)
|
|
61
|
+
|
|
62
|
+
# 2. Transformer consumes the stream lazily
|
|
63
|
+
def transformer(producer: Iterator[int]) -> Generator[int, None, None]:
|
|
64
|
+
for val in producer:
|
|
65
|
+
yield val * 10
|
|
66
|
+
|
|
67
|
+
# 3. Consumer automatically gets the stream!
|
|
68
|
+
def consumer(transformer: Iterator[int]) -> None:
|
|
69
|
+
for x in transformer:
|
|
70
|
+
print(f"Consumed: {x}")
|
|
71
|
+
|
|
72
|
+
# SynaFlow reads the Type Hints and wires the DAG automatically!
|
|
73
|
+
my_pipeline = pipeline(
|
|
74
|
+
name="example",
|
|
75
|
+
params=MyParams,
|
|
76
|
+
steps=[
|
|
77
|
+
step("producer", fn=producer),
|
|
78
|
+
step("transformer", fn=transformer),
|
|
79
|
+
step("consumer", fn=consumer)
|
|
80
|
+
]
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Run it
|
|
84
|
+
run(my_pipeline, MyParams(count=5))
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Advanced Features
|
|
88
|
+
- **Auto-DAG compilation and validation** before execution.
|
|
89
|
+
- Strict type-checking: Pipeline refuses to run if type annotations are incompatible.
|
|
90
|
+
- Easily export DAG structures as JSON (`my_pipeline.to_dict()`) for snapshot testing or UI rendering.
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
MIT License
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
synaflow/__init__.py,sha256=lc79G8-8BKwq_vHc2gggd6CERAMBEopgq0OhMFF4j3I,430
|
|
2
|
+
synaflow/executor.py,sha256=AysW8lKff4_Ks66IqlRrypQ9DIV2YHU6vb9txUbpY0w,14400
|
|
3
|
+
synaflow/iterator_utils.py,sha256=vgqS9SUxATAYVAJUKeiPmNkqAATp-ynTiAeaSnux6RU,697
|
|
4
|
+
synaflow/pipeline.py,sha256=zJzJIOxWaZFOv1DDin9zzMC-U6Z4dB_r9-BVCrxTGwk,1101
|
|
5
|
+
synaflow/step.py,sha256=GNxvDVki-7EODlDU2pifYTvgKNDyHEypDsC2vTpd8x0,320
|
|
6
|
+
synaflow/type_compatibility.py,sha256=WpnLsxjeeybk7YhQKBiVZ_05NDwUrf7Fl3Lwbk9rJ2s,5855
|
|
7
|
+
synaflow/types.py,sha256=lviJnTEjeZ_ygqfH_gcvmUAQUb6hp8fIUM_tWwPDD6o,502
|
|
8
|
+
synaflow/validator.py,sha256=BpjqYTSIUFqIwSr2di2M8iAAHxXS5j_8EViNcXuDdfc,6425
|
|
9
|
+
synaflow-0.1.0.dist-info/METADATA,sha256=NlaWpgcTxueXCG0Uv99to0JYU0l73AYTJ2tEmOfcCOs,4787
|
|
10
|
+
synaflow-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
11
|
+
synaflow-0.1.0.dist-info/licenses/LICENSE,sha256=XpDpkAL4Vwe8Mb6Uflgq6k-S7UkmcujQDar7Fx8T46E,1071
|
|
12
|
+
synaflow-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Human Software
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|