synaflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
synaflow/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """
2
+ Pipeline Engine
3
+
4
+ A lightweight, robust engine for defining and executing typed Directed Acyclic Graphs (DAGs).
5
+ This module defines the public interface for clients.
6
+ """
7
+
8
+ from .executor import run
9
+ from .pipeline import PipelineDef, pipeline
10
+ from .step import Step, step
11
+ from .types import OnError, StepParams
12
+
13
+ __all__ = [
14
+ "PipelineDef",
15
+ "pipeline",
16
+ "Step",
17
+ "step",
18
+ "OnError",
19
+ "StepParams",
20
+ "run",
21
+ ]
synaflow/executor.py ADDED
@@ -0,0 +1,416 @@
1
+ import inspect
2
+ import itertools
3
+ from collections.abc import Callable, Generator, Iterator
4
+ from typing import Any
5
+
6
+ from .iterator_utils import InterleavedIterator
7
+ from .pipeline import PipelineDef
8
+ from .type_compatibility import is_iterable_type, is_scalar
9
+
10
+
11
class PipelineStopException(Exception):
    """Internal signal that the running pipeline must halt immediately.

    Raised when a step whose ``on_error`` value is ``"stop"`` fails;
    ``PipelineExecutor.execute`` catches it and ends the run quietly.
    """
15
+
16
+
17
+ class PipelineExecutor:
18
+ """Executes a compiled Directed Acyclic Graph (DAG) for a pipeline."""
19
+
20
+ def __init__(self, dag: dict[str, dict], materialize_fn: Callable = list):
21
+ self.dag = dag
22
+ self.materialize_fn = materialize_fn
23
+ self.context: dict[str, Any] = {}
24
+ self.executed_steps: set[str] = set()
25
+
26
+ def execute(self, params: Any) -> None:
27
+ self._initialize_context_with_params(params)
28
+
29
+ try:
30
+ levels = self._compute_topological_levels()
31
+ for level in levels:
32
+ self._execute_level(level)
33
+ except PipelineStopException:
34
+ pass
35
+
36
+ def _initialize_context_with_params(self, params: Any) -> None:
37
+ for field, value in params._asdict().items():
38
+ node = self.dag.get(field, {})
39
+ needs_materialization = node.get("needs_materialize", False)
40
+
41
+ if needs_materialization and isinstance(value, Iterator):
42
+ value = self.materialize_fn(value)
43
+ elif isinstance(value, Iterator):
44
+ value = self._tee_iterator_for_consumers(field, value)
45
+
46
+ self.context[field] = value
47
+
48
+ def _tee_iterator_for_consumers(
49
+ self, producer_name: str, iterator_value: Iterator
50
+ ) -> Any:
51
+ consumers = [
52
+ consumer_name
53
+ for consumer_name, node in self.dag.items()
54
+ if producer_name in node.get("deps", {})
55
+ ]
56
+ if len(consumers) > 1:
57
+ tees = itertools.tee(iterator_value, len(consumers))
58
+ return {"__tees__": dict(zip(consumers, tees))}
59
+ return iterator_value
60
+
61
+ def _compute_topological_levels(self) -> list[list[str]]:
62
+ in_degree: dict[str, int] = {name: 0 for name in self.dag}
63
+ for name, node in self.dag.items():
64
+ for dep in node.get("deps", {}):
65
+ if dep in in_degree:
66
+ in_degree[name] += 1
67
+
68
+ levels: list[list[str]] = []
69
+ processed: set[str] = set()
70
+
71
+ while len(processed) < len(self.dag):
72
+ level = [
73
+ name
74
+ for name, degree in in_degree.items()
75
+ if degree == 0 and name not in processed
76
+ ]
77
+ if not level:
78
+ break
79
+ levels.append(level)
80
+ processed.update(level)
81
+
82
+ for name in level:
83
+ for other_name, node in self.dag.items():
84
+ if name in node.get("deps", {}):
85
+ in_degree[other_name] -= 1
86
+
87
+ return levels
88
+
89
+ def _execute_level(self, level: list[str]) -> None:
90
+ dep_each_nodes, dep_all_nodes, independent_nodes = (
91
+ self._group_nodes_by_execution_mode(level)
92
+ )
93
+
94
+ all_dependencies = set(dep_each_nodes.keys()) | set(dep_all_nodes.keys())
95
+
96
+ for dep_name in all_dependencies:
97
+ each_names = dep_each_nodes.get(dep_name, [])
98
+ all_names = dep_all_nodes.get(dep_name, [])
99
+ self._process_grouped_dependencies(
100
+ dep_name, each_names, all_names, independent_nodes
101
+ )
102
+
103
+ for name in independent_nodes:
104
+ self._execute_independent_node(name)
105
+
106
+ def _group_nodes_by_execution_mode(
107
+ self, level: list[str]
108
+ ) -> tuple[dict, dict, list]:
109
+ dep_each_nodes: dict[str, list[str]] = {}
110
+ dep_all_nodes: dict[str, list[str]] = {}
111
+ independent_nodes: list[str] = []
112
+
113
+ for name in level:
114
+ if name in self.executed_steps:
115
+ continue
116
+
117
+ node = self.dag.get(name)
118
+ if not node or node.get("fn") is None:
119
+ continue
120
+
121
+ deps = node.get("deps", {})
122
+ if not deps:
123
+ independent_nodes.append(name)
124
+ continue
125
+
126
+ first_dep_name = next(iter(deps))
127
+ if self._is_each_mode_execution(deps, first_dep_name):
128
+ dep_each_nodes.setdefault(first_dep_name, []).append(name)
129
+ else:
130
+ consumer_type = (
131
+ inspect.signature(node["fn"]).parameters[first_dep_name].annotation
132
+ )
133
+ if self._is_lazy_iterator_type(consumer_type):
134
+ dep_all_nodes.setdefault(first_dep_name, []).append(name)
135
+ else:
136
+ independent_nodes.append(name)
137
+
138
+ return dep_each_nodes, dep_all_nodes, independent_nodes
139
+
140
+ def _process_grouped_dependencies(
141
+ self,
142
+ dep_name: str,
143
+ each_names: list[str],
144
+ all_names: list[str],
145
+ independent_nodes: list[str],
146
+ ) -> None:
147
+ eager_each_names, lazy_each_names = self._split_eager_and_lazy_each_nodes(
148
+ each_names
149
+ )
150
+
151
+ eager_callbacks = self._create_eager_callbacks(eager_each_names, dep_name)
152
+
153
+ independent_nodes.extend(lazy_each_names)
154
+
155
+ if eager_callbacks:
156
+ self._execute_eager_callbacks(
157
+ eager_callbacks, dep_name, all_names, independent_nodes
158
+ )
159
+ else:
160
+ independent_nodes.extend(all_names)
161
+
162
+ def _split_eager_and_lazy_each_nodes(
163
+ self, each_names: list[str]
164
+ ) -> tuple[list[str], list[str]]:
165
+ eager = []
166
+ lazy = []
167
+ for name in each_names:
168
+ consumers = [
169
+ cn for cn, cnode in self.dag.items() if name in cnode.get("deps", {})
170
+ ]
171
+ if name.startswith("_") or not consumers:
172
+ eager.append(name)
173
+ else:
174
+ lazy.append(name)
175
+ return eager, lazy
176
+
177
+ def _create_eager_callbacks(
178
+ self, eager_names: list[str], dep_name: str
179
+ ) -> list[Callable]:
180
+ callbacks = []
181
+ for name in eager_names:
182
+ node = self.dag[name]
183
+ fn = node["fn"]
184
+ kwargs = self._resolve_node_arguments(name, node)
185
+ on_error = node.get("on_error")
186
+
187
+ def make_callback(fn, kwargs, dep_name, on_error):
188
+ def cb(item):
189
+ try:
190
+ item_kwargs = dict(kwargs)
191
+ item_kwargs[dep_name] = item
192
+ fn(**item_kwargs)
193
+ except Exception as e:
194
+ if on_error and on_error.value == "stop":
195
+ raise PipelineStopException() from e
196
+
197
+ return cb
198
+
199
+ callbacks.append(make_callback(fn, kwargs, dep_name, on_error))
200
+ self.executed_steps.add(name)
201
+
202
+ return callbacks
203
+
204
+ def _execute_eager_callbacks(
205
+ self,
206
+ callbacks: list[Callable],
207
+ dep_name: str,
208
+ all_names: list[str],
209
+ independent_nodes: list[str],
210
+ ) -> None:
211
+ items_source = self.context.get(dep_name)
212
+
213
+ if all_names:
214
+ first_all = all_names[0]
215
+ self._execute_interleaved_node(first_all, dep_name, callbacks)
216
+ independent_nodes.extend(all_names[1:])
217
+ else:
218
+ self._execute_lockstep_loop(items_source, callbacks, dep_name)
219
+
220
+ def _execute_interleaved_node(
221
+ self, node_name: str, dep_name: str, callbacks: list[Callable]
222
+ ) -> None:
223
+ node = self.dag[node_name]
224
+ fn = node["fn"]
225
+ kwargs = self._resolve_node_arguments(node_name, node)
226
+
227
+ dep_val = kwargs.get(dep_name)
228
+ if isinstance(dep_val, dict) and "__tees__" in dep_val:
229
+ dep_val = dep_val["__tees__"][node_name]
230
+
231
+ kwargs[dep_name] = InterleavedIterator(dep_val, callbacks)
232
+
233
+ try:
234
+ output = fn(**kwargs)
235
+ if node_name and not node_name.startswith("_"):
236
+ self.context[node_name] = output
237
+ except Exception:
238
+ if node.get("on_error") and node["on_error"].value == "stop":
239
+ raise PipelineStopException()
240
+
241
+ self.executed_steps.add(node_name)
242
+
243
+ def _execute_lockstep_loop(
244
+ self, items_source: Any, callbacks: list[Callable], dep_name: str
245
+ ) -> None:
246
+ dep_val = items_source
247
+ if isinstance(dep_val, dict) and "__tees__" in dep_val:
248
+ consumers = [
249
+ cn
250
+ for cn, cnode in self.dag.items()
251
+ if dep_name in cnode.get("deps", {})
252
+ ]
253
+ first_tee_name = next(c for c in consumers if c in dep_val["__tees__"])
254
+ dep_val = dep_val["__tees__"][first_tee_name]
255
+
256
+ for item in dep_val:
257
+ for cb in callbacks:
258
+ cb(item)
259
+
260
+ def _execute_independent_node(self, name: str) -> None:
261
+ if name in self.executed_steps:
262
+ return
263
+
264
+ node = self.dag.get(name)
265
+ if not node or node.get("fn") is None:
266
+ return
267
+
268
+ fn = node["fn"]
269
+ deps = node.get("deps", {})
270
+ kwargs = self._resolve_node_arguments(name, node)
271
+
272
+ if deps and self._is_each_mode_execution(deps, next(iter(deps))):
273
+ self._execute_independent_each_node(name, fn, deps, kwargs, node)
274
+ else:
275
+ self._execute_standard_node(name, fn, kwargs, node)
276
+
277
+ self.executed_steps.add(name)
278
+
279
+ def _execute_independent_each_node(
280
+ self, name: str, fn: Callable, deps: dict, kwargs: dict, node: dict
281
+ ) -> None:
282
+ first_dep = next(iter(deps))
283
+ items = self.context.get(first_dep)
284
+
285
+ if isinstance(items, dict) and "__tees__" in items:
286
+ items = items["__tees__"][name]
287
+
288
+ consumers = [
289
+ cn for cn, cnode in self.dag.items() if name in cnode.get("deps", {})
290
+ ]
291
+ is_sink = name.startswith("_") or len(consumers) == 0
292
+
293
+ if is_sink:
294
+ for item in items:
295
+ try:
296
+ item_kwargs = dict(kwargs)
297
+ item_kwargs[first_dep] = item
298
+ fn(**item_kwargs)
299
+ except Exception:
300
+ if node.get("on_error") and node["on_error"].value == "stop":
301
+ raise PipelineStopException()
302
+ else:
303
+
304
+ def each_generator(items, kwargs, first_dep, fn, on_error):
305
+ for item in items:
306
+ try:
307
+ item_kwargs = dict(kwargs)
308
+ item_kwargs[first_dep] = item
309
+ yield fn(**item_kwargs)
310
+ except Exception as e:
311
+ if on_error and on_error.value == "stop":
312
+ raise PipelineStopException() from e
313
+
314
+ output = each_generator(items, kwargs, first_dep, fn, node.get("on_error"))
315
+
316
+ if len(consumers) > 1:
317
+ tees = itertools.tee(output, len(consumers))
318
+ output = {"__tees__": dict(zip(consumers, tees))}
319
+
320
+ self.context[name] = output
321
+
322
+ def _execute_standard_node(
323
+ self, name: str, fn: Callable, kwargs: dict, node: dict
324
+ ) -> None:
325
+ try:
326
+ output = fn(**kwargs)
327
+
328
+ if name and not name.startswith("_"):
329
+ if isinstance(output, Iterator) and node.get("needs_materialize"):
330
+ output = self.materialize_fn(output)
331
+ elif isinstance(output, Iterator):
332
+ output = self._tee_iterator_for_consumers(name, output)
333
+
334
+ self.context[name] = output
335
+
336
+ except Exception:
337
+ if node.get("on_error") and node["on_error"].value == "stop":
338
+ raise PipelineStopException()
339
+
340
+ def _resolve_node_arguments(self, consumer_name: str, node: dict) -> dict[str, Any]:
341
+ sig = inspect.signature(node["fn"])
342
+ deps = node.get("deps", {})
343
+ kwargs: dict[str, Any] = {}
344
+
345
+ for param_name in sig.parameters:
346
+ if param_name in self.context:
347
+ value = self.context.get(param_name)
348
+
349
+ if isinstance(value, dict) and "__tees__" in value:
350
+ value = value["__tees__"][consumer_name]
351
+
352
+ if param_name in deps:
353
+ consumer_type = deps[param_name]
354
+ value = self._adapt_argument_to_consumer_type(value, consumer_type)
355
+
356
+ kwargs[param_name] = value
357
+
358
+ return kwargs
359
+
360
+ def _adapt_argument_to_consumer_type(self, value: Any, consumer_type: Any) -> Any:
361
+ is_lazy_iterator = self._is_lazy_iterator_type(consumer_type)
362
+ needs_materialization = self._needs_materialize_for(consumer_type)
363
+
364
+ if is_lazy_iterator or needs_materialization:
365
+ if not isinstance(value, (list, set, tuple, Iterator, Generator)):
366
+ value = [value]
367
+
368
+ if isinstance(value, Iterator) and needs_materialization:
369
+ value = self.materialize_fn(value)
370
+
371
+ origin = getattr(consumer_type, "__origin__", consumer_type)
372
+ if origin is set:
373
+ value = set(value)
374
+ elif origin is tuple:
375
+ value = tuple(value)
376
+ elif origin in (Iterator, Generator):
377
+ value = iter(value)
378
+
379
+ return value
380
+
381
+ def _is_each_mode_execution(self, deps: dict, first_dep_name: str) -> bool:
382
+ if not deps:
383
+ return False
384
+
385
+ first_type = deps[first_dep_name]
386
+ producer = self.dag.get(first_dep_name)
387
+ if not producer or producer.get("output") is None:
388
+ return False
389
+
390
+ producer_output = producer.get("output")
391
+ return is_iterable_type(producer_output) and is_scalar(first_type)
392
+
393
+ def _is_lazy_iterator_type(self, tp: Any) -> bool:
394
+ if tp is Iterator:
395
+ return True
396
+ origin = getattr(tp, "__origin__", tp)
397
+ return origin in (Iterator, Generator)
398
+
399
+ def _needs_materialize_for(self, tp: Any) -> bool:
400
+ if tp is None:
401
+ return False
402
+ if tp in (list, set, tuple):
403
+ return True
404
+ origin = getattr(tp, "__origin__", None)
405
+ return origin in (list, set, tuple)
406
+
407
+
408
def run(
    pipeline: PipelineDef,
    params: Any,
    *,
    materialize: Callable = list,
) -> None:
    """Execute a pipeline definition against concrete parameter values.

    Args:
        pipeline: Definition whose compiled DAG (``pipeline._dag``) is run.
        params: Parameter values handed to the executor.
        materialize: Callable the executor uses to materialise iterators.
    """
    PipelineExecutor(pipeline._dag, materialize).execute(params)
@@ -0,0 +1,24 @@
1
+ from typing import Any, Callable, Iterable
2
+
3
+
4
class InterleavedIterator:
    """
    Iterator wrapper that runs callbacks on every item it hands out.

    Each item pulled from the source is first passed to every callback, in
    order, and only then returned to the caller. Eager consumers therefore
    see an item before the lazy consumer that drives the iteration.
    """

    def __init__(self, source: Iterable[Any], callbacks: list[Callable[[Any], None]]):
        self.callbacks = callbacks
        self.source = iter(source)

    def __iter__(self):
        return self

    def __next__(self):
        # StopIteration from the source simply propagates.
        current = next(self.source)
        for notify in self.callbacks:
            notify(current)
        return current
synaflow/pipeline.py ADDED
@@ -0,0 +1,39 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any
3
+
4
+ from .step import Step
5
+
6
+
7
@dataclass
class PipelineDef:
    """
    Defines a Pipeline workflow.

    On creation the steps are validated against ``params`` and compiled
    into a DAG kept on ``_dag``; an invalid definition raises at that point.
    """

    name: str
    params: Any
    steps: list[Step]
    description: str = ""

    def __post_init__(self) -> None:
        # Imported here rather than at module level, as in the original.
        from .validator import validate_and_build_dag

        self._dag = validate_and_build_dag(self.name, self.steps, self.params)

    def to_dict(self) -> dict:
        """Exports the compiled DAG structure to a JSON-serializable dictionary."""
        from .type_compatibility import get_type_name

        serialized = {}
        for name, node in self._dag.items():
            fn = node["fn"]
            on_error = node["on_error"]
            serialized[name] = {
                "deps": {k: get_type_name(v) for k, v in node["deps"].items()},
                "output": get_type_name(node["output"]),
                # The validator accepts any callable; functools.partial
                # objects and callable instances have no __name__.
                "fn": getattr(fn, "__name__", repr(fn)) if fn is not None else None,
                "on_error": on_error.value if on_error else None,
                "needs_materialize": node["needs_materialize"],
            }
        return serialized
37
+
38
+
39
+ pipeline = PipelineDef
synaflow/step.py ADDED
@@ -0,0 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Callable
5
+
6
+ from .types import OnError, StepParams
7
+
8
+
9
@dataclass
class Step:
    """One unit of work in a pipeline: a named callable plus its error policy."""

    # Name under which the step is registered in the pipeline.
    name: str
    # Callable executed for this step.
    fn: Callable
    # Failure policy; defaults to stopping the pipeline.
    on_error: OnError = OnError.STOP
    # Optional per-step parameters.
    params: StepParams | None = None
    # Free-form, human-readable description.
    description: str = ""
16
+
17
+
18
+ step = Step
@@ -0,0 +1,191 @@
1
+ import types
2
+ from collections.abc import Generator, Iterable, Iterator
3
+ from typing import Any, get_args, get_origin
4
+
5
+ SCALAR_TYPES = {int, float, str, bool, bytes, type(None)}
6
+ COLLECTION_ORIGINS = {list, set, tuple, Generator, Iterator, Iterable}
7
+
8
+
9
class ListType:
    """Marker for a list whose element type is only resolved at runtime."""

    def __init__(self, inner_type: Any):
        # Element type of the represented list.
        self.inner_type = inner_type

    def __repr__(self):
        return "ListType({})".format(self.inner_type)
17
+
18
+
19
def is_type_compatible(producer_type: Any, consumer_type: Any) -> bool:
    """Checks if a producer output type satisfies a consumer input type.

    A missing annotation (``None``) or ``typing.Any`` on either side is
    compatible with everything. Union producers must satisfy the consumer
    with every member; iterable producers are checked element-wise; scalar
    consumers require a matching scalar. Anything else is accepted.
    """
    if producer_type is None or consumer_type is None:
        return True

    # Any accepts and satisfies every type. Without this check it fell into
    # the exact-equality scalar branch and was rejected.
    if producer_type is Any or consumer_type is Any:
        return True

    producer_origin = get_origin(producer_type)
    consumer_origin = get_origin(consumer_type)
    producer_is_union = _is_union(producer_type, producer_origin)
    consumer_is_union = _is_union(consumer_type, consumer_origin)

    if producer_is_union:
        if consumer_is_union:
            return _all_producer_types_match_any_consumer_type(
                producer_type, consumer_type
            )
        return _all_producer_types_match_consumer(producer_type, consumer_type)

    is_producer_iterable = _is_iterable(producer_type, producer_origin)
    is_consumer_iterable = _is_iterable(consumer_type, consumer_origin)

    if is_producer_iterable:
        return _check_iterable_producer_compatibility(
            producer_type, consumer_type, is_consumer_iterable
        )

    if consumer_is_union:
        return _producer_matches_any_consumer_type(producer_type, consumer_type)

    if is_consumer_iterable:
        return _check_scalar_producer_to_iterable_consumer(producer_type, consumer_type)

    if is_scalar(consumer_type):
        return _check_scalar_compatibility(producer_type, consumer_type)

    return True
53
+
54
+
55
def _all_producer_types_match_any_consumer_type(
    producer_type: Any, consumer_type: Any
) -> bool:
    """Union to union: every producer member must fit some consumer member."""
    for produced in get_args(producer_type):
        accepted = False
        for wanted in get_args(consumer_type):
            if is_type_compatible(produced, wanted):
                accepted = True
                break
        if not accepted:
            return False
    return True
62
+
63
+
64
def _all_producer_types_match_consumer(producer_type: Any, consumer_type: Any) -> bool:
    """Union producer: every member must fit the single consumer type."""
    for produced in get_args(producer_type):
        if not is_type_compatible(produced, consumer_type):
            return False
    return True
66
+
67
+
68
def _producer_matches_any_consumer_type(producer_type: Any, consumer_type: Any) -> bool:
    """Union consumer: the producer must fit at least one member."""
    for wanted in get_args(consumer_type):
        if is_type_compatible(producer_type, wanted):
            return True
    return False
70
+
71
+
72
def _check_iterable_producer_compatibility(
    producer_type: Any, consumer_type: Any, is_consumer_iterable: bool
) -> bool:
    """Check an iterable producer against a union, iterable or scalar consumer.

    The producer's element type is compared with the consumer's element type
    (iterable consumer) or with the consumer type itself (union or scalar
    consumer, which receives the items). Any other consumer is rejected.
    """
    # None for an unparameterised collection such as bare `list`. The
    # element type is then unknown and is treated like a missing annotation
    # (compatible with anything), instead of rejecting the producer outright
    # even against an identical bare `list` consumer.
    producer_inner = get_inner_type(producer_type)

    consumer_origin = get_origin(consumer_type)

    if _is_union(consumer_type, consumer_origin):
        return is_type_compatible(producer_inner, consumer_type)

    if is_consumer_iterable:
        consumer_inner = get_inner_type(consumer_type)
        if consumer_inner is not None:
            return is_type_compatible(producer_inner, consumer_inner)
        return True

    if is_scalar(consumer_type):
        return is_type_compatible(producer_inner, consumer_type)

    return False
94
+
95
+
96
def _check_scalar_producer_to_iterable_consumer(
    producer_type: Any, consumer_type: Any
) -> bool:
    """Check a non-iterable producer against an iterable consumer.

    An unparameterised consumer accepts anything. Otherwise the producer's
    first type argument, or the producer type itself when it has none, must
    be compatible with the consumer's element type.
    """
    consumer_element = get_inner_type(consumer_type)
    if consumer_element is None:
        return True

    producer_element = get_inner_type(producer_type)
    offered = producer_type if producer_element is None else producer_element
    return is_type_compatible(offered, consumer_element)
109
+
110
+
111
def _check_scalar_compatibility(producer_type: Any, consumer_type: Any) -> bool:
    """Check a producer against a scalar consumer.

    A parameterised producer is judged by its first type argument;
    otherwise the producer must be a scalar equal to the consumer type.
    """
    element_type = get_inner_type(producer_type)
    if element_type is None:
        return is_scalar(producer_type) and producer_type == consumer_type
    return is_type_compatible(element_type, consumer_type)
116
+
117
+
118
+ def _is_union(tp: Any, origin: Any) -> bool:
119
+ return origin is types.UnionType or origin is __import__("typing").Union
120
+
121
+
122
def _is_iterable(tp: Any, origin: Any) -> bool:
    """Return True for ``ListType`` or a type whose origin is a known collection.

    ``origin`` is ``get_origin(tp)``; a bare, unparameterised type is
    checked as its own origin.
    """
    if isinstance(tp, ListType):
        return True
    candidate = tp if origin is None else origin
    return candidate in COLLECTION_ORIGINS
128
+
129
+
130
def is_iterable_type(tp: Any) -> bool:
    """Return True when ``tp`` denotes a collection or stream type.

    ``None`` (no annotation) is never iterable; ``ListType`` always is.
    """
    # _is_iterable already recognises ListType instances.
    return tp is not None and _is_iterable(tp, get_origin(tp))
136
+
137
+
138
def is_scalar(tp: Any) -> bool:
    """Return True when ``tp`` is treated as a single value, not a collection.

    ``None`` is not scalar. Members of ``SCALAR_TYPES`` are. A union is
    scalar when all of its members are; any other parameterised type is
    not. A bare type is scalar unless it is a known collection origin.
    """
    if tp is None:
        return False
    if tp in SCALAR_TYPES:
        return True

    origin = get_origin(tp)
    if origin is None:
        return tp not in COLLECTION_ORIGINS
    if not _is_union(tp, origin):
        return False

    for member in get_args(tp):
        if not is_scalar(member):
            return False
    return True
152
+
153
+
154
def get_inner_type(tp: Any) -> Any:
    """Return the element type of ``tp``, or ``None`` when it has none.

    For ``ListType`` this is its ``inner_type``; for a parameterised type
    it is the first type argument.
    """
    if isinstance(tp, ListType):
        return tp.inner_type
    type_args = get_args(tp)
    return type_args[0] if type_args else None
161
+
162
+
163
+ def get_type_name(tp: Any) -> str:
164
+ if tp is None:
165
+ return "None"
166
+ origin = get_origin(tp)
167
+ if origin is not None:
168
+ arg_names = ", ".join(get_type_name(a) for a in get_args(tp))
169
+ origin_name = getattr(origin, "__name__", str(origin))
170
+ return f"{origin_name}[{arg_names}]"
171
+ return getattr(tp, "__name__", str(tp))
172
+
173
+
174
def is_materialized_consumer(tp: Any) -> bool:
    """Checks if a consumer type requires an eagerly materialized collection.

    True for ``list``/``set``/``tuple`` (bare or parameterised) and for a
    union with at least one such member. False for ``None``, lazy iterable
    types and scalars.
    """
    if tp is None:
        return False

    eager_containers = (list, set, tuple)
    if tp in eager_containers:
        return True

    origin = get_origin(tp)
    if origin in eager_containers:
        return True
    if origin in (Iterator, Generator, Iterable) or is_scalar(tp):
        return False
    if not _is_union(tp, origin):
        return False

    for member in get_args(tp):
        if is_materialized_consumer(member):
            return True
    return False
synaflow/types.py ADDED
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import Enum
5
+ from typing import Any
6
+
7
+
8
class OnError(Enum):
    """Policy applied when a step raises an exception."""

    # Ignore the failure and keep the pipeline running.
    CONTINUE = "continue"
    # Abort the rest of the pipeline.
    STOP = "stop"
11
+
12
+
13
@dataclass(frozen=True)
class StepParams:
    """Immutable holder for a step's own named parameter values."""

    # Parameter name -> value; a fresh empty dict per instance by default.
    values: dict[str, Any] = field(default_factory=dict)
16
+
17
+
18
+ @dataclass
19
+ class StepResult:
20
+ step_name: str
21
+ status: str # "ok" | "error" | "skipped"
22
+ output: Any = None
23
+ error: Exception | None = None
24
+ metrics: dict[str, int] = field(default_factory=dict)
synaflow/validator.py ADDED
@@ -0,0 +1,203 @@
1
+ import inspect
2
+ from typing import Any, NamedTuple
3
+
4
+ from .step import Step
5
+ from .type_compatibility import (
6
+ ListType,
7
+ get_type_name,
8
+ is_iterable_type,
9
+ is_materialized_consumer,
10
+ is_scalar,
11
+ is_type_compatible,
12
+ )
13
+
14
+
15
def validate_and_build_dag(
    name: str, steps: list[Step], params: type[NamedTuple]
) -> dict[str, dict]:
    """
    Validates a list of steps and pipeline parameters and compiles them into a
    Directed Acyclic Graph (DAG) represented as a dictionary.

    Args:
        name: Pipeline name, used in error messages.
        steps: Steps in declaration order; each may depend only on the
            parameters and on steps declared before it.
        params: NamedTuple type describing the pipeline parameters.

    Returns:
        Mapping of node name to compiled node. Steps come first, followed
        by the parameters that no step has replaced.

    Raises:
        ValueError: If ``params`` is not a NamedTuple, a step is not
            callable, a step name is duplicated, a dependency is missing or
            incompatible, or a step consumes its own name.
    """
    dag: dict[str, dict] = {}

    _validate_params_is_namedtuple(params, name)

    produced: dict[str, dict] = _initialize_parameters(params)

    for step in steps:
        _validate_step_is_callable(step, name)
        _validate_unique_step_name(step.name, dag, pipeline_name=name)

        node = _validate_and_compile_step(step, produced, pipeline_name=name)

        # This can only happen when the step is named like a pipeline
        # parameter it also consumes. The step would replace that parameter
        # in the DAG and depend on itself, so it and everything downstream
        # would silently never be scheduled.
        if step.name in node["deps"]:
            raise ValueError(
                f"Pipeline '{name}': step '{step.name}' has the same name as the "
                f"pipeline parameter '{step.name}' it consumes and would depend on itself"
            )

        dag[step.name] = node
        produced[step.name] = node

    _add_parameter_nodes_to_dag(dag, produced)
    _compute_needs_materialize(dag)

    return dag
41
+
42
+
43
def _validate_params_is_namedtuple(params: Any, pipeline_name: str) -> None:
    """Raise ``ValueError`` unless ``params`` exposes a ``_fields`` attribute."""
    if hasattr(params, "_fields"):
        return
    received = type(params).__name__
    raise ValueError(
        f"Pipeline '{pipeline_name}': 'params' must be a NamedTuple, got {received}"
    )
48
+
49
+
50
def _validate_step_is_callable(step: Step, pipeline_name: str) -> None:
    """Raise ``ValueError`` when the step's ``fn`` cannot be called."""
    if callable(step.fn):
        return
    received = type(step.fn).__name__
    raise ValueError(
        f"Pipeline '{pipeline_name}': step '{step.name}' must have a callable 'fn', got {received}"
    )
55
+
56
+
57
def _initialize_parameters(params: type[NamedTuple]) -> dict[str, dict]:
    """Create the initial producer table from the pipeline parameters.

    Returns:
        ``{field: {"output": annotation}}`` for every field of ``params``,
        with ``None`` for fields that carry no annotation.
    """
    annotations = getattr(params, "__annotations__", {})
    return {name: {"output": annotations.get(name)} for name in params._fields}
63
+
64
+
65
def _validate_unique_step_name(step_name: str, dag: dict, pipeline_name: str) -> None:
    """Raise ``ValueError`` when ``step_name`` is already a key of ``dag``."""
    if step_name not in dag:
        return
    raise ValueError(
        f"Pipeline '{pipeline_name}': duplicate step name '{step_name}'"
    )
70
+
71
+
72
def _validate_and_compile_step(
    step: Step, produced: dict[str, dict], pipeline_name: str
) -> dict[str, Any]:
    """Validate one step against what has been produced so far and compile it.

    Returns:
        The node dictionary with keys ``deps``, ``output``, ``fn``,
        ``on_error`` and ``needs_materialize``.

    Raises:
        ValueError: Propagated from dependency validation.
    """
    signature = inspect.signature(step.fn)
    type_hints = _get_safe_type_hints(step.fn)

    dependencies = _validate_and_resolve_dependencies(
        step, signature, type_hints, produced, pipeline_name
    )
    produced_type = _resolve_step_output_type(
        signature, type_hints, dependencies, produced
    )
    wants_materialized_input = _any_dependency_needs_materialization(dependencies)

    return {
        "deps": dependencies,
        "output": produced_type,
        "fn": step.fn,
        "on_error": step.on_error,
        "needs_materialize": wants_materialized_input,
    }
88
+
89
+
90
def _get_safe_type_hints(fn: Any) -> dict[str, Any]:
    """Return the annotations of ``fn`` with string annotations resolved.

    Annotations written as strings (explicitly, or because the defining
    module uses ``from __future__ import annotations``) are evaluated with
    ``typing.get_type_hints`` so the type checks see real types. Other
    annotations are returned exactly as declared. A resolved ``NoneType``
    is normalised to ``None`` so that ``-> None`` means the same thing in
    both annotation styles.

    Returns:
        Mapping of parameter name (and ``"return"``) to annotation. Empty
        when ``fn`` has no readable annotations; the raw annotations when
        the strings cannot be resolved.
    """
    from typing import get_type_hints

    try:
        raw = getattr(fn, "__annotations__", None) or {}
    except Exception:
        # Reading annotations can itself fail when they are evaluated lazily.
        return {}

    if not any(isinstance(hint, str) for hint in raw.values()):
        return raw

    try:
        resolved = get_type_hints(fn)
    except Exception:
        # Unresolvable forward reference: fall back to the raw strings.
        return raw

    none_type = type(None)
    hints: dict[str, Any] = {}
    for name, hint in raw.items():
        if isinstance(hint, str):
            hint = resolved.get(name, hint)
            if hint is none_type:
                hint = None
        hints[name] = hint
    return hints
95
+
96
+
97
def _validate_and_resolve_dependencies(
    step: Step,
    sig: inspect.Signature,
    hints: dict[str, Any],
    produced: dict[str, dict],
    pipeline_name: str,
) -> dict[str, Any]:
    """Match each parameter of the step to the producer of the same name.

    Args:
        step: Step being compiled; only its name is used, for messages.
        sig: Signature of the step function.
        hints: Annotations of the step function, keyed by parameter name.
        produced: Everything available so far (parameters and earlier
            steps), keyed by name, each with an ``"output"`` type.
        pipeline_name: Pipeline name, used in error messages.

    Returns:
        Mapping of parameter name to declared consumer type (``None`` when
        unannotated), in signature order. ``*args``/``**kwargs`` are not
        dependencies and are left out.

    Raises:
        ValueError: If nothing produces a parameter, or the producer's type
            is incompatible with the declared consumer type.
    """
    deps: dict[str, Any] = {}
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )

    for param_name, param in sig.parameters.items():
        # A variadic parameter cannot be supplied by keyword under its own
        # name, so it can never be wired to a producer.
        if param.kind in variadic_kinds:
            continue

        consumer_type = hints.get(param_name, param.annotation)
        if consumer_type is inspect.Parameter.empty:
            consumer_type = None

        if param_name not in produced:
            raise ValueError(
                f"Pipeline '{pipeline_name}': step '{step.name}' depends on '{param_name}' "
                "but no prior step or param produces it"
            )

        producer_type = produced[param_name]["output"]

        # Explicit validation for NoneType to strict types
        if (
            producer_type is type(None)
            and consumer_type is not None
            and not _is_optional_or_any(consumer_type)
        ):
            raise ValueError(
                f"Pipeline '{pipeline_name}': step '{step.name}' param '{param_name}': "
                f"expects {get_type_name(consumer_type)} "
                f"but '{param_name}' produces explicit NoneType"
            )

        if not is_type_compatible(producer_type, consumer_type):
            raise ValueError(
                f"Pipeline '{pipeline_name}': step '{step.name}' param '{param_name}': "
                f"expects {get_type_name(consumer_type)} "
                f"but '{param_name}' produces {get_type_name(producer_type)}"
            )

        deps[param_name] = consumer_type

    return deps
141
+
142
+
143
+ def _is_optional_or_any(tp: Any) -> bool:
144
+ if tp is Any:
145
+ return True
146
+ # If it's a union containing NoneType
147
+ origin = getattr(tp, "__origin__", None)
148
+ import types
149
+ from typing import Union
150
+
151
+ if origin is types.UnionType or origin is Union:
152
+ return type(None) in getattr(tp, "__args__", [])
153
+ return False
154
+
155
+
156
def _resolve_step_output_type(
    sig: inspect.Signature,
    hints: dict[str, Any],
    deps: dict[str, Any],
    produced: dict[str, dict],
) -> Any:
    """Determine the type a step contributes to the DAG.

    Normally this is the declared return annotation (``None`` when absent).
    A step that returns a scalar and whose first dependency is produced as
    an iterable yields ``ListType(<return annotation>)`` instead.
    """
    declared = hints.get("return", sig.return_annotation)
    if declared is inspect.Signature.empty:
        declared = None

    if not (is_scalar(declared) and deps):
        return declared

    upstream_output = produced[next(iter(deps))]["output"]
    if upstream_output is not None and is_iterable_type(upstream_output):
        return ListType(declared)
    return declared
173
+
174
+
175
def _any_dependency_needs_materialization(deps: dict[str, Any]) -> bool:
    """Return True when at least one declared dependency type is a materialized collection."""
    for consumer_type in deps.values():
        if is_materialized_consumer(consumer_type):
            return True
    return False
177
+
178
+
179
def _add_parameter_nodes_to_dag(dag: dict, produced: dict) -> None:
    """Register every produced entry that is not yet a DAG node.

    Such entries are completed in place with ``fn=None``, empty ``deps``,
    ``on_error=None`` and ``needs_materialize=False``, then added to
    ``dag`` under their name.
    """
    missing = [(name, info) for name, info in produced.items() if name not in dag]
    for name, info in missing:
        info.update(
            {
                "fn": None,
                "deps": {},
                "on_error": None,
                "needs_materialize": False,
            }
        )
        dag[name] = info
188
+
189
+
190
def _compute_needs_materialize(dag: dict) -> None:
    """Updates each node to indicate if any downstream consumer needs its output materialized.

    ``needs_materialize`` is overwritten on every node: True when at least
    one node that depends on it declares a materialized collection type
    for that dependency, False otherwise.
    """
    # One pass over all dependency edges instead of rescanning the whole
    # DAG for every node.
    materialized_producers: set[str] = set()
    for consumer in dag.values():
        for producer_name, consumer_type in consumer.get("deps", {}).items():
            if producer_name in materialized_producers:
                continue
            if is_materialized_consumer(consumer_type):
                materialized_producers.add(producer_name)

    for name, node in dag.items():
        node["needs_materialize"] = name in materialized_producers
@@ -0,0 +1,93 @@
1
+ Metadata-Version: 2.4
2
+ Name: synaflow
3
+ Version: 0.1.0
4
+ Summary: A lightweight, type-hint driven engine for executing Directed Acyclic Graphs (DAGs) and lockstep pipelines in Python.
5
+ Project-URL: Homepage, https://github.com/humansoftware/synaflow
6
+ Project-URL: Repository, https://github.com/humansoftware/synaflow.git
7
+ Author-email: mvallebr <mvallebr@example.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+
16
+ # SynaFlow 🌊🧠
17
+
18
+ **SynaFlow** is a lightweight, pure-Python pipeline engine that uses **Type Hints** to automatically wire and execute Directed Acyclic Graphs (DAGs).
19
+
20
+ It solves the "dependency hell" and boilerplate associated with building data pipelines by automatically inferring the flow of data based exclusively on Python's static type annotations.
21
+
22
+ ## The Problem It Solves
23
+
24
+ Building data pipelines usually involves two headaches:
25
+ 1. **Explicit Wiring:** You have to manually define which function outputs go to which function inputs (e.g., `A >> B >> C`), creating verbose and fragile architectures.
26
+ 2. **Memory Explosions vs. Lazy Evaluation:** Passing large datasets around usually means holding them entirely in memory (Lists) or dealing with complex generator management. If you have multiple consumers for a single generator, you usually have to write clunky `itertools.tee` boilerplate yourself.
27
+
28
+ ## The SynaFlow Solution
29
+
30
+ SynaFlow looks at the **Type Hints** of your functions and automatically wires everything together for you. If `Step A` outputs an `int` and `Step B` requires an `int`, SynaFlow connects them instantly.
31
+
32
+ Furthermore, SynaFlow has a **smart lockstep streaming engine**:
33
+ - If a producer returns a `Generator` and a consumer expects an `Iterator`, SynaFlow streams the data lazily without ever holding it all in memory.
34
+ - If multiple consumers want that same generator, SynaFlow automatically forks it (`tee`) and drives all of the consumers in lockstep, interleaving them as items arrive.
35
+ - If one consumer explicitly asks for a `list`, SynaFlow automatically materializes the data only for that specific branch.
36
+
37
+ ## How is it different from other frameworks?
38
+
39
+ There are many amazing orchestration frameworks out there, but SynaFlow fills a very specific gap: **In-process Streaming Micro-Orchestration**.
40
+
41
+ ### vs. Hamilton
42
+ [Hamilton](https://github.com/DAGWorks-Inc/hamilton) is a fantastic tool that also uses Python function signatures to build DAGs. However, Hamilton is heavily geared towards DataFrames and feature engineering, generally expecting functions to return concrete values (columns/scalars). **SynaFlow**, on the other hand, is built from the ground up to support **Native Generators and Lazy Streaming**. While Hamilton maps functions to columns, SynaFlow maps functions to continuous data streams, automatically interleaving multiple consumers in lockstep without memory spikes.
43
+
44
+ ### vs. Airflow / Prefect / Dagster
45
+ These are **Macro-Orchestrators**. They are designed to orchestrate heavy, distributed tasks across clusters, Docker containers, and different machines. They rely on state databases and incur significant I/O overhead. **SynaFlow is a Micro-Orchestrator**. It runs entirely within a single Python process. You would use Airflow to trigger a daily job, but you would use SynaFlow *inside* that job to smartly route and stream millions of rows between your Python functions.
46
+
47
+ ## Quickstart
48
+
49
+ ```python
50
+ from typing import NamedTuple
51
+ from collections.abc import Generator, Iterator
52
+ from synaflow import pipeline, step, run
53
+
54
+ # Define the data required to start your pipeline
55
+ class MyParams(NamedTuple):
56
+ count: int
57
+
58
+ # 1. Producer outputs a stream
59
+ def producer(count: int) -> Generator[int, None, None]:
60
+ yield from range(count)
61
+
62
+ # 2. Transformer consumes the stream lazily
63
+ def transformer(producer: Iterator[int]) -> Generator[int, None, None]:
64
+ for val in producer:
65
+ yield val * 10
66
+
67
+ # 3. Consumer automatically gets the stream!
68
+ def consumer(transformer: Iterator[int]) -> None:
69
+ for x in transformer:
70
+ print(f"Consumed: {x}")
71
+
72
+ # SynaFlow reads the Type Hints and wires the DAG automatically!
73
+ my_pipeline = pipeline(
74
+ name="example",
75
+ params=MyParams,
76
+ steps=[
77
+ step("producer", fn=producer),
78
+ step("transformer", fn=transformer),
79
+ step("consumer", fn=consumer)
80
+ ]
81
+ )
82
+
83
+ # Run it
84
+ run(my_pipeline, MyParams(count=5))
85
+ ```
86
+
87
+ ## Advanced Features
88
+ - **Auto-DAG compilation and validation** before execution.
89
+ - Strict type-checking: Pipeline refuses to run if type annotations are incompatible.
90
+ - Easily export DAG structures as JSON (`my_pipeline.to_dict()`) for snapshot testing or UI rendering.
91
+
92
+ ## License
93
+ MIT License
@@ -0,0 +1,12 @@
1
+ synaflow/__init__.py,sha256=lc79G8-8BKwq_vHc2gggd6CERAMBEopgq0OhMFF4j3I,430
2
+ synaflow/executor.py,sha256=AysW8lKff4_Ks66IqlRrypQ9DIV2YHU6vb9txUbpY0w,14400
3
+ synaflow/iterator_utils.py,sha256=vgqS9SUxATAYVAJUKeiPmNkqAATp-ynTiAeaSnux6RU,697
4
+ synaflow/pipeline.py,sha256=zJzJIOxWaZFOv1DDin9zzMC-U6Z4dB_r9-BVCrxTGwk,1101
5
+ synaflow/step.py,sha256=GNxvDVki-7EODlDU2pifYTvgKNDyHEypDsC2vTpd8x0,320
6
+ synaflow/type_compatibility.py,sha256=WpnLsxjeeybk7YhQKBiVZ_05NDwUrf7Fl3Lwbk9rJ2s,5855
7
+ synaflow/types.py,sha256=lviJnTEjeZ_ygqfH_gcvmUAQUb6hp8fIUM_tWwPDD6o,502
8
+ synaflow/validator.py,sha256=BpjqYTSIUFqIwSr2di2M8iAAHxXS5j_8EViNcXuDdfc,6425
9
+ synaflow-0.1.0.dist-info/METADATA,sha256=NlaWpgcTxueXCG0Uv99to0JYU0l73AYTJ2tEmOfcCOs,4787
10
+ synaflow-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ synaflow-0.1.0.dist-info/licenses/LICENSE,sha256=XpDpkAL4Vwe8Mb6Uflgq6k-S7UkmcujQDar7Fx8T46E,1071
12
+ synaflow-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Human Software
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.