starforge-kernel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- starforge/__init__.py +86 -0
- starforge/core/__init__.py +0 -0
- starforge/core/checkpoints.py +178 -0
- starforge/core/figures.py +119 -0
- starforge/core/previews.py +109 -0
- starforge/core/provenance.py +192 -0
- starforge/core/runner.py +293 -0
- starforge/core/serializers.py +141 -0
- starforge/core/spec.py +126 -0
- starforge/index/__init__.py +9 -0
- starforge/index/scanner.py +487 -0
- starforge/kernel/__init__.py +0 -0
- starforge/kernel/__main__.py +3 -0
- starforge/kernel/server.py +351 -0
- starforge/kernel/worker.py +66 -0
- starforge/mcp.py +283 -0
- starforge_kernel-0.1.0.dist-info/METADATA +76 -0
- starforge_kernel-0.1.0.dist-info/RECORD +20 -0
- starforge_kernel-0.1.0.dist-info/WHEEL +5 -0
- starforge_kernel-0.1.0.dist-info/top_level.txt +1 -0
starforge/core/runner.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Pipeline execution. Runs INSIDE the per-run worker process — the kernel
|
|
2
|
+
never imports user code (DESIGN.md §3, process-per-run).
|
|
3
|
+
|
|
4
|
+
Planning rules:
|
|
5
|
+
- stale nodes (no checkpoint for their history hash) execute;
|
|
6
|
+
- a fresh node is pulled back into execution if an executing descendant needs
|
|
7
|
+
one of its outputs and that output was ephemeral (never persisted) — this
|
|
8
|
+
cascades upward until every needed value is either loadable or recomputed;
|
|
9
|
+
- nodes with hashing problems, and every node downstream of a problem or a
|
|
10
|
+
failure, are blocked (independent branches keep running).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
import datetime as dt
|
|
17
|
+
import importlib
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
import traceback
|
|
21
|
+
from typing import Any, Callable
|
|
22
|
+
|
|
23
|
+
import starforge
|
|
24
|
+
from starforge.core import figures as figmod
|
|
25
|
+
from starforge.core.checkpoints import CheckpointStore
|
|
26
|
+
from starforge.core.provenance import BUILTIN_PREFIX, toposort
|
|
27
|
+
from starforge.core.serializers import EphemeralValueError
|
|
28
|
+
from starforge.core.spec import PipelineDoc, Node
|
|
29
|
+
|
|
30
|
+
#: Minimum seconds between forwarded progress events per node — blocks may
|
|
31
|
+
#: call progress() in tight loops; the canvas needs ~12fps at most.
|
|
32
|
+
PROGRESS_THROTTLE_S = 0.08
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _progress_hook(node_id: str, emit: Emit) -> Callable[[Any, Any, Any], None]:
    """Build the throttled progress callback used while ``node_id`` runs.

    The returned ``hook(current, total, label)`` forwards a ``node_progress``
    event through ``emit``. A non-final event that arrives less than
    ``PROGRESS_THROTTLE_S`` seconds after the previously forwarded one is
    dropped. An event whose ``current`` equals a non-None ``total`` is always
    forwarded, so the final state is never lost to throttling.

    Args:
        node_id: Id reported in the ``node`` field of every event.
        emit: Callback receiving each forwarded event dict.

    Returns:
        The progress callback. ``current``, ``total`` and ``label`` are only
        included in the event when they are not None; ``percent`` (clamped to
        0..1) is added when both numbers are numeric and ``total`` is non-zero.
    """
    # time.monotonic() has an undefined reference point, so "nothing emitted
    # yet" is modelled as -inf instead of 0.0: the first event always passes
    # the throttle, whatever the clock currently reads.
    last_emit_at = float("-inf")

    def hook(current: Any, total: Any, label: Any) -> None:
        nonlocal last_emit_at
        now = time.monotonic()
        final = total is not None and current == total
        if not final and now - last_emit_at < PROGRESS_THROTTLE_S:
            return
        last_emit_at = now
        event: dict[str, Any] = {"event": "node_progress", "node": node_id}
        if current is not None:
            event["current"] = current
        if total is not None:
            event["total"] = total
        if label is not None:
            event["label"] = str(label)
        if isinstance(current, (int, float)) and isinstance(total, (int, float)) and total:
            event["percent"] = max(0.0, min(1.0, current / total))
        emit(event)

    return hook
|
|
56
|
+
|
|
57
|
+
Emit = Callable[[dict[str, Any]], None]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class _NodePlanInfo:
    """Planning record for one node: its history hash and its problem list."""

    # None when no history hash is available for the node.
    history_hash: str | None
    # Problem messages attached to the node; presumably empty when it is clean.
    problems: list[str]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _ancestor_cone(doc: PipelineDoc, target: str) -> set[str]:
    """Return ``target`` together with every node reachable via in-edges."""
    visited: set[str] = {target}
    pending: list[str] = [target]
    while pending:
        current = pending.pop()
        # Only parents seen for the first time need their own in-edges walked.
        new_parents = {edge.source for edge in doc.in_edges(current)} - visited
        visited |= new_parents
        pending.extend(new_parents)
    return visited
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def plan_execution(
    doc: PipelineDoc,
    states: dict[str, dict[str, Any]],
    store: CheckpointStore,
    target: str | None = None,
) -> tuple[list[str], set[str], set[str], set[str]]:
    """Returns (topo_order, execute, reuse, blocked).

    With ``target``, planning is scoped to the target's ancestor cone —
    "run to here": only what's needed to produce that node executes. A
    ``target`` that names no node in ``doc`` is ignored and the whole
    document is planned.

    Args:
        doc: Pipeline document supplying nodes and edges.
        states: Per-node state keyed by node id. Reads ``problems``,
            ``history_hash`` and ``stale``; a missing ``stale`` counts as
            stale, and a missing or None ``history_hash`` blocks the node.
        store: Checkpoint store, queried through ``is_ephemeral`` to decide
            whether a fresh parent must be recomputed for an executing child.
        target: Optional node id to scope the plan to.

    Returns:
        ``order`` (restricted to the cone when scoped) plus three disjoint
        sets: ``execute``, ``reuse`` and ``blocked``. ``reuse`` is every
        ordered node that is neither executing nor blocked.
    """
    order, cyclic = toposort(doc)
    if target is not None and any(n.id == target for n in doc.nodes):
        cone = _ancestor_cone(doc, target)
        order = [nid for nid in order if nid in cone]
    blocked: set[str] = set(cyclic) & set(order)
    execute: set[str] = set()

    for nid in order:
        state = states.get(nid, {})
        # A node already blocked as cyclic must never be scheduled as well,
        # otherwise it would appear in both sets and could drag fresh parents
        # into the ephemeral cascade below.
        if nid in blocked or state.get("problems") or state.get("history_hash") is None:
            blocked.add(nid)
        elif state.get("stale", True):
            execute.add(nid)

    # Block everything downstream of a blocked node. One pass suffices for
    # acyclic nodes because ``order`` lists parents before children.
    for nid in order:
        if nid in blocked:
            continue
        if any(edge.source in blocked for edge in doc.in_edges(nid)):
            blocked.add(nid)
            execute.discard(nid)

    # Ephemeral cascade: pull fresh parents back in when an executing child
    # needs an output that was never persisted. Repeats until stable because
    # a newly scheduled parent may itself need an ephemeral grandparent.
    changed = True
    while changed:
        changed = False
        for edge in doc.edges:
            if edge.target not in execute or edge.source in execute or edge.source in blocked:
                continue
            parent_hash = states.get(edge.source, {}).get("history_hash")
            if parent_hash is None:
                continue
            if store.is_ephemeral(parent_hash, edge.source_output):
                execute.add(edge.source)
                changed = True

    reuse = {nid for nid in order if nid not in execute and nid not in blocked}
    return order, execute, reuse, blocked
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _run_builtin(node: Node) -> dict[str, Any]:
    """Evaluate a built-in block and return its outputs keyed by output name.

    Raises:
        ValueError: if ``node.block`` is not a known built-in.
    """
    if node.block != "builtin:constant":
        raise ValueError(f"unknown builtin '{node.block}'")
    # A constant node publishes its ``value`` param (None when unset).
    constant = node.params.get("value")
    return {"output": constant}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _import_function(module: str, qualname: str) -> Callable[..., Any]:
    """Import ``module`` and return the callable named by ``qualname``.

    ``qualname`` may be dotted (for example ``"Class.method"``): each segment
    is resolved with ``getattr`` in turn, so callables nested inside classes
    or namespaces are reachable, not only module-level ones.

    Args:
        module: Dotted module path handed to ``importlib.import_module``.
        qualname: Attribute path of the callable inside that module.

    Returns:
        The resolved callable.

    Raises:
        ImportError: propagated from ``importlib.import_module``.
        AttributeError: if any segment is missing (or None), or the resolved
            object is not callable.
    """
    resolved: Any = importlib.import_module(module)
    for part in qualname.split("."):
        resolved = getattr(resolved, part, None)
        if resolved is None:
            break
    if resolved is None or not callable(resolved):
        raise AttributeError(f"function '{qualname}' not found in module '{module}'")
    return resolved
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _map_outputs(result: Any, output_names: list[str], block_label: str) -> dict[str, Any]:
    """Pair a block's return value with its declared output names.

    A single declared output receives the return value as-is, even when that
    value is itself a tuple or list. Otherwise the return value must be a
    tuple or list holding exactly one element per declared output.

    Raises:
        TypeError: if the return value does not match the declared outputs.
    """
    expected = len(output_names)
    if expected == 1:
        (only_name,) = output_names
        return {only_name: result}

    is_sequence = isinstance(result, (tuple, list))
    if is_sequence and len(result) == expected:
        return {name: value for name, value in zip(output_names, result)}

    raise TypeError(
        f"'{block_label}' declares outputs {output_names} but returned "
        f"{type(result).__name__} (expected a {len(output_names)}-tuple)"
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def run_pipeline(
    doc: PipelineDoc,
    blocks: dict[str, dict[str, Any]],
    states: dict[str, dict[str, Any]],
    store: CheckpointStore,
    emit: Emit,
    pickle_enabled: bool = False,
    target: str | None = None,
) -> str:
    """Execute the plan; returns terminal status: 'completed' or 'failed'.

    Args:
        doc: Pipeline document to run.
        blocks: Block metadata keyed by block id. Reads ``module``,
            ``qualname`` and ``outputs`` (required) plus ``label``,
            ``source_hash`` and ``optional_params`` (optional).
        states: Per-node state keyed by node id (``history_hash``,
            ``problems``, ``stale``).
        store: Checkpoint store used to load parent outputs and persist
            results.
        emit: Callback receiving every event dict.
        pickle_enabled: Forwarded to ``store.write``.
        target: Optional "run to here" node id, forwarded to the planner.

    Returns:
        ``"failed"`` if any node raised while executing, else
        ``"completed"``. Blocked nodes alone do not fail the run.

    Events: one ``run_plan``; then per node one of ``node_blocked``,
    ``node_skipped``, or ``node_started`` followed by ``node_completed`` or
    ``node_failed`` (with ``node_progress`` in between when the block reports
    progress); finally one ``run_finished``.
    """
    store.ensure_layout()
    order, execute, reuse, blocked = plan_execution(doc, states, store, target=target)
    emit(
        {
            "event": "run_plan",
            "execute": sorted(execute),
            "reuse": sorted(reuse),
            "blocked": sorted(blocked),
        }
    )

    # In-memory values for this run: (node_id, output_name) -> value.
    memory: dict[tuple[str, str], Any] = {}
    failed: set[str] = set()

    for nid in order:
        state = states.get(nid, {})
        node_hash = state.get("history_hash")

        # Failures are only known at run time, so blocking is re-evaluated
        # here and propagated to everything downstream of a failed node.
        if nid in blocked or any(e.source in failed or e.source in blocked for e in doc.in_edges(nid)):
            blocked.add(nid)
            emit({"event": "node_blocked", "node": nid, "problems": state.get("problems", [])})
            continue

        if nid in reuse:
            store.touch(node_hash)  # LRU signal for checkpoint GC
            emit({"event": "node_skipped", "node": nid, "history_hash": node_hash})
            continue

        node = doc.node(nid)
        emit({"event": "node_started", "node": nid, "block": node.block})
        # Monotonic clock: a wall-clock adjustment mid-run must not produce a
        # negative or inflated duration.
        started = time.monotonic()
        try:
            connected: dict[str, dict[str, str]] = {}
            side_figures: list[Any] = []
            if node.block.startswith(BUILTIN_PREFIX):
                outputs = _run_builtin(node)
                label = node.block.removeprefix(BUILTIN_PREFIX).title()
                source_hash = node.block
            else:
                info = blocks[node.block]
                label = info.get("label", node.block)
                kwargs: dict[str, Any] = {}
                for edge in doc.in_edges(nid):
                    key = (edge.source, edge.source_output)
                    parent_hash = states[edge.source]["history_hash"]
                    if key not in memory:
                        try:
                            memory[key] = store.load_output(parent_hash, edge.source_output)
                        except EphemeralValueError as err:
                            # plan_execution should have prevented this; surface loudly.
                            raise RuntimeError(
                                f"input '{edge.target_param}' of node '{nid}' is ephemeral and its "
                                f"producer was not scheduled — planner bug, please report"
                            ) from err
                    kwargs[edge.target_param] = memory[key]
                    connected[edge.target_param] = {
                        "node": edge.source,
                        "output": edge.source_output,
                        "history_hash": parent_hash,
                    }
                # Literal params only fill what no edge already supplies.
                for name, value in node.params.items():
                    if name not in kwargs:
                        kwargs[name] = value
                # `T | None` params with no signature default are optional by
                # *Forge convention: inject None when nothing else fills them.
                for name in info.get("optional_params", ()):
                    kwargs.setdefault(name, None)

                # capture() wraps the import too: module-level figure code
                # and figures created/shown inside the call are both swept.
                starforge._progress_hook = _progress_hook(nid, emit)
                try:
                    with figmod.capture() as captured:
                        fn = _import_function(info["module"], info["qualname"])
                        result = fn(**kwargs)
                finally:
                    starforge._progress_hook = None
                outputs = _map_outputs(result, list(info["outputs"]), label)
                # Figures returned as outputs are persisted with the outputs;
                # only the remaining captured ones count as side figures.
                side_figures = [
                    fig for fig in captured.all_objects()
                    if all(fig is not value for value in outputs.values())
                ]
                source_hash = info.get("source_hash")

            duration = time.monotonic() - started
            try:
                manifest = store.write(
                    node_hash,
                    {
                        "block_id": node.block,
                        "label": label,
                        "source_hash": source_hash,
                        "params": {k: v for k, v in node.params.items() if k not in connected},
                        "inputs": connected,
                        "duration_seconds": round(duration, 6),
                        "finished_at": dt.datetime.now(dt.timezone.utc).isoformat(),
                    },
                    outputs,
                    pickle_enabled=pickle_enabled,
                    side_figures=side_figures,
                )
            finally:
                # Release side figures even when persisting the checkpoint fails.
                figmod.close_figures(side_figures)
            for name, value in outputs.items():
                memory[(nid, name)] = value
            emit(
                {
                    "event": "node_completed",
                    "node": nid,
                    "history_hash": node_hash,
                    "duration_seconds": round(duration, 6),
                    "outputs": manifest,
                }
            )
        except Exception:
            # Per-node boundary: one failing node must not abort independent
            # branches, so the error is reported and the loop continues.
            failed.add(nid)
            emit(
                {
                    "event": "node_failed",
                    "node": nid,
                    "duration_seconds": round(time.monotonic() - started, 6),
                    "traceback": traceback.format_exc(),
                }
            )

    status = "failed" if failed else "completed"
    emit({"event": "run_finished", "status": status})
    return status
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Typed serializer registry for checkpoint outputs (DESIGN.md §8).
|
|
2
|
+
|
|
3
|
+
Probed in order per value: parquet (DataFrame/Series) → npy (ndarray) → json
|
|
4
|
+
(plain data) → pickle (opt-in, default OFF) → ephemeral.
|
|
5
|
+
|
|
6
|
+
Ephemeral values flow normally to downstream nodes within the same run but
|
|
7
|
+
are not persisted, so future runs recompute them on demand. That keeps the
|
|
8
|
+
cost of unserializable types localized instead of poisoning the store.
|
|
9
|
+
|
|
10
|
+
Import discipline: this module never imports pandas/numpy itself. If a value
|
|
11
|
+
IS a DataFrame, pandas is by definition already imported in this process —
|
|
12
|
+
we detect by the type's module name first, then import for free.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
import pickle
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
EPHEMERAL = "ephemeral"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class EphemeralValueError(RuntimeError):
    """Signals an attempt to load an output that has no persisted form."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _root_type_module(value: Any) -> str:
    """Return the top-level package name of the module defining ``value``'s type."""
    module_path = type(value).__module__
    root, _, _ = module_path.partition(".")
    return root
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _meta(value: Any) -> dict[str, Any]:
    """Summarise ``value`` for its manifest entry: type name plus a size hint.

    The hint is ``shape`` when the value exposes a tuple-of-ints ``shape``
    attribute, otherwise ``len`` for lists, dicts and strings; other values
    get the type name only.
    """
    summary: dict[str, Any] = {"type": type(value).__name__}
    dims = getattr(value, "shape", None)
    has_int_shape = isinstance(dims, tuple) and all(isinstance(d, int) for d in dims)
    if has_int_shape:
        summary["shape"] = list(dims)
        return summary
    if isinstance(value, (list, dict, str)):
        summary["len"] = len(value)
    return summary
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _discard_partial(path: Path) -> None:
    """Best-effort removal of a file left behind by a failed serializer attempt."""
    try:
        path.unlink(missing_ok=True)
    except OSError:
        # Cleanup only: the caller falls through to the next serializer
        # either way, so a failed delete must not abort the save.
        pass


def save_value(
    value: Any,
    outputs_dir: Path,
    name: str,
    pickle_enabled: bool = False,
) -> dict[str, Any]:
    """Persist one output value; returns its manifest entry.

    Serializers are tried in this order, the first success wins: parquet
    (pandas DataFrame/Series), npy (numpy ndarray), json, rendered figure
    (value stays ephemeral but gets an ``artifact``), pickle (only when
    ``pickle_enabled``), and finally plain ephemeral. A serializer that fails
    after starting to write has its partial file removed, so ``outputs_dir``
    never holds a file the returned entry does not reference.

    Args:
        value: The output value to persist.
        outputs_dir: Directory receiving the file; created if missing.
        name: Output name, used as the file stem and stored in the entry.
        pickle_enabled: Allow the pickle fallback (off by default).

    Returns:
        A dict with ``name``, ``meta``, ``serializer`` and ``file`` (None for
        ephemeral values), plus ``artifact`` for rendered figures.
    """
    entry: dict[str, Any] = {"name": name, "meta": _meta(value)}
    outputs_dir.mkdir(parents=True, exist_ok=True)

    if _root_type_module(value) == "pandas":
        # The value's type lives in pandas, so pandas is already imported.
        import pandas as pd

        frame = value
        if isinstance(value, pd.Series):
            frame = value.to_frame(name=value.name if value.name is not None else "value")
            entry["meta"]["series"] = True
        if isinstance(frame, pd.DataFrame):
            filename = f"{name}.parquet"
            try:
                frame.to_parquet(outputs_dir / filename)
            except (ImportError, ValueError, OSError):
                # no pyarrow, or unserializable dtypes — fall through
                _discard_partial(outputs_dir / filename)
            else:
                entry.update(serializer="parquet", file=filename)
                return entry

    if _root_type_module(value) == "numpy":
        import numpy as np

        if isinstance(value, np.ndarray):
            filename = f"{name}.npy"
            try:
                np.save(outputs_dir / filename, value, allow_pickle=False)
            except (ValueError, OSError):
                # e.g. object arrays, which need pickling — fall through
                _discard_partial(outputs_dir / filename)
            else:
                entry.update(serializer="npy", file=filename)
                return entry

    # Encode before touching the filesystem: a value that is simply not JSON
    # must not create or remove any file.
    try:
        text = json.dumps(value)
    except (TypeError, ValueError):
        text = None
    if text is not None:
        filename = f"{name}.json"
        try:
            (outputs_dir / filename).write_text(text, encoding="utf-8")
        except (ValueError, OSError):
            _discard_partial(outputs_dir / filename)
        else:
            entry.update(serializer="json", file=filename)
            return entry

    # Figures can't round-trip from their rendered form, so the VALUE is
    # ephemeral (flows within a run; downstream recomputes later via the
    # cascade) — but it leaves a rendered artifact behind for display.
    from starforge.core import figures

    try:
        artifact = figures.render_figure(value, outputs_dir, name)
    except Exception:
        # Rendering is best-effort display sugar; any failure just means
        # "treat the value as not a figure".
        artifact = None
    if artifact is not None:
        entry.update(serializer=EPHEMERAL, file=None, artifact=artifact)
        return entry

    if pickle_enabled:
        filename = f"{name}.pkl"
        try:
            with (outputs_dir / filename).open("wb") as handle:
                pickle.dump(value, handle, protocol=pickle.HIGHEST_PROTOCOL)
        except Exception:
            # Deliberately broad: pickling runs arbitrary __reduce__ code.
            # pickle.dump may already have written part of the stream.
            _discard_partial(outputs_dir / filename)
        else:
            entry.update(serializer="pickle", file=filename)
            return entry

    entry.update(serializer=EPHEMERAL, file=None)
    return entry
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def load_value(outputs_dir: Path, entry: dict[str, Any]) -> Any:
    """Load the value described by a manifest ``entry`` from ``outputs_dir``.

    Args:
        outputs_dir: Directory holding the serialized output files.
        entry: Manifest entry with ``serializer``, ``name`` and ``file``.

    Returns:
        The deserialized value. A parquet entry flagged ``meta.series`` comes
        back as the first column of the stored frame.

    Raises:
        EphemeralValueError: the output was never persisted.
        ValueError: the entry names an unknown serializer.
        KeyError: a persisted entry has no ``file`` key.
    """
    serializer = entry.get("serializer")
    name = entry.get("name")
    if serializer == EPHEMERAL:
        raise EphemeralValueError(
            f"output '{name}' was not persisted (ephemeral); the producing node must re-run"
        )
    # Validate before reading entry["file"], so a malformed entry reports the
    # unknown serializer instead of a KeyError/TypeError on the path.
    if serializer not in ("parquet", "npy", "json", "pickle"):
        raise ValueError(f"unknown serializer {serializer!r} for output '{name}'")

    path = outputs_dir / entry["file"]
    if serializer == "parquet":
        import pandas as pd

        frame = pd.read_parquet(path)
        if entry.get("meta", {}).get("series"):
            return frame.iloc[:, 0]
        return frame
    if serializer == "npy":
        import numpy as np

        return np.load(path, allow_pickle=False)
    if serializer == "json":
        return json.loads(path.read_text(encoding="utf-8"))

    # serializer == "pickle"
    # NOTE(security): pickle.load executes code embedded in the file. This
    # presumes checkpoint files are only ever written by save_value on a
    # trusted machine; confirm before loading checkpoints from elsewhere.
    with path.open("rb") as handle:
        return pickle.load(handle)
|
starforge/core/spec.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""The ``.forge`` pipeline document model.
|
|
2
|
+
|
|
3
|
+
Documents are plain JSON text — the VS Code custom editor is text-based so
|
|
4
|
+
undo/redo/diff/merge come for free. Layout and notes are durable metadata and
|
|
5
|
+
never participate in provenance hashing.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
import json
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
SCHEMA = "starforge/1"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Node:
    """One block instance in the pipeline document."""

    id: str
    block: str  # block reference, "dotted.module:qualname"
    params: dict[str, Any] = field(default_factory=dict)  # literals for unconnected params
    position: dict[str, float] = field(default_factory=lambda: dict(x=0.0, y=0.0))
    notes: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the node as a JSON-ready mapping; field values are shared, not copied."""
        keys = ("id", "block", "params", "position", "notes")
        return {key: getattr(self, key) for key in keys}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class Edge:
    """A connection from one node's named output to a parameter of another node."""

    id: str
    source: str
    target: str
    target_param: str  # parameter NAME on the target function — never a slot index
    source_output: str = "output"

    def to_dict(self) -> dict[str, Any]:
        """Return the edge as a JSON-ready mapping (source fields first, then target)."""
        keys = ("id", "source", "source_output", "target", "target_param")
        return {key: getattr(self, key) for key in keys}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class PipelineDoc:
    """In-memory form of a ``.forge`` document: a name, nodes, edges and comments."""

    name: str = "Untitled"
    nodes: list[Node] = field(default_factory=list)
    edges: list[Edge] = field(default_factory=list)
    #: Canvas annotation boxes: [{id, title, description, position, width,
    #: height, color}]. Pure layout metadata — round-trips untouched, never
    #: participates in hashing or execution.
    comments: list[dict[str, Any]] = field(default_factory=list)

    def node(self, node_id: str) -> Node:
        """Return the first node whose id is ``node_id``; raise ``KeyError`` if none."""
        found = next((candidate for candidate in self.nodes if candidate.id == node_id), None)
        if found is None:
            raise KeyError(node_id)
        return found

    def in_edges(self, node_id: str) -> list[Edge]:
        """Return the edges that end at ``node_id``, in document order."""
        incoming: list[Edge] = []
        for edge in self.edges:
            if edge.target == node_id:
                incoming.append(edge)
        return incoming

    def to_dict(self) -> dict[str, Any]:
        """Return the document as a JSON-ready mapping tagged with the schema id."""
        payload: dict[str, Any] = {"schema": SCHEMA, "name": self.name}
        payload["nodes"] = [n.to_dict() for n in self.nodes]
        payload["edges"] = [e.to_dict() for e in self.edges]
        payload["comments"] = self.comments
        return payload

    def to_json(self) -> str:
        """Serialize the document as JSON text indented by two spaces."""
        document = self.to_dict()
        return json.dumps(document, indent=2)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "PipelineDoc":
        """Build a document from its mapping form.

        A missing ``schema`` key is accepted as the current schema; missing
        optional fields fall back to the dataclass defaults.

        Raises:
            ValueError: if ``schema`` is present and not the supported one.
            KeyError: if a node or edge lacks a required field.
        """
        schema = d.get("schema", SCHEMA)
        if schema != SCHEMA:
            raise ValueError(f"unsupported .forge schema {schema!r} (expected {SCHEMA!r})")

        nodes: list[Node] = []
        for raw in d.get("nodes", []):
            nodes.append(
                Node(
                    id=raw["id"],
                    block=raw["block"],
                    # dict(...) copies, so the document never aliases its input.
                    params=dict(raw.get("params", {})),
                    position=dict(raw.get("position", {"x": 0.0, "y": 0.0})),
                    notes=raw.get("notes", ""),
                )
            )

        edges: list[Edge] = []
        for raw in d.get("edges", []):
            edges.append(
                Edge(
                    id=raw["id"],
                    source=raw["source"],
                    source_output=raw.get("source_output", "output"),
                    target=raw["target"],
                    target_param=raw["target_param"],
                )
            )

        return cls(
            name=d.get("name", "Untitled"),
            nodes=nodes,
            edges=edges,
            comments=list(d.get("comments", [])),
        )

    @classmethod
    def from_json(cls, text: str) -> "PipelineDoc":
        """Parse JSON ``text`` and build a document from it."""
        parsed = json.loads(text)
        return cls.from_dict(parsed)

    @classmethod
    def load(cls, path: str | Path) -> "PipelineDoc":
        """Read the UTF-8 file at ``path`` and build a document from it."""
        text = Path(path).read_text(encoding="utf-8")
        return cls.from_json(text)

    def save(self, path: str | Path) -> None:
        """Write the document to ``path`` as UTF-8 JSON with a trailing newline."""
        serialized = self.to_json() + "\n"
        Path(path).write_text(serialized, encoding="utf-8")
|