starforge-kernel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
starforge/__init__.py ADDED
@@ -0,0 +1,86 @@
1
+ """*Forge — pipeline canvas for the repo you already have open.
2
+
3
+ This top-level module is the entire public surface that user code touches.
4
+ It must import in microseconds and depend on nothing: the decorator lives in
5
+ production codebases and has to be free. Everything heavy (indexer, engine,
6
+ kernel) lives in submodules that only *Forge itself* imports.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ __version__ = "0.1.0"
12
+
13
+ __all__ = ["block", "progress", "BLOCK_ATTR"]
14
+
15
#: Attribute set on decorated functions. The AST indexer matches the decorator
#: syntactically and never imports user code; this runtime tag exists so user
#: code and future runtime introspection can also recognize blocks.
BLOCK_ATTR = "__starforge_block__"


def block(fn=None, *, label=None, category=None, outputs=None):
    """Register a function as a *Forge block.

    Usable bare or with keyword arguments::

        @block
        def clean(raw: pd.DataFrame) -> pd.DataFrame: ...

        @block(label="Clean AUC Matrix", category="QC", outputs=("clean", "stats"))
        def clean_auc(raw, min_coverage: float = 0.8): ...

    The decorated function is returned unchanged — behavior under pytest, in
    CI, or in production is identical whether or not *Forge is anywhere near.
    The only runtime effect is a metadata dict stored on the function under
    the ``BLOCK_ATTR`` attribute.

    Args:
        fn: The function being decorated when used bare (``@block``); leave
            unset when passing keyword arguments.
        label: Palette display name. Defaults to the function name, title-cased.
        category: Palette grouping. Defaults to the defining module's path.
        outputs: Names for multiple return values (function must return a tuple
            of the same length). Defaults to a single output named "output".
            A bare string is taken as one output name, not split into
            characters.

    Returns:
        ``fn`` itself when used bare, otherwise a decorator that tags and
        returns the function it receives.

    Note for palette metadata: the indexer reads ``label``/``category``/
    ``outputs`` from the *source*, so they must be literals at the decoration
    site to appear in the palette.
    """
    if outputs is None:
        output_names = None
    elif isinstance(outputs, str):
        # tuple("clean") would yield ('c', 'l', 'e', 'a', 'n'); a lone string
        # can only sensibly mean a single named output.
        output_names = (outputs,)
    else:
        output_names = tuple(outputs)

    def apply(f):
        # A fresh dict per function so tags are never shared between blocks.
        setattr(
            f,
            BLOCK_ATTR,
            {
                "label": label,
                "category": category,
                "outputs": output_names,
            },
        )
        return f

    if fn is not None:
        return apply(fn)
    return apply
61
+
62
+
63
#: Installed by the *Forge run worker around each block call; None everywhere
#: else, which keeps progress() a guaranteed no-op in pytest/CI/production.
_progress_hook = None


def progress(current=None, total=None, label=None):
    """Forward a progress update to the *Forge canvas, if one is listening.

    Intended to be sprinkled inside a block::

        for i, chunk in enumerate(chunks):
            progress(i + 1, len(chunks), "fitting folds")
            ...

    When no hook is installed (``_progress_hook`` is None) the call returns
    immediately. All three arguments are optional and are passed to the hook
    positionally as ``(current, total, label)``. Any ``Exception`` raised by
    the hook is swallowed so reporting can never break the calling code.
    """
    # Snapshot the module global once so a concurrent reset cannot slip in
    # between the None check and the call.
    reporter = _progress_hook
    if reporter is None:
        return
    try:
        reporter(current, total, label)
    except Exception:
        pass  # progress must never break user code
File without changes
@@ -0,0 +1,178 @@
1
+ """Checkpoint store under ``<workspace>/.forge/checkpoints/``.
2
+
3
+ One directory per history hash (truncated to 32 hex chars — 128 bits — to
4
+ stay friendly to Windows path limits):
5
+
6
+ .forge/checkpoints/<hash32>/
7
+ ├── provenance.json # written LAST: its presence marks completeness
8
+ └── outputs/<name>.<ext per serializer>
9
+
10
+ The store also owns ``.forge/.gitignore`` so checkpoints and caches never
11
+ land in the user's repo history while ``pipelines/`` remains committable.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import os
18
+ from pathlib import Path
19
+ import shutil
20
+ import time
21
+ from typing import Any
22
+
23
+ from starforge.core import figures as figmod
24
+ from starforge.core import previews, serializers
25
+
26
FORGE_DIR = ".forge"
GITIGNORE_BODY = "checkpoints/\ncache/\n"


class CheckpointStore:
    """On-disk checkpoint store rooted at ``<workspace>/.forge``.

    Each checkpoint lives in ``checkpoints/<hash32>/`` and counts as complete
    only once its ``provenance.json`` exists.
    """

    def __init__(self, workspace: str | Path) -> None:
        """Record the workspace root and derive the ``.forge`` paths (no I/O)."""
        self.workspace = Path(workspace)
        self.forge_dir = self.workspace / FORGE_DIR
        self.base = self.forge_dir / "checkpoints"

    def ensure_layout(self) -> None:
        """Create ``pipelines/``, ``cache/`` and ``checkpoints/`` under
        ``.forge`` and write ``.forge/.gitignore`` unless one already exists."""
        (self.forge_dir / "pipelines").mkdir(parents=True, exist_ok=True)
        (self.forge_dir / "cache").mkdir(parents=True, exist_ok=True)
        self.base.mkdir(parents=True, exist_ok=True)
        gitignore = self.forge_dir / ".gitignore"
        if not gitignore.exists():
            # Never overwrite: the user may have edited it.
            gitignore.write_text(GITIGNORE_BODY, encoding="utf-8")

    def dir_for(self, history_hash: str) -> Path:
        """Return the checkpoint directory for ``history_hash`` (first 32 chars)."""
        return self.base / history_hash[:32]

    def exists(self, history_hash: str) -> bool:
        """True when the checkpoint's ``provenance.json`` is present."""
        return (self.dir_for(history_hash) / "provenance.json").is_file()

    def read_provenance(self, history_hash: str) -> dict[str, Any]:
        """Load and parse the checkpoint's ``provenance.json``.

        Raises whatever the read or ``json.loads`` raises (e.g.
        ``FileNotFoundError``, ``json.JSONDecodeError``).
        """
        path = self.dir_for(history_hash) / "provenance.json"
        return json.loads(path.read_text(encoding="utf-8"))

    def write(
        self,
        history_hash: str,
        provenance: dict[str, Any],
        outputs: dict[str, Any],
        pickle_enabled: bool = False,
        side_figures: list[Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Persist outputs then provenance (in that order, for atomicity).
        Returns the output manifest, including ephemeral entries.

        ``side_figures`` are figures the block created or showed without
        returning them (plt.show() and friends); they render to artifacts
        recorded under the provenance ``figures`` key."""
        directory = self.dir_for(history_hash)
        outputs_dir = directory / "outputs"
        directory.mkdir(parents=True, exist_ok=True)
        manifest = []
        for name, value in outputs.items():
            entry = serializers.save_value(value, outputs_dir, name, pickle_enabled=pickle_enabled)
            try:
                # Previews ride inside provenance.json so the stdlib-only
                # kernel can serve them without deserializing data. Computed
                # for ephemeral outputs too — their only window is right now.
                if entry.get("artifact"):
                    entry["preview"] = {
                        "kind": "figure",
                        "file": entry["artifact"]["file"],
                        "format": entry["artifact"]["kind"],
                    }
                else:
                    entry["preview"] = previews.build_preview(value)
            except Exception:
                # A failed preview must not lose the checkpoint itself.
                entry["preview"] = {"kind": "text", "text": f"<preview failed for {type(value).__name__}>"}
            manifest.append(entry)

        rendered_figures: list[dict[str, Any]] = []
        for i, fig in enumerate(side_figures or []):
            try:
                artifact = figmod.render_figure(fig, outputs_dir, f"figure_{i}")
            except Exception:
                # Side figures are best-effort extras; skip ones that fail.
                artifact = None
            if artifact is not None:
                rendered_figures.append(artifact)

        record = dict(provenance)  # shallow copy: the caller's dict is untouched
        record["history_hash"] = history_hash
        record["outputs"] = manifest
        record["figures"] = rendered_figures
        record["dir"] = directory.relative_to(self.workspace).as_posix()
        path = directory / "provenance.json"
        tmp = directory / "provenance.json.tmp"
        # Write-then-rename so provenance.json is never observed half-written.
        tmp.write_text(json.dumps(record, indent=2, default=repr), encoding="utf-8")
        tmp.replace(path)
        return manifest

    def output_entry(self, history_hash: str, name: str) -> dict[str, Any]:
        """Return the manifest entry for output ``name``.

        Raises KeyError when the checkpoint's provenance lists no such output.
        """
        for entry in self.read_provenance(history_hash).get("outputs", []):
            if entry.get("name") == name:
                return entry
        raise KeyError(f"checkpoint {history_hash[:12]} has no output named '{name}'")

    def load_output(self, history_hash: str, name: str) -> Any:
        """Raises serializers.EphemeralValueError for non-persisted outputs."""
        entry = self.output_entry(history_hash, name)
        return serializers.load_value(self.dir_for(history_hash) / "outputs", entry)

    def is_ephemeral(self, history_hash: str, name: str) -> bool:
        """True when the output is marked ephemeral, or when its entry cannot
        be read at all (missing checkpoint, missing output, corrupt JSON)."""
        try:
            entry = self.output_entry(history_hash, name)
        except (KeyError, FileNotFoundError, json.JSONDecodeError):
            return True
        return entry.get("serializer") == serializers.EPHEMERAL

    def touch(self, history_hash: str) -> None:
        """Bump the checkpoint dir's mtime so LRU GC sees reuse as recency."""
        try:
            os.utime(self.dir_for(history_hash))
        except OSError:
            pass

    def gc(self, max_bytes: int) -> dict[str, int]:
        """Least-recently-used eviction down to ``max_bytes`` total.

        Deleting a live checkpoint is always safe — the node just reads as
        stale and recomputes — so a plain LRU needs no liveness analysis.
        Directories that cannot be measured (files vanishing mid-scan, e.g.
        under a concurrent write or delete) are skipped rather than aborting
        the sweep.
        Returns {"freed_bytes", "deleted", "remaining_bytes"}.
        """
        entries: list[tuple[float, int, Path]] = []
        total = 0
        if self.base.is_dir():
            for directory in self.base.iterdir():
                if not directory.is_dir():
                    continue
                try:
                    # Both the size scan and the mtime read can race with a
                    # concurrent writer or deleter; treat either as "skip".
                    size = sum(f.stat().st_size for f in directory.rglob("*") if f.is_file())
                    mtime = directory.stat().st_mtime
                except OSError:
                    continue
                entries.append((mtime, size, directory))
                total += size

        freed = 0
        deleted = 0
        entries.sort()  # oldest first
        for _mtime, size, directory in entries:
            if total - freed <= max_bytes:
                break
            shutil.rmtree(directory, ignore_errors=True)
            freed += size
            deleted += 1
        return {"freed_bytes": freed, "deleted": deleted, "remaining_bytes": total - freed}

    def clean_run_specs(self, max_age_seconds: float = 86400.0) -> None:
        """Run-spec files are one-shot worker inputs; sweep the stale ones.

        Deletes ``*.json`` files in ``.forge/cache/runs`` whose mtime is older
        than ``max_age_seconds``; files that cannot be inspected or removed
        are left alone.
        """
        runs_dir = self.forge_dir / "cache" / "runs"
        if not runs_dir.is_dir():
            return
        cutoff = time.time() - max_age_seconds
        for spec in runs_dir.glob("*.json"):
            try:
                if spec.stat().st_mtime < cutoff:
                    spec.unlink()
            except OSError:
                continue
@@ -0,0 +1,119 @@
1
+ """Figure capture and artifact rendering.
2
+
3
+ The notebook muscle memory is ``plt.plot(...); plt.show()`` — or no show()
4
+ at all. The worker honors it with zero code changes: matplotlib runs on the
5
+ Agg backend, and :func:`capture` sweeps every figure that exists after the
6
+ block call that didn't exist before (``plt.show`` is a no-op under Agg, so
7
+ "shown" figures are still open when we sweep). Plotly's ``fig.show()`` is
8
+ intercepted by patching ``plotly.io.show`` while the block runs.
9
+
10
+ Captured and returned figures render to checkpoint artifacts — matplotlib →
11
+ PNG, plotly → self-contained HTML — and are closed afterward so a long run
12
+ never accumulates canvases.
13
+
14
+ Import discipline: stdlib-only at import time; matplotlib/plotly are only
15
+ touched when the user's process already loaded them.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from contextlib import contextmanager
21
+ from dataclasses import dataclass, field
22
+ from pathlib import Path
23
+ import sys
24
+ from typing import Any, Iterator
25
+
26
+
27
+ def _pyplot() -> Any | None:
28
+ return sys.modules.get("matplotlib.pyplot")
29
+
30
+
31
+ def _root_module(value: Any) -> str:
32
+ return type(value).__module__.split(".")[0]
33
+
34
+
35
@dataclass
class CapturedFigures:
    """Figures collected during one capture, grouped by plotting library."""

    matplotlib: list[Any] = field(default_factory=list)
    plotly: list[Any] = field(default_factory=list)

    def all_objects(self) -> list[Any]:
        """Return a new list: matplotlib figures first, then plotly figures."""
        return [fig for group in (self.matplotlib, self.plotly) for fig in group]
42
+
43
+
44
@contextmanager
def capture() -> Iterator[CapturedFigures]:
    """Collect figures created (matplotlib) or shown (plotly) while the
    ``with`` body runs.

    Matplotlib figures are found by comparing ``get_fignums()`` before and
    after the body; pyplot is looked up again on exit because the body may
    have imported it. Plotly figures are recorded by swapping
    ``plotly.io.show`` for a collector, which only happens when ``plotly.io``
    was already imported on entry; the original ``show`` is restored on exit.
    """
    captured = CapturedFigures()

    pyplot_on_entry = _pyplot()
    known_fignums: set[int] = set() if pyplot_on_entry is None else set(pyplot_on_entry.get_fignums())

    plotly_io = sys.modules.get("plotly.io")
    saved_show = None if plotly_io is None else getattr(plotly_io, "show", None)
    patched = plotly_io is not None and saved_show is not None
    if patched:

        def _record(fig: Any, *args: Any, **kwargs: Any) -> None:
            # Stand-in for plotly.io.show: remember the figure, display nothing.
            captured.plotly.append(fig)

        plotly_io.show = _record

    try:
        yield captured
    finally:
        if patched:
            plotly_io.show = saved_show
        pyplot_on_exit = _pyplot()  # may have been imported during the call
        if pyplot_on_exit is not None:
            fresh_nums = [num for num in pyplot_on_exit.get_fignums() if num not in known_fignums]
            for num in fresh_nums:
                captured.matplotlib.append(pyplot_on_exit.figure(num))
74
+
75
+
76
def as_figure(value: Any) -> Any | None:
    """Map ``value`` to something renderable, or None when it is not a figure.

    - matplotlib object with ``savefig``: returned as-is;
    - other matplotlib object whose ``figure`` attribute has ``savefig``
      (Axes and friends, e.g. what ``sns.heatmap`` returns): that parent;
    - plotly object with ``write_html``: returned as-is.
    """
    package = _root_module(value)
    if package == "plotly":
        return value if hasattr(value, "write_html") else None
    if package != "matplotlib":
        return None
    if hasattr(value, "savefig"):
        return value
    owner = getattr(value, "figure", None)  # Axes and friends
    if owner is not None and hasattr(owner, "savefig"):
        return owner
    return None
92
+
93
+
94
def render_figure(value: Any, directory: Path, basename: str) -> dict[str, Any] | None:
    """Write ``value`` as a figure artifact inside ``directory``.

    Matplotlib figures become ``<basename>.png``; anything else accepted by
    :func:`as_figure` is written with ``write_html`` to ``<basename>.html``.
    ``directory`` is created if needed. Returns the artifact entry
    ``{"file", "kind"}``, or None when ``value`` is not a figure.
    """
    figure = as_figure(value)
    if figure is None:
        return None

    directory.mkdir(parents=True, exist_ok=True)

    if _root_module(figure) != "matplotlib":
        html_name = f"{basename}.html"
        figure.write_html(directory / html_name, include_plotlyjs=True, full_html=True)
        return {"file": html_name, "kind": "html"}

    png_name = f"{basename}.png"
    # Pass the figure's own facecolor so the saved image keeps its background.
    figure.savefig(directory / png_name, dpi=110, bbox_inches="tight", facecolor=figure.get_facecolor())
    return {"file": png_name, "kind": "image"}
108
+
109
+
110
def close_figures(figures: list[Any]) -> None:
    """Close every matplotlib object in ``figures`` via ``pyplot.close``.

    Does nothing when pyplot has not been imported. Non-matplotlib entries
    are skipped, and a failure to close one figure never stops the rest.
    """
    pyplot = _pyplot()
    if pyplot is None:
        return
    for candidate in figures:
        if _root_module(candidate) != "matplotlib":
            continue
        try:
            pyplot.close(candidate)
        except Exception:
            pass
@@ -0,0 +1,109 @@
1
+ """Cropped, JSON-safe output previews, computed at checkpoint-write time.
2
+
3
+ Previews are precomputed artifacts stored inside ``provenance.json`` — the
4
+ kernel serves them by reading a file, never by deserializing data (it stays
5
+ stdlib-only and instant). Because they're built while the value is in the
6
+ worker's hands, even EPHEMERAL outputs get a preview of their last run.
7
+
8
+ Everything emitted here must survive strict JSON.parse on the TypeScript
9
+ side: NaN/Infinity are stringified, containers are size-capped, and unknown
10
+ objects fall back to repr.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from typing import Any
17
+
18
+ MAX_ROWS = 8
19
+ MAX_COLS = 10
20
+ MAX_ITEMS = 50
21
+ MAX_DEPTH = 5
22
+ MAX_CELL_CHARS = 120
23
+ MAX_TEXT_CHARS = 600
24
+ MAX_VALUE_CHARS = 2000
25
+
26
+
27
+ def _cell(value: Any) -> Any:
28
+ """One scalar table/array cell, strict-JSON safe."""
29
+ if isinstance(value, bool) or value is None:
30
+ return value
31
+ if isinstance(value, int):
32
+ return value
33
+ if isinstance(value, float):
34
+ return value if value == value and abs(value) != float("inf") else str(value)
35
+ text = value if isinstance(value, str) else repr(value)
36
+ return text[:MAX_CELL_CHARS] + ("…" if len(text) > MAX_CELL_CHARS else "")
37
+
38
+
39
def _sanitize(value: Any, depth: int = 0) -> Any:
    """Recursively convert ``value`` into a size-capped, JSON-safe structure.

    Dicts keep their first ``MAX_ITEMS`` items (keys stringified and clipped
    to ``MAX_CELL_CHARS``) plus an ``"…"`` entry counting the rest. Lists and
    tuples keep their first ``MAX_ITEMS`` elements plus a trailing marker
    string. Anything else, and any container at ``MAX_DEPTH`` or deeper,
    goes through :func:`_cell`.
    """
    if depth < MAX_DEPTH:
        if isinstance(value, dict):
            kept = list(value.items())[:MAX_ITEMS]
            result: dict[str, Any] = {}
            for key, item in kept:
                name = str(key)[:MAX_CELL_CHARS]
                result[name] = _sanitize(item, depth + 1)
            hidden = len(value) - MAX_ITEMS
            if hidden > 0:
                result["…"] = f"+{hidden} more"
            return result
        if isinstance(value, (list, tuple)):
            elements = []
            for item in value[:MAX_ITEMS]:
                elements.append(_sanitize(item, depth + 1))
            hidden = len(value) - MAX_ITEMS
            if hidden > 0:
                elements.append(f"… +{hidden} more")
            return elements
    return _cell(value)
54
+
55
+
56
+ def _root_type_module(value: Any) -> str:
57
+ return type(value).__module__.split(".")[0]
58
+
59
+
60
def build_preview(value: Any) -> dict[str, Any]:
    """Build a small JSON-safe preview dict describing ``value``.

    The ``"kind"`` key selects the shape of the result:

    - ``"table"``: pandas DataFrame, or Series converted with ``to_frame()``;
      at most ``MAX_ROWS`` x ``MAX_COLS`` cells plus the full shape;
    - ``"array"``: numpy ndarray; dtype, shape, and a corner of at most
      ``MAX_ROWS`` entries per axis;
    - ``"value"``: numpy scalars, and plain Python data whose sanitized JSON
      encoding fits in ``MAX_VALUE_CHARS``;
    - ``"text"``: oversized plain data (truncated JSON) or any other object
      (truncated ``repr``).

    pandas and numpy are imported only when the value's type comes from them.
    """
    package = _root_type_module(value)

    if package == "pandas":
        import pandas as pd

        frame = value.to_frame() if isinstance(value, pd.Series) else value
        if isinstance(frame, pd.DataFrame):
            column_names = [str(c) for c in frame.columns[:MAX_COLS]]
            visible = frame.iloc[:MAX_ROWS, :MAX_COLS]
            n_rows, n_cols = frame.shape[0], frame.shape[1]
            index_cells = [_cell(label) for label in visible.index.tolist()]
            row_cells = []
            for row in visible.itertuples(index=False, name=None):
                row_cells.append([_cell(item) for item in row])
            return {
                "kind": "table",
                "shape": [int(n_rows), int(n_cols)],
                "columns": column_names,
                "columns_truncated": n_cols > MAX_COLS,
                "index": index_cells,
                "rows": row_cells,
            }

    if package == "numpy":
        import numpy as np

        if isinstance(value, np.ndarray):
            if value.ndim == 0:
                corner: Any = _cell(value.item())
            else:
                # The same leading slice on every axis selects the corner.
                window = (slice(0, MAX_ROWS),) * value.ndim
                corner = _sanitize(value[window].tolist())
            return {
                "kind": "array",
                "dtype": str(value.dtype),
                "shape": list(value.shape),
                "corner": corner,
            }
        if isinstance(value, np.generic):
            return {"kind": "value", "value": _cell(value.item())}

    if value is None or isinstance(value, (dict, list, tuple, str, int, float, bool)):
        sanitized = _sanitize(value)
        try:
            encoded = json.dumps(sanitized, allow_nan=False)
        except (TypeError, ValueError):
            pass  # not encodable after all: fall through to the repr preview
        else:
            if len(encoded) > MAX_VALUE_CHARS:
                return {"kind": "text", "text": encoded[:MAX_VALUE_CHARS] + "…"}
            return {"kind": "value", "value": sanitized}

    # Arbitrary objects: an honest repr, marked as text rather than data.
    text = repr(value)
    suffix = "…" if len(text) > MAX_TEXT_CHARS else ""
    return {"kind": "text", "text": text[:MAX_TEXT_CHARS] + suffix}
@@ -0,0 +1,192 @@
1
+ """History-hash computation — the Tier 2 staleness recipe from DESIGN.md §7.
2
+
3
+ history_hash = sha256(canonical_json({
4
+ fn: source hash of the decorated function (AST-normalized),
5
+ closure: hash of the defining module's repo import-closure,
6
+ env: environment fingerprint (python version + dependency files),
7
+ params: literal params for UNCONNECTED parameters only,
8
+ inputs: {param_name: [parent_history_hash, source_output]},
9
+ }))
10
+
11
+ A node is stale iff no checkpoint exists for its computed hash. Everything
12
+ here is pure stdlib and cheap enough to recompute on every document edit.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass, field
18
+ import hashlib
19
+ import json
20
+ from pathlib import Path
21
+ import sys
22
+ from typing import Any, Callable
23
+
24
+ from starforge.core.spec import PipelineDoc
25
+ from starforge.index.scanner import WorkspaceIndex
26
+
27
+ #: Doc-native nodes that execute without importing user code. Constants are
28
+ #: the first; the snippet node (DESIGN.md §10) will join this namespace.
29
+ BUILTIN_PREFIX = "builtin:"
30
+ BUILTINS = {"builtin:constant"}
31
+
32
+ #: Dependency manifests folded into the environment fingerprint. pyproject is
33
+ #: deliberately excluded — version bumps would invalidate every checkpoint.
34
+ ENV_FILES = (
35
+ "requirements.txt",
36
+ "requirements.lock",
37
+ "poetry.lock",
38
+ "uv.lock",
39
+ "Pipfile.lock",
40
+ )
41
+
42
+
43
def canonical_json(value: Any) -> str:
    """Serialize ``value`` to compact JSON with sorted keys.

    Objects the JSON encoder cannot handle are replaced by their ``repr``.
    """
    encoder_options = {
        "sort_keys": True,
        "separators": (",", ":"),
        "default": repr,
    }
    return json.dumps(value, **encoder_options)
45
+
46
+
47
+ def _sha(text: str) -> str:
48
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()
49
+
50
+
51
def env_fingerprint(workspace: str | Path) -> str:
    """Hash the running Python version together with the workspace's
    dependency manifests.

    The fingerprint covers ``major.minor.micro`` of the interpreter plus the
    SHA-256 of every file named in ``ENV_FILES`` that exists directly under
    ``workspace``. Manifests that are absent or unreadable are left out.
    """
    root = Path(workspace)
    version = sys.version_info
    lines = [f"python:{version.major}.{version.minor}.{version.micro}"]
    for manifest in ENV_FILES:
        candidate = root / manifest
        if not candidate.is_file():
            continue
        try:
            content_hash = hashlib.sha256(candidate.read_bytes()).hexdigest()
        except OSError:
            continue
        lines.append(f"{manifest}:{content_hash}")
    return _sha("\n".join(lines))
62
+
63
+
64
@dataclass
class NodeState:
    """Computed hash and staleness verdict for one pipeline node."""

    history_hash: str | None = None
    stale: bool = True
    #: Human-readable reasons the node cannot hash or run (missing block,
    #: cycle membership, bad edge target, ...). Empty means healthy.
    problems: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the state as a plain dict; ``problems`` is the same list
        object held by the instance, not a copy."""
        return dict(
            history_hash=self.history_hash,
            stale=self.stale,
            problems=self.problems,
        )
74
+
75
+
76
def toposort(doc: PipelineDoc) -> tuple[list[str], set[str]]:
    """Kahn's algorithm. Returns (ordered_ids, ids_stuck_in_cycles).

    Among nodes that are ready at the same time, the smallest id always comes
    first, so the order is deterministic for a given document. Edges whose
    source or target is not a node of ``doc`` are ignored. The second element
    holds every node that never became ready — members of a cycle and any
    nodes downstream of one.
    """
    import heapq  # local import: keeps the module's import block untouched

    indegree = {n.id: 0 for n in doc.nodes}
    children: dict[str, list[str]] = {n.id: [] for n in doc.nodes}
    for edge in doc.edges:
        if edge.source in indegree and edge.target in indegree:
            indegree[edge.target] += 1
            children[edge.source].append(edge.target)

    # A min-heap pops the smallest ready id, giving the same order as
    # re-sorting the ready list after every insertion, without the
    # repeated sorts and O(n) pop(0).
    ready = [nid for nid, deg in indegree.items() if deg == 0]
    heapq.heapify(ready)
    order: list[str] = []
    while ready:
        nid = heapq.heappop(ready)
        order.append(nid)
        for child in children[nid]:
            indegree[child] -= 1
            if indegree[child] == 0:
                heapq.heappush(ready, child)
    return order, {nid for nid, deg in indegree.items() if deg > 0}
95
+
96
+
97
+ #: Staleness tiers (DESIGN.md §7). The closure component per tier:
98
+ #: T0 — function body only (cheapest, misses helper edits entirely)
99
+ #: T1 — + the defining module (catches same-file helpers)
100
+ #: T2 — + the repo import closure (default; safe over-invalidation)
101
+ TIERS = ("T0", "T1", "T2")
102
+
103
+
104
+ def _closure_component(tier: str, index: WorkspaceIndex, module: str) -> str:
105
+ if tier == "T0":
106
+ return ""
107
+ if tier == "T1":
108
+ info = index.modules.get(module)
109
+ return (info.ast_hash or info.file_hash) if info else ""
110
+ return index.closure_hash(module)
111
+
112
+
113
def compute_states(
    doc: PipelineDoc,
    index: WorkspaceIndex,
    env_fp: str,
    checkpoint_exists: Callable[[str], bool],
    tier: str = "T2",
) -> dict[str, NodeState]:
    """Hash every node of ``doc`` and decide whether it is stale.

    Nodes are visited in topological order so each parent's hash is known
    before its children are hashed. A node that cannot be hashed keeps
    ``history_hash=None`` and ``stale=True`` and has the reasons listed in
    ``problems``; nodes that never become ready in the topological sort are
    flagged and never hashed.

    Args:
        doc: The pipeline document (nodes, edges, per-node params).
        index: Workspace index supplying block metadata and closure hashes.
        env_fp: Environment fingerprint mixed into every non-builtin hash.
        checkpoint_exists: Predicate telling whether a checkpoint exists for
            a given history hash; a node is stale when it returns False.
        tier: Staleness tier selecting the closure component.

    Returns:
        Mapping from node id to its :class:`NodeState`.
    """
    known_blocks = index.blocks
    states: dict[str, NodeState] = {}
    for entry in doc.nodes:
        states[entry.id] = NodeState()

    order, cyclic = toposort(doc)
    for stuck_id in cyclic:
        states[stuck_id].problems.append("part of a dependency cycle")

    for node_id in order:
        node = doc.node(node_id)
        state = states[node_id]

        if node.block.startswith(BUILTIN_PREFIX):
            if node.block not in BUILTINS:
                state.problems.append(f"unknown builtin '{node.block}'")
            elif doc.in_edges(node_id):
                state.problems.append("Constant is a source node and takes no inputs")
            else:
                # Deliberately excludes env/closure: a constant's identity is its
                # value, so checkpoints survive dependency upgrades and edits.
                payload = {"builtin": node.block, "params": node.params}
                state.history_hash = _sha(canonical_json(payload))
                state.stale = not checkpoint_exists(state.history_hash)
            continue

        info = known_blocks.get(node.block)
        if info is None:
            state.problems.append(f"block '{node.block}' not found in workspace (decorator removed or file deleted?)")
            continue

        accepted_params = {p.name for p in info.params}
        wired: dict[str, list[str]] = {}
        faults: list[str] = []
        for edge in doc.in_edges(node_id):
            param = edge.target_param
            upstream = states.get(edge.source)
            if upstream is None or upstream.history_hash is None:
                faults.append(f"input '{param}' depends on unresolvable node '{edge.source}'")
            elif param not in accepted_params:
                faults.append(f"edge targets unknown parameter '{param}'")
            elif param in wired:
                faults.append(f"parameter '{param}' has multiple incoming edges")
            else:
                # Parents with no indexed block (e.g. builtins) skip the
                # output-name check.
                producer = known_blocks.get(doc.node(edge.source).block)
                if producer is not None and edge.source_output not in producer.outputs:
                    faults.append(
                        f"edge expects output '{edge.source_output}' but '{producer.label}' produces {producer.outputs}"
                    )
                else:
                    wired[param] = [upstream.history_hash, edge.source_output]

        if faults:
            # Any bad edge leaves the node unhashed; report every fault found.
            state.problems.extend(faults)
            continue

        # Only parameters without an incoming edge contribute literal values.
        literals = {key: val for key, val in node.params.items() if key not in wired}
        recipe = {
            "fn": info.source_hash,
            "closure": _closure_component(tier, index, info.module),
            "env": env_fp,
            "params": literals,
            "inputs": wired,
        }
        state.history_hash = _sha(canonical_json(recipe))
        state.stale = not checkpoint_exists(state.history_hash)

    return states