starforge-kernel 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. starforge_kernel-0.1.0/PKG-INFO +76 -0
  2. starforge_kernel-0.1.0/README.md +59 -0
  3. starforge_kernel-0.1.0/pyproject.toml +31 -0
  4. starforge_kernel-0.1.0/setup.cfg +4 -0
  5. starforge_kernel-0.1.0/src/starforge/__init__.py +86 -0
  6. starforge_kernel-0.1.0/src/starforge/core/__init__.py +0 -0
  7. starforge_kernel-0.1.0/src/starforge/core/checkpoints.py +178 -0
  8. starforge_kernel-0.1.0/src/starforge/core/figures.py +119 -0
  9. starforge_kernel-0.1.0/src/starforge/core/previews.py +109 -0
  10. starforge_kernel-0.1.0/src/starforge/core/provenance.py +192 -0
  11. starforge_kernel-0.1.0/src/starforge/core/runner.py +293 -0
  12. starforge_kernel-0.1.0/src/starforge/core/serializers.py +141 -0
  13. starforge_kernel-0.1.0/src/starforge/core/spec.py +126 -0
  14. starforge_kernel-0.1.0/src/starforge/index/__init__.py +9 -0
  15. starforge_kernel-0.1.0/src/starforge/index/scanner.py +487 -0
  16. starforge_kernel-0.1.0/src/starforge/kernel/__init__.py +0 -0
  17. starforge_kernel-0.1.0/src/starforge/kernel/__main__.py +3 -0
  18. starforge_kernel-0.1.0/src/starforge/kernel/server.py +351 -0
  19. starforge_kernel-0.1.0/src/starforge/kernel/worker.py +66 -0
  20. starforge_kernel-0.1.0/src/starforge/mcp.py +283 -0
  21. starforge_kernel-0.1.0/src/starforge_kernel.egg-info/PKG-INFO +76 -0
  22. starforge_kernel-0.1.0/src/starforge_kernel.egg-info/SOURCES.txt +33 -0
  23. starforge_kernel-0.1.0/src/starforge_kernel.egg-info/dependency_links.txt +1 -0
  24. starforge_kernel-0.1.0/src/starforge_kernel.egg-info/requires.txt +9 -0
  25. starforge_kernel-0.1.0/src/starforge_kernel.egg-info/top_level.txt +1 -0
  26. starforge_kernel-0.1.0/tests/test_decorator.py +43 -0
  27. starforge_kernel-0.1.0/tests/test_figures.py +146 -0
  28. starforge_kernel-0.1.0/tests/test_indexer.py +142 -0
  29. starforge_kernel-0.1.0/tests/test_kernel_protocol.py +204 -0
  30. starforge_kernel-0.1.0/tests/test_m1_features.py +182 -0
  31. starforge_kernel-0.1.0/tests/test_mcp_module.py +60 -0
  32. starforge_kernel-0.1.0/tests/test_previews.py +55 -0
  33. starforge_kernel-0.1.0/tests/test_provenance.py +86 -0
  34. starforge_kernel-0.1.0/tests/test_runner_end_to_end.py +271 -0
  35. starforge_kernel-0.1.0/tests/test_serializers.py +61 -0
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: starforge-kernel
3
+ Version: 0.1.0
4
+ Summary: *Forge — pipeline canvas, checkpointing, and stale/hydrate execution for the repo you already have open
5
+ Author: Jonathan Potter
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/Jonpot/forge
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=8.0; extra == "dev"
12
+ Requires-Dist: pandas>=2.0; extra == "dev"
13
+ Requires-Dist: pyarrow>=15.0; extra == "dev"
14
+ Requires-Dist: numpy>=1.26; extra == "dev"
15
+ Provides-Extra: mcp
16
+ Requires-Dist: mcp>=1.8; extra == "mcp"
17
+
18
+ # *Forge (`starforge`)
19
+
20
+ Forge's canvas — checkpointing, provenance, stale/hydrate execution — as a VS Code
21
+ extension over the repo you already have open. Blocks are ordinary Python functions
22
+ tagged with `@block`. See [DESIGN.md](DESIGN.md) for the full design.
23
+
24
+ ## Try it (M0)
25
+
26
+ ```bash
27
+ # 1. Install the kernel + decorator into the venv your target repo uses
28
+ pip install -e <forge-repo>/starforge
29
+
30
+ # 2. Build the extension
31
+ cd <forge-repo>/starforge/vscode
32
+ npm install && npm run build
33
+
34
+ # 3. Open starforge/vscode in VS Code and press F5 (extension dev host).
35
+ # In the dev-host window, open any Python repo.
36
+ ```
37
+
38
+ In your repo:
39
+
40
+ ```python
41
+ # analysis/blocks.py
42
+ import matplotlib.pyplot as plt
43
+ from starforge import block
44
+
45
+ @block(category="IO")
46
+ def make_numbers(n: int = 5) -> dict:
47
+ return {"values": list(range(1, n + 1))}
48
+
49
+ @block
50
+ def scale(data: dict, factor: float = 2.0) -> dict:
51
+ return {"values": [v * factor for v in data["values"]]}
52
+
53
+ @block
54
+ def plot(data: dict) -> dict:
55
+ plt.plot(data["values"])
56
+ plt.show() # rendered inline on the canvas node
57
+ return data
58
+ ```
59
+
60
+ Save, run **“*Forge: New Pipeline”**, drag the blocks from the palette, wire
61
+ `output → data`, hit **▶ Run**. Run again — instant, everything reused. Edit
62
+ `scale`, watch it (and only it) go stale.
63
+
64
+ ## Layout
65
+
66
+ | Path | What |
67
+ |---|---|
68
+ | `src/starforge/__init__.py` | the `@block` decorator — zero-dep, the only thing user code touches |
69
+ | `src/starforge/index/` | static AST indexer (discovery, import graph, incremental cache) |
70
+ | `src/starforge/core/` | doc schema, history hashing, serializers, checkpoint store, runner |
71
+ | `src/starforge/kernel/` | stdio JSON-RPC kernel + per-run worker subprocess |
72
+ | `vscode/` | the extension (TS host + React Flow webview) |
73
+ | `tests/` | headless M0 proof — `python -m pytest starforge/tests` |
74
+
75
+ State lives in the target repo under `.forge/` — `pipelines/` is committable,
76
+ `checkpoints/` and `cache/` are auto-gitignored.
@@ -0,0 +1,59 @@
1
+ # *Forge (`starforge`)
2
+
3
+ Forge's canvas — checkpointing, provenance, stale/hydrate execution — as a VS Code
4
+ extension over the repo you already have open. Blocks are ordinary Python functions
5
+ tagged with `@block`. See [DESIGN.md](DESIGN.md) for the full design.
6
+
7
+ ## Try it (M0)
8
+
9
+ ```bash
10
+ # 1. Install the kernel + decorator into the venv your target repo uses
11
+ pip install -e <forge-repo>/starforge
12
+
13
+ # 2. Build the extension
14
+ cd <forge-repo>/starforge/vscode
15
+ npm install && npm run build
16
+
17
+ # 3. Open starforge/vscode in VS Code and press F5 (extension dev host).
18
+ # In the dev-host window, open any Python repo.
19
+ ```
20
+
21
+ In your repo:
22
+
23
+ ```python
24
+ # analysis/blocks.py
25
+ import matplotlib.pyplot as plt
26
+ from starforge import block
27
+
28
+ @block(category="IO")
29
+ def make_numbers(n: int = 5) -> dict:
30
+ return {"values": list(range(1, n + 1))}
31
+
32
+ @block
33
+ def scale(data: dict, factor: float = 2.0) -> dict:
34
+ return {"values": [v * factor for v in data["values"]]}
35
+
36
+ @block
37
+ def plot(data: dict) -> dict:
38
+ plt.plot(data["values"])
39
+ plt.show() # rendered inline on the canvas node
40
+ return data
41
+ ```
42
+
43
+ Save, run **“*Forge: New Pipeline”**, drag the blocks from the palette, wire
44
+ `output → data`, hit **▶ Run**. Run again — instant, everything reused. Edit
45
+ `scale`, watch it (and only it) go stale.
46
+
47
+ ## Layout
48
+
49
+ | Path | What |
50
+ |---|---|
51
+ | `src/starforge/__init__.py` | the `@block` decorator — zero-dep, the only thing user code touches |
52
+ | `src/starforge/index/` | static AST indexer (discovery, import graph, incremental cache) |
53
+ | `src/starforge/core/` | doc schema, history hashing, serializers, checkpoint store, runner |
54
+ | `src/starforge/kernel/` | stdio JSON-RPC kernel + per-run worker subprocess |
55
+ | `vscode/` | the extension (TS host + React Flow webview) |
56
+ | `tests/` | headless M0 proof — `python -m pytest starforge/tests` |
57
+
58
+ State lives in the target repo under `.forge/` — `pipelines/` is committable,
59
+ `checkpoints/` and `cache/` are auto-gitignored.
@@ -0,0 +1,31 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ # PyPI name `starforge` is squatted by a dormant Galaxy tool (see DESIGN.md §4);
7
+ # the import name is still `starforge`.
8
+ name = "starforge-kernel"
9
+ version = "0.1.0"
10
+ description = "*Forge — pipeline canvas, checkpointing, and stale/hydrate execution for the repo you already have open"
11
+ readme = "README.md"
12
+ authors = [{ name = "Jonathan Potter" }]
13
+ license = "Apache-2.0"
14
+ requires-python = ">=3.10"
15
+ # Intentionally empty: the decorator must import in microseconds inside user
16
+ # production code, and the kernel runs stdlib-only. pandas/numpy/pyarrow are
17
+ # probed lazily in workers and used only if the workspace env provides them.
18
+ dependencies = []
19
+
20
+ [project.urls]
21
+ Homepage = "https://github.com/Jonpot/forge"
22
+
23
+ [project.optional-dependencies]
24
+ dev = ["pytest>=8.0", "pandas>=2.0", "pyarrow>=15.0", "numpy>=1.26"]
25
+ mcp = ["mcp>=1.8"]
26
+
27
+ [tool.setuptools.packages.find]
28
+ where = ["src"]
29
+
30
+ [tool.pytest.ini_options]
31
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,86 @@
1
+ """*Forge — pipeline canvas for the repo you already have open.
2
+
3
+ This top-level module is the entire public surface that user code touches.
4
+ It must import in microseconds and depend on nothing: the decorator lives in
5
+ production codebases and has to be free. Everything heavy (indexer, engine,
6
+ kernel) lives in submodules that only *Forge itself* imports.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ __version__ = "0.1.0"
12
+
13
+ __all__ = ["block", "progress", "BLOCK_ATTR"]
14
+
15
+ #: Attribute set on decorated functions. The AST indexer matches the decorator
16
+ #: syntactically and never imports user code; this runtime tag exists so user
17
+ #: code and future runtime introspection can also recognize blocks.
18
+ BLOCK_ATTR = "__starforge_block__"
19
+
20
+
21
def block(fn=None, *, label=None, category=None, outputs=None):
    """Tag a function as a *Forge block.

    Works as a bare decorator or as a keyword-only decorator factory::

        @block
        def clean(raw: pd.DataFrame) -> pd.DataFrame: ...

        @block(label="Clean AUC Matrix", category="QC", outputs=("clean", "stats"))
        def clean_auc(raw, min_coverage: float = 0.8): ...

    The very same function object is handed back (no wrapper), so calling it
    from pytest, CI, or production behaves exactly as if the decorator were
    absent. The only effect is a metadata dict stored on the function under
    the ``BLOCK_ATTR`` attribute.

    Args:
        fn: The function being decorated when used bare; ``None`` when the
            decorator is called with keyword arguments.
        label: Palette display name. Defaults to the function name, title-cased.
        category: Palette grouping. Defaults to the defining module's path.
        outputs: Names for multiple return values (function must return a tuple
            of the same length). Defaults to a single output named "output".
            Stored as a tuple when given.

    Returns:
        The decorated function when used bare, otherwise a one-argument
        decorator that tags and returns whatever function it receives.

    Note for palette metadata: the indexer reads ``label``/``category``/
    ``outputs`` from the *source*, so they must be literals at the decoration
    site to appear in the palette.
    """

    def _tag(target):
        # Build a fresh metadata dict per decorated function so that reusing
        # one configured decorator never shares state between functions.
        output_names = None if outputs is None else tuple(outputs)
        metadata = {
            "label": label,
            "category": category,
            "outputs": output_names,
        }
        setattr(target, BLOCK_ATTR, metadata)
        return target

    # Bare usage (@block) receives the function directly; keyword usage
    # (@block(...)) must return the decorator to be applied next.
    return _tag if fn is None else _tag(fn)
61
+
62
+
63
+ #: Installed by the *Forge run worker around each block call; None everywhere
64
+ #: else, which keeps progress() a guaranteed no-op in pytest/CI/production.
65
+ _progress_hook = None
66
+
67
+
68
def progress(current=None, total=None, label=None):
    """Forward a progress update from inside a block to the *Forge canvas.

    Intended to be sprinkled freely through block code::

        for i, chunk in enumerate(chunks):
            progress(i + 1, len(chunks), "fitting folds")
            ...

    When no hook is installed (``_progress_hook`` is ``None``) the call
    returns immediately, so it is safe to leave in production code. Any
    combination of arguments works: (current, total) renders a determinate
    bar, label alone updates the text.

    Any exception raised by the installed hook is swallowed; the function
    always returns ``None``.
    """
    # Read the module global once so a concurrent reassignment cannot make
    # the None-check and the call disagree.
    hook = _progress_hook
    if hook is None:
        return
    try:
        hook(current, total, label)
    except Exception:
        # progress must never break user code
        return
File without changes
@@ -0,0 +1,178 @@
1
+ """Checkpoint store under ``<workspace>/.forge/checkpoints/``.
2
+
3
+ One directory per history hash (truncated to 32 hex chars — 128 bits — to
4
+ stay friendly to Windows path limits):
5
+
6
+ .forge/checkpoints/<hash32>/
7
+ ├── provenance.json # written LAST: its presence marks completeness
8
+ └── outputs/<name>.<ext per serializer>
9
+
10
+ The store also owns ``.forge/.gitignore`` so checkpoints and caches never
11
+ land in the user's repo history while ``pipelines/`` remains committable.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import os
18
+ from pathlib import Path
19
+ import shutil
20
+ import time
21
+ from typing import Any
22
+
23
+ from starforge.core import figures as figmod
24
+ from starforge.core import previews, serializers
25
+
26
+ FORGE_DIR = ".forge"
27
+ GITIGNORE_BODY = "checkpoints/\ncache/\n"
28
+
29
+
30
class CheckpointStore:
    """Filesystem checkpoint store rooted at ``<workspace>/.forge``.

    Each checkpoint lives in ``checkpoints/<first 32 chars of history hash>/``
    and is considered complete only once its ``provenance.json`` exists; that
    file is therefore always written last and removed first.
    """

    def __init__(self, workspace: str | Path) -> None:
        """Record the workspace root and derive the ``.forge`` paths.

        Nothing is created on disk here; call :meth:`ensure_layout` for that.
        """
        self.workspace = Path(workspace)
        self.forge_dir = self.workspace / FORGE_DIR
        self.base = self.forge_dir / "checkpoints"

    def ensure_layout(self) -> None:
        """Create ``pipelines/``, ``cache/`` and ``checkpoints/`` plus the
        ``.forge/.gitignore`` (only if it does not already exist, so user
        edits to it are preserved)."""
        (self.forge_dir / "pipelines").mkdir(parents=True, exist_ok=True)
        (self.forge_dir / "cache").mkdir(parents=True, exist_ok=True)
        self.base.mkdir(parents=True, exist_ok=True)
        gitignore = self.forge_dir / ".gitignore"
        if not gitignore.exists():
            gitignore.write_text(GITIGNORE_BODY, encoding="utf-8")

    def dir_for(self, history_hash: str) -> Path:
        """Return the checkpoint directory for ``history_hash`` (the hash is
        truncated to its first 32 characters)."""
        return self.base / history_hash[:32]

    def exists(self, history_hash: str) -> bool:
        """Return True when the checkpoint's ``provenance.json`` is present,
        i.e. the checkpoint was written to completion."""
        return (self.dir_for(history_hash) / "provenance.json").is_file()

    def read_provenance(self, history_hash: str) -> dict[str, Any]:
        """Load and return the checkpoint's ``provenance.json``.

        Raises:
            FileNotFoundError: the checkpoint (or its provenance) is missing.
            json.JSONDecodeError: the file is not valid JSON.
        """
        path = self.dir_for(history_hash) / "provenance.json"
        return json.loads(path.read_text(encoding="utf-8"))

    def write(
        self,
        history_hash: str,
        provenance: dict[str, Any],
        outputs: dict[str, Any],
        pickle_enabled: bool = False,
        side_figures: list[Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Persist outputs then provenance (in that order, for atomicity).

        Args:
            history_hash: Identifies the checkpoint directory.
            provenance: Caller-supplied record; copied, then extended with
                ``history_hash``, ``outputs``, ``figures`` and ``dir``.
            outputs: Mapping of output name to value, each saved through
                ``serializers.save_value``.
            pickle_enabled: Forwarded to ``serializers.save_value``.
            side_figures: Figures the block created or showed without
                returning them (plt.show() and friends); they render to
                artifacts recorded under the provenance ``figures`` key.

        Returns:
            The output manifest, including ephemeral entries.
        """
        directory = self.dir_for(history_hash)
        outputs_dir = directory / "outputs"
        directory.mkdir(parents=True, exist_ok=True)
        manifest = []
        for name, value in outputs.items():
            entry = serializers.save_value(value, outputs_dir, name, pickle_enabled=pickle_enabled)
            try:
                # Previews ride inside provenance.json so the stdlib-only
                # kernel can serve them without deserializing data. Computed
                # for ephemeral outputs too — their only window is right now.
                if entry.get("artifact"):
                    entry["preview"] = {
                        "kind": "figure",
                        "file": entry["artifact"]["file"],
                        "format": entry["artifact"]["kind"],
                    }
                else:
                    entry["preview"] = previews.build_preview(value)
            except Exception:
                # A broken preview must not fail the checkpoint write.
                entry["preview"] = {"kind": "text", "text": f"<preview failed for {type(value).__name__}>"}
            manifest.append(entry)

        rendered_figures: list[dict[str, Any]] = []
        for i, fig in enumerate(side_figures or []):
            try:
                artifact = figmod.render_figure(fig, outputs_dir, f"figure_{i}")
            except Exception:
                # Side figures are best-effort; skip any that fail to render.
                artifact = None
            if artifact is not None:
                rendered_figures.append(artifact)

        record = dict(provenance)
        record["history_hash"] = history_hash
        record["outputs"] = manifest
        record["figures"] = rendered_figures
        record["dir"] = directory.relative_to(self.workspace).as_posix()
        path = directory / "provenance.json"
        tmp = directory / "provenance.json.tmp"
        # Write to a temp file and rename so a reader never observes a
        # half-written provenance.json (its presence marks completeness).
        tmp.write_text(json.dumps(record, indent=2, default=repr), encoding="utf-8")
        tmp.replace(path)
        return manifest

    def output_entry(self, history_hash: str, name: str) -> dict[str, Any]:
        """Return the manifest entry for output ``name``.

        Raises:
            KeyError: the checkpoint has no output with that name.
        """
        for entry in self.read_provenance(history_hash).get("outputs", []):
            if entry.get("name") == name:
                return entry
        raise KeyError(f"checkpoint {history_hash[:12]} has no output named '{name}'")

    def load_output(self, history_hash: str, name: str) -> Any:
        """Raises serializers.EphemeralValueError for non-persisted outputs."""
        entry = self.output_entry(history_hash, name)
        return serializers.load_value(self.dir_for(history_hash) / "outputs", entry)

    def is_ephemeral(self, history_hash: str, name: str) -> bool:
        """Return True when the output was not persisted.

        A missing checkpoint, missing output, or unreadable provenance also
        counts as ephemeral (there is nothing that could be loaded).
        """
        try:
            entry = self.output_entry(history_hash, name)
        except (KeyError, FileNotFoundError, json.JSONDecodeError):
            return True
        return entry.get("serializer") == serializers.EPHEMERAL

    def touch(self, history_hash: str) -> None:
        """Bump the checkpoint dir's mtime so LRU GC sees reuse as recency."""
        try:
            os.utime(self.dir_for(history_hash))
        except OSError:
            pass

    @staticmethod
    def _tree_size(directory: Path) -> int:
        """Sum the sizes of regular files under ``directory``, skipping any
        entry that disappears or becomes unreadable while being scanned."""
        size = 0
        for item in directory.rglob("*"):
            try:
                if item.is_file():
                    size += item.stat().st_size
            except OSError:
                continue
        return size

    def gc(self, max_bytes: int) -> dict[str, int]:
        """Least-recently-used eviction down to ``max_bytes`` total.

        Deleting a live checkpoint is always safe — the node just reads as
        stale and recomputes — so a plain LRU needs no liveness analysis.
        Checkpoints that cannot be scanned or removed are skipped rather
        than aborting the sweep, and only directories that are actually
        gone afterwards are counted as freed.

        Returns {"freed_bytes", "deleted", "remaining_bytes"}.
        """
        entries: list[tuple[float, int, Path]] = []
        total = 0
        if self.base.is_dir():
            for directory in self.base.iterdir():
                try:
                    if not directory.is_dir():
                        continue
                    mtime = directory.stat().st_mtime
                    size = self._tree_size(directory)
                except OSError:
                    # Vanished or unreadable mid-scan (e.g. a concurrent run).
                    continue
                entries.append((mtime, size, directory))
                total += size

        freed = 0
        deleted = 0
        entries.sort()  # oldest first
        for _mtime, size, directory in entries:
            if total - freed <= max_bytes:
                break
            # Remove the completeness marker first so that a partially
            # failed rmtree can never leave a checkpoint that still reads
            # as complete while its outputs are missing.
            try:
                (directory / "provenance.json").unlink()
            except OSError:
                pass
            shutil.rmtree(directory, ignore_errors=True)
            if directory.exists():
                # Removal failed; do not report bytes that were not freed.
                continue
            freed += size
            deleted += 1
        return {"freed_bytes": freed, "deleted": deleted, "remaining_bytes": total - freed}

    def clean_run_specs(self, max_age_seconds: float = 86400.0) -> None:
        """Run-spec files are one-shot worker inputs; sweep the stale ones.

        Deletes ``cache/runs/*.json`` files whose mtime is older than
        ``max_age_seconds``; files that cannot be stat'ed or removed are
        skipped.
        """
        runs_dir = self.forge_dir / "cache" / "runs"
        if not runs_dir.is_dir():
            return
        cutoff = time.time() - max_age_seconds
        for spec in runs_dir.glob("*.json"):
            try:
                if spec.stat().st_mtime < cutoff:
                    spec.unlink()
            except OSError:
                continue
@@ -0,0 +1,119 @@
1
+ """Figure capture and artifact rendering.
2
+
3
+ The notebook muscle memory is ``plt.plot(...); plt.show()`` — or no show()
4
+ at all. The worker honors it with zero code changes: matplotlib runs on the
5
+ Agg backend, and :func:`capture` sweeps every figure that exists after the
6
+ block call that didn't exist before (``plt.show`` is a no-op under Agg, so
7
+ "shown" figures are still open when we sweep). Plotly's ``fig.show()`` is
8
+ intercepted by patching ``plotly.io.show`` while the block runs.
9
+
10
+ Captured and returned figures render to checkpoint artifacts — matplotlib →
11
+ PNG, plotly → self-contained HTML — and are closed afterward so a long run
12
+ never accumulates canvases.
13
+
14
+ Import discipline: stdlib-only at import time; matplotlib/plotly are only
15
+ touched when the user's process already loaded them.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from contextlib import contextmanager
21
+ from dataclasses import dataclass, field
22
+ from pathlib import Path
23
+ import sys
24
+ from typing import Any, Iterator
25
+
26
+
27
+ def _pyplot() -> Any | None:
28
+ return sys.modules.get("matplotlib.pyplot")
29
+
30
+
31
+ def _root_module(value: Any) -> str:
32
+ return type(value).__module__.split(".")[0]
33
+
34
+
35
@dataclass
class CapturedFigures:
    """Figures collected while a block ran, grouped by plotting library."""

    # Each instance gets its own lists (default_factory), never a shared one.
    matplotlib: list[Any] = field(default_factory=list)
    plotly: list[Any] = field(default_factory=list)

    def all_objects(self) -> list[Any]:
        """Return a new list holding the matplotlib figures followed by the
        plotly figures."""
        combined: list[Any] = list(self.matplotlib)
        combined.extend(self.plotly)
        return combined
42
+
43
+
44
@contextmanager
def capture() -> Iterator[CapturedFigures]:
    """Collect figures created (matplotlib) or shown (plotly) while the
    ``with`` body runs.

    On entry the currently open matplotlib figure numbers are recorded and,
    if ``plotly.io`` is already imported, its ``show`` is replaced by a
    recorder. On exit ``show`` is restored and every matplotlib figure whose
    number was not open on entry is added to the result. pyplot is looked up
    again on exit, so figures are still swept when matplotlib was first
    imported inside the body (e.g. during the block module's first import).
    """
    collected = CapturedFigures()

    pyplot = _pyplot()
    preexisting: set[int] = set() if pyplot is None else set(pyplot.get_fignums())

    plotly_io = sys.modules.get("plotly.io")
    saved_show = None if plotly_io is None else getattr(plotly_io, "show", None)
    patched = plotly_io is not None and saved_show is not None
    if patched:

        def _grab(fig: Any, *args: Any, **kwargs: Any) -> None:
            # Record the figure instead of displaying it.
            collected.plotly.append(fig)

        plotly_io.show = _grab

    try:
        yield collected
    finally:
        if patched:
            plotly_io.show = saved_show
        pyplot = _pyplot()  # may have been imported during the call
        if pyplot is not None:
            new_numbers = [num for num in pyplot.get_fignums() if num not in preexisting]
            for num in new_numbers:
                collected.matplotlib.append(pyplot.figure(num))
74
+
75
+
76
def as_figure(value: Any) -> Any | None:
    """Resolve ``value`` to something renderable, or None.

    * matplotlib object with ``savefig`` -> returned as is;
    * other matplotlib object whose ``figure`` attribute has ``savefig``
      (Axes and friends, e.g. what ``sns.heatmap`` returns) -> that parent
      figure;
    * plotly object with ``write_html`` -> returned as is;
    * anything else -> None.
    """
    package = _root_module(value)
    if package == "plotly":
        return value if hasattr(value, "write_html") else None
    if package != "matplotlib":
        return None
    if hasattr(value, "savefig"):
        return value
    owner = getattr(value, "figure", None)  # Axes and friends
    if owner is not None and hasattr(owner, "savefig"):
        return owner
    return None
92
+
93
+
94
def render_figure(value: Any, directory: Path, basename: str) -> dict[str, Any] | None:
    """Write ``value`` as ``directory/basename.png`` (matplotlib) or
    ``directory/basename.html`` (anything else ``as_figure`` accepts, i.e.
    plotly).

    Returns the artifact entry ``{"file", "kind"}`` with kind ``"image"`` or
    ``"html"``, or None when ``value`` is not a figure. ``directory`` is
    created if needed, but only once a figure has been recognised.
    """
    figure = as_figure(value)
    if figure is None:
        return None
    directory.mkdir(parents=True, exist_ok=True)

    if _root_module(figure) != "matplotlib":
        html_name = f"{basename}.html"
        # Bundle plotly.js so the HTML file is self-contained.
        figure.write_html(directory / html_name, include_plotlyjs=True, full_html=True)
        return {"file": html_name, "kind": "html"}

    png_name = f"{basename}.png"
    figure.savefig(
        directory / png_name,
        dpi=110,
        bbox_inches="tight",
        facecolor=figure.get_facecolor(),
    )
    return {"file": png_name, "kind": "image"}
108
+
109
+
110
def close_figures(figures: list[Any]) -> None:
    """Close every matplotlib figure in ``figures`` via ``pyplot.close``.

    Does nothing when pyplot is not loaded; non-matplotlib objects are
    ignored and a failure to close one figure does not stop the rest.
    """
    pyplot = _pyplot()
    if pyplot is None:
        return
    for candidate in figures:
        if _root_module(candidate) != "matplotlib":
            continue
        try:
            pyplot.close(candidate)
        except Exception:
            # Best-effort cleanup: move on to the next figure.
            continue
@@ -0,0 +1,109 @@
1
+ """Cropped, JSON-safe output previews, computed at checkpoint-write time.
2
+
3
+ Previews are precomputed artifacts stored inside ``provenance.json`` — the
4
+ kernel serves them by reading a file, never by deserializing data (it stays
5
+ stdlib-only and instant). Because they're built while the value is in the
6
+ worker's hands, even EPHEMERAL outputs get a preview of their last run.
7
+
8
+ Everything emitted here must survive strict JSON.parse on the TypeScript
9
+ side: NaN/Infinity are stringified, containers are size-capped, and unknown
10
+ objects fall back to repr.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from typing import Any
17
+
18
+ MAX_ROWS = 8
19
+ MAX_COLS = 10
20
+ MAX_ITEMS = 50
21
+ MAX_DEPTH = 5
22
+ MAX_CELL_CHARS = 120
23
+ MAX_TEXT_CHARS = 600
24
+ MAX_VALUE_CHARS = 2000
25
+
26
+
27
+ def _cell(value: Any) -> Any:
28
+ """One scalar table/array cell, strict-JSON safe."""
29
+ if isinstance(value, bool) or value is None:
30
+ return value
31
+ if isinstance(value, int):
32
+ return value
33
+ if isinstance(value, float):
34
+ return value if value == value and abs(value) != float("inf") else str(value)
35
+ text = value if isinstance(value, str) else repr(value)
36
+ return text[:MAX_CELL_CHARS] + ("…" if len(text) > MAX_CELL_CHARS else "")
37
+
38
+
39
def _sanitize(value: Any, depth: int = 0) -> Any:
    """Recursively reduce ``value`` to a size-capped, JSON-safe structure.

    Dicts and lists/tuples are descended into until ``MAX_DEPTH`` and cut to
    their first ``MAX_ITEMS`` entries, with a marker noting how many were
    dropped; dict keys are stringified and clipped to ``MAX_CELL_CHARS``.
    Everything else (and any container at the depth limit) goes through
    ``_cell``.
    """
    if depth < MAX_DEPTH:
        if isinstance(value, dict):
            kept: dict[str, Any] = {}
            for key, item in list(value.items())[:MAX_ITEMS]:
                name = str(key)[:MAX_CELL_CHARS]
                kept[name] = _sanitize(item, depth + 1)
            dropped = len(value) - MAX_ITEMS
            if dropped > 0:
                kept["…"] = f"+{dropped} more"
            return kept
        if isinstance(value, (list, tuple)):
            trimmed = [_sanitize(item, depth + 1) for item in value[:MAX_ITEMS]]
            dropped = len(value) - MAX_ITEMS
            if dropped > 0:
                trimmed.append(f"… +{dropped} more")
            return trimmed
    return _cell(value)
54
+
55
+
56
+ def _root_type_module(value: Any) -> str:
57
+ return type(value).__module__.split(".")[0]
58
+
59
+
60
def build_preview(value: Any) -> dict[str, Any]:
    """Build a cropped, JSON-safe preview dict for ``value``.

    The result always has a ``"kind"`` key:

    * ``"table"`` — pandas DataFrame/Series (Series via ``to_frame``): full
      shape plus the top-left ``MAX_ROWS`` x ``MAX_COLS`` corner;
    * ``"array"`` — numpy ndarray: dtype, shape and a corner of at most
      ``MAX_ROWS`` entries along every axis;
    * ``"value"`` — numpy scalars and plain Python data whose sanitized JSON
      encoding fits in ``MAX_VALUE_CHARS``;
    * ``"text"`` — over-long JSON (truncated) or, for any other object, its
      ``repr`` clipped to ``MAX_TEXT_CHARS``.

    pandas/numpy are imported only when the value's type already comes from
    that package.
    """
    origin = _root_type_module(value)

    if origin == "pandas":
        import pandas as pd

        frame = value.to_frame() if isinstance(value, pd.Series) else value
        if isinstance(frame, pd.DataFrame):
            n_rows, n_cols = frame.shape[0], frame.shape[1]
            shown_columns = [str(col) for col in frame.columns[:MAX_COLS]]
            corner = frame.iloc[:MAX_ROWS, :MAX_COLS]
            index_cells = [_cell(label) for label in corner.index.tolist()]
            row_cells = [
                [_cell(item) for item in row]
                for row in corner.itertuples(index=False, name=None)
            ]
            return {
                "kind": "table",
                "shape": [int(n_rows), int(n_cols)],
                "columns": shown_columns,
                "columns_truncated": n_cols > MAX_COLS,
                "index": index_cells,
                "rows": row_cells,
            }
        # Other pandas objects fall through to the generic handling below.

    if origin == "numpy":
        import numpy as np

        if isinstance(value, np.ndarray):
            if value.ndim == 0:
                # Zero-dimensional arrays cannot be sliced; unwrap the scalar.
                corner_data: Any = _cell(value.item())
            else:
                window = (slice(0, MAX_ROWS),) * value.ndim
                corner_data = _sanitize(value[window].tolist())
            return {
                "kind": "array",
                "dtype": str(value.dtype),
                "shape": list(value.shape),
                "corner": corner_data,
            }
        if isinstance(value, np.generic):
            return {"kind": "value", "value": _cell(value.item())}

    is_plain = value is None or isinstance(value, (dict, list, tuple, str, int, float, bool))
    if is_plain:
        sanitized = _sanitize(value)
        try:
            encoded = json.dumps(sanitized, allow_nan=False)
        except (TypeError, ValueError):
            # Not strictly encodable after all; use the repr fallback below.
            pass
        else:
            if len(encoded) > MAX_VALUE_CHARS:
                return {"kind": "text", "text": encoded[:MAX_VALUE_CHARS] + "…"}
            return {"kind": "value", "value": sanitized}

    # Arbitrary objects: an honest repr, marked as text rather than data.
    text = repr(value)
    shown = text[:MAX_TEXT_CHARS]
    if len(text) > MAX_TEXT_CHARS:
        shown += "…"
    return {"kind": "text", "text": shown}