tallyman-data 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. tallyman_data-0.0.1/.github/workflows/tests.yml +74 -0
  2. tallyman_data-0.0.1/.gitignore +18 -0
  3. tallyman_data-0.0.1/.mcp.json +13 -0
  4. tallyman_data-0.0.1/PKG-INFO +163 -0
  5. tallyman_data-0.0.1/README.md +132 -0
  6. tallyman_data-0.0.1/TICKETS.md +482 -0
  7. tallyman_data-0.0.1/demo/datasets.md +402 -0
  8. tallyman_data-0.0.1/demo/script.md +210 -0
  9. tallyman_data-0.0.1/demo/storyboard.json +59 -0
  10. tallyman_data-0.0.1/docs/installing.md +196 -0
  11. tallyman_data-0.0.1/packages/app/.gitignore +1 -0
  12. tallyman_data-0.0.1/packages/app/index.html +12 -0
  13. tallyman_data-0.0.1/packages/app/package.json +31 -0
  14. tallyman_data-0.0.1/packages/app/pnpm-lock.yaml +2564 -0
  15. tallyman_data-0.0.1/packages/app/src/App.tsx +71 -0
  16. tallyman_data-0.0.1/packages/app/src/SSEContext.tsx +65 -0
  17. tallyman_data-0.0.1/packages/app/src/api.ts +140 -0
  18. tallyman_data-0.0.1/packages/app/src/components/BuckarooEmbed.tsx +89 -0
  19. tallyman_data-0.0.1/packages/app/src/components/CatalogSidebar.tsx +184 -0
  20. tallyman_data-0.0.1/packages/app/src/components/DagSvg.tsx +174 -0
  21. tallyman_data-0.0.1/packages/app/src/components/Header.tsx +99 -0
  22. tallyman_data-0.0.1/packages/app/src/components/NewEntryPill.tsx +93 -0
  23. tallyman_data-0.0.1/packages/app/src/components/VegaChart.tsx +55 -0
  24. tallyman_data-0.0.1/packages/app/src/main.tsx +13 -0
  25. tallyman_data-0.0.1/packages/app/src/newEntryNotice.test.ts +50 -0
  26. tallyman_data-0.0.1/packages/app/src/newEntryNotice.ts +55 -0
  27. tallyman_data-0.0.1/packages/app/src/pages/CachePage.tsx +113 -0
  28. tallyman_data-0.0.1/packages/app/src/pages/CatalogPage.tsx +385 -0
  29. tallyman_data-0.0.1/packages/app/src/pages/DiffPage.tsx +249 -0
  30. tallyman_data-0.0.1/packages/app/src/pages/EmptyStatePage.tsx +59 -0
  31. tallyman_data-0.0.1/packages/app/src/pages/LineageEntryPage.tsx +74 -0
  32. tallyman_data-0.0.1/packages/app/src/pages/LineagePage.tsx +55 -0
  33. tallyman_data-0.0.1/packages/app/src/pages/NotebookPage.tsx +290 -0
  34. tallyman_data-0.0.1/packages/app/src/pages/ProjectsPage.tsx +130 -0
  35. tallyman_data-0.0.1/packages/app/src/styles.css +536 -0
  36. tallyman_data-0.0.1/packages/app/src/types.ts +197 -0
  37. tallyman_data-0.0.1/packages/app/tsconfig.json +20 -0
  38. tallyman_data-0.0.1/packages/app/tsconfig.tsbuildinfo +1 -0
  39. tallyman_data-0.0.1/packages/app/vite.config.ts +26 -0
  40. tallyman_data-0.0.1/packages/embed/.gitignore +1 -0
  41. tallyman_data-0.0.1/packages/embed/package.json +23 -0
  42. tallyman_data-0.0.1/packages/embed/pnpm-lock.yaml +1495 -0
  43. tallyman_data-0.0.1/packages/embed/src/buckaroo-embed.tsx +71 -0
  44. tallyman_data-0.0.1/packages/embed/tsconfig.json +14 -0
  45. tallyman_data-0.0.1/packages/embed/vite.config.ts +42 -0
  46. tallyman_data-0.0.1/plan.md +340 -0
  47. tallyman_data-0.0.1/plans/adr-git-subprocess-threading.md +51 -0
  48. tallyman_data-0.0.1/plans/ds-demo-scripts.md +1396 -0
  49. tallyman_data-0.0.1/plans/eda-codegen-run1.md +56 -0
  50. tallyman_data-0.0.1/plans/eda-prompt-research.md +191 -0
  51. tallyman_data-0.0.1/plans/haiku-codegen-findings.md +99 -0
  52. tallyman_data-0.0.1/plans/llm-summary-stats.md +274 -0
  53. tallyman_data-0.0.1/plans/model-codegen-comparison.md +51 -0
  54. tallyman_data-0.0.1/plans/plotting-testcases.md +227 -0
  55. tallyman_data-0.0.1/plans/project_switcher.md +424 -0
  56. tallyman_data-0.0.1/plans/xorq-sklearn-assessment.md +72 -0
  57. tallyman_data-0.0.1/proposal.md +31 -0
  58. tallyman_data-0.0.1/pyproject.toml +82 -0
  59. tallyman_data-0.0.1/scripts/h3_pipeline_bench.py +268 -0
  60. tallyman_data-0.0.1/scripts/publish.sh +58 -0
  61. tallyman_data-0.0.1/src/tallyman_cli/__init__.py +3 -0
  62. tallyman_data-0.0.1/src/tallyman_cli/fixtures.py +29 -0
  63. tallyman_data-0.0.1/src/tallyman_cli/main.py +371 -0
  64. tallyman_data-0.0.1/src/tallyman_companion/__init__.py +3 -0
  65. tallyman_data-0.0.1/src/tallyman_companion/app.py +1253 -0
  66. tallyman_data-0.0.1/src/tallyman_companion/buckaroo_lifecycle.py +514 -0
  67. tallyman_data-0.0.1/src/tallyman_companion/diff.py +187 -0
  68. tallyman_data-0.0.1/src/tallyman_companion/diff_color_maps.py +262 -0
  69. tallyman_data-0.0.1/src/tallyman_companion/diff_extras/display/detailed_absolute.py +82 -0
  70. tallyman_data-0.0.1/src/tallyman_companion/diff_extras/display/detailed_pct.py +82 -0
  71. tallyman_data-0.0.1/src/tallyman_companion/diff_extras/display/main.py +60 -0
  72. tallyman_data-0.0.1/src/tallyman_core/__init__.py +111 -0
  73. tallyman_data-0.0.1/src/tallyman_core/aliases.py +141 -0
  74. tallyman_data-0.0.1/src/tallyman_core/catalog_state.py +501 -0
  75. tallyman_data-0.0.1/src/tallyman_core/charts.py +72 -0
  76. tallyman_data-0.0.1/src/tallyman_core/display_configs.py +52 -0
  77. tallyman_data-0.0.1/src/tallyman_core/errors.py +74 -0
  78. tallyman_data-0.0.1/src/tallyman_core/git_util.py +73 -0
  79. tallyman_data-0.0.1/src/tallyman_core/manifest.py +30 -0
  80. tallyman_data-0.0.1/src/tallyman_core/marimo_export.py +280 -0
  81. tallyman_data-0.0.1/src/tallyman_core/notebook.py +133 -0
  82. tallyman_data-0.0.1/src/tallyman_core/paths.py +341 -0
  83. tallyman_data-0.0.1/src/tallyman_core/post_processing.py +305 -0
  84. tallyman_data-0.0.1/src/tallyman_core/summary_stats.py +200 -0
  85. tallyman_data-0.0.1/src/tallyman_core/xorq_catalog.py +99 -0
  86. tallyman_data-0.0.1/src/tallyman_mcp/__init__.py +3 -0
  87. tallyman_data-0.0.1/src/tallyman_mcp/server.py +1294 -0
  88. tallyman_data-0.0.1/src/tallyman_xorq/__init__.py +60 -0
  89. tallyman_data-0.0.1/src/tallyman_xorq/_git_state_guard.py +113 -0
  90. tallyman_data-0.0.1/src/tallyman_xorq/build.py +412 -0
  91. tallyman_data-0.0.1/src/tallyman_xorq/diff.py +155 -0
  92. tallyman_data-0.0.1/src/tallyman_xorq/io.py +72 -0
  93. tallyman_data-0.0.1/src/tallyman_xorq/layout.py +78 -0
  94. tallyman_data-0.0.1/src/tallyman_xorq/lineage.py +130 -0
  95. tallyman_data-0.0.1/src/tallyman_xorq/portable.py +75 -0
  96. tallyman_data-0.0.1/src/tallyman_xorq/primary_key.py +166 -0
  97. tallyman_data-0.0.1/src/tallyman_xorq/result_cache.py +145 -0
  98. tallyman_data-0.0.1/tests/__init__.py +0 -0
  99. tallyman_data-0.0.1/tests/conftest.py +75 -0
  100. tallyman_data-0.0.1/tests/git_multithread_reliable.py +244 -0
  101. tallyman_data-0.0.1/tests/repro_git_state_segfault.py +283 -0
  102. tallyman_data-0.0.1/tests/test_aliases.py +316 -0
  103. tallyman_data-0.0.1/tests/test_buckaroo.py +584 -0
  104. tallyman_data-0.0.1/tests/test_buckaroo_multi_project.py +151 -0
  105. tallyman_data-0.0.1/tests/test_build.py +152 -0
  106. tallyman_data-0.0.1/tests/test_cache_inspector.py +110 -0
  107. tallyman_data-0.0.1/tests/test_charts.py +198 -0
  108. tallyman_data-0.0.1/tests/test_cli.py +34 -0
  109. tallyman_data-0.0.1/tests/test_code_edit.py +120 -0
  110. tallyman_data-0.0.1/tests/test_companion.py +157 -0
  111. tallyman_data-0.0.1/tests/test_diff.py +501 -0
  112. tallyman_data-0.0.1/tests/test_disk_usage.py +48 -0
  113. tallyman_data-0.0.1/tests/test_display_configs.py +125 -0
  114. tallyman_data-0.0.1/tests/test_errors.py +181 -0
  115. tallyman_data-0.0.1/tests/test_fouc.py +105 -0
  116. tallyman_data-0.0.1/tests/test_git_state_guard.py +213 -0
  117. tallyman_data-0.0.1/tests/test_io.py +34 -0
  118. tallyman_data-0.0.1/tests/test_lineage.py +121 -0
  119. tallyman_data-0.0.1/tests/test_lineage_view.py +123 -0
  120. tallyman_data-0.0.1/tests/test_manifest.py +28 -0
  121. tallyman_data-0.0.1/tests/test_mcp_project_tools.py +307 -0
  122. tallyman_data-0.0.1/tests/test_mcp_stdio.py +117 -0
  123. tallyman_data-0.0.1/tests/test_mcp_tool.py +87 -0
  124. tallyman_data-0.0.1/tests/test_notebook.py +406 -0
  125. tallyman_data-0.0.1/tests/test_pack.py +101 -0
  126. tallyman_data-0.0.1/tests/test_paths.py +235 -0
  127. tallyman_data-0.0.1/tests/test_portable.py +114 -0
  128. tallyman_data-0.0.1/tests/test_post_processing.py +229 -0
  129. tallyman_data-0.0.1/tests/test_primary_key.py +53 -0
  130. tallyman_data-0.0.1/tests/test_project_switcher.py +345 -0
  131. tallyman_data-0.0.1/tests/test_promote_diff.py +436 -0
  132. tallyman_data-0.0.1/tests/test_prompts.py +56 -0
  133. tallyman_data-0.0.1/tests/test_replay.py +131 -0
  134. tallyman_data-0.0.1/tests/test_reset_to_revision.py +636 -0
  135. tallyman_data-0.0.1/tests/test_result_cache.py +81 -0
  136. tallyman_data-0.0.1/tests/test_serve.py +132 -0
  137. tallyman_data-0.0.1/tests/test_summary_stats.py +191 -0
  138. tallyman_data-0.0.1/uv.lock +3002 -0
@@ -0,0 +1,74 @@
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main, "spike/**"]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ # CI runs three sequential jobs: lint → fast tests → integration tests.
10
+ #
11
+ # - The React SPA (packages/app/) IS built in the fast job: the SPA-serving
12
+ # tests assert the real built index.html (#root div), so dist/ must exist
13
+ # or create_app's catch-all returns 503. The embed bundle (packages/embed/)
14
+ # is still NOT built — nothing serves it under test.
15
+ # - Integration tests spawn the real buckaroo Tornado subprocess via
16
+ # tallyman's BuckarooManager. buckaroo==0.14.15 is pinned in
17
+ # pyproject.toml as a normal PyPI dep, so plain `uv sync` is enough.
18
+
19
+ jobs:
20
+ lint:
21
+ name: ruff
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - uses: actions/checkout@v4
25
+ - uses: astral-sh/setup-uv@v3
26
+ with:
27
+ enable-cache: true
28
+ - name: ruff check
29
+ # uvx runs the latest ruff without polluting the project venv;
30
+ # matches the local pre-push hook in ~/.claude/settings.json.
31
+ run: uvx ruff check
32
+
33
+ fast:
34
+ name: fast suite (excludes integration)
35
+ runs-on: ubuntu-latest
36
+ needs: lint
37
+ steps:
38
+ - uses: actions/checkout@v4
39
+ - uses: astral-sh/setup-uv@v3
40
+ with:
41
+ enable-cache: true
42
+ # The SPA-serving tests hit create_app's catch-all, which reads
43
+ # packages/app/dist/index.html. Build it here (pnpm 9 matches the
44
+ # lockfileVersion 9.0) so those routes return the page, not a 503.
45
+ - uses: pnpm/action-setup@v4
46
+ with:
47
+ version: 9
48
+ - uses: actions/setup-node@v4
49
+ with:
50
+ node-version: 20
51
+ cache: pnpm
52
+ cache-dependency-path: packages/app/pnpm-lock.yaml
53
+ - name: build React SPA
54
+ run: pnpm -C packages/app install --frozen-lockfile && pnpm -C packages/app build
55
+ - name: vitest (React unit)
56
+ run: pnpm -C packages/app test
57
+ - name: sync deps
58
+ run: uv sync
59
+ - name: pytest (fast)
60
+ # pyproject.toml's addopts already deselects integration-marked tests;
61
+ # being explicit here so the job name matches the behaviour.
62
+ run: uv run pytest -m "not integration"
63
+
64
+ integration:
65
+ name: integration suite
66
+ runs-on: ubuntu-latest
67
+ needs: fast
68
+ steps:
69
+ - uses: actions/checkout@v4
70
+ - uses: astral-sh/setup-uv@v3
71
+ with:
72
+ enable-cache: true
73
+ - run: uv sync
74
+ - run: uv run pytest -m integration
@@ -0,0 +1,18 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.pyc
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .ruff_cache/
8
+ .pytest_cache/
9
+ .DS_Store
10
+ node_modules/
11
+ src/tallyman_companion/static/*.map
12
+ # vite build output — rebuild via `cd packages/embed && pnpm build`
13
+ src/tallyman_companion/static/buckaroo-embed.js
14
+ src/tallyman_companion/static/buckaroo-embed.css
15
+ # xorq-backed catalog git repo (managed externally via `xorq catalog`)
16
+ catalog/
17
+ # Stray ./projects/ created by anything run while TALLYMAN_HOME resolves to the repo root.
18
+ projects/
@@ -0,0 +1,13 @@
1
+ {
2
+ "mcpServers": {
3
+ "tallyman": {
4
+ "command": "uv",
5
+ "args": ["run", "tallyman", "mcp"],
6
+ "cwd": "/Users/paddy/tallyman",
7
+ "env": {
8
+ "TALLYMAN_PROJECT": "spike",
9
+ "TALLYMAN_COMPANION_URL": "http://127.0.0.1:7860"
10
+ }
11
+ }
12
+ }
13
+ }
@@ -0,0 +1,163 @@
1
+ Metadata-Version: 2.4
2
+ Name: tallyman-data
3
+ Version: 0.0.1
4
+ Summary: tallyman — deconstructed notebook platform
5
+ Requires-Python: <3.14,>=3.13
6
+ Requires-Dist: buckaroo==0.14.15
7
+ Requires-Dist: click>=8.1
8
+ Requires-Dist: fastapi>=0.115
9
+ Requires-Dist: fastmcp>=2.0
10
+ Requires-Dist: geopandas>=1.1.3
11
+ Requires-Dist: h3>=4.4.2
12
+ Requires-Dist: httpx>=0.27
13
+ Requires-Dist: ibis-framework>=12.0.0
14
+ Requires-Dist: markdown>=3.10.2
15
+ Requires-Dist: pandas>=2.2
16
+ Requires-Dist: pillow>=12.2.0
17
+ Requires-Dist: polars
18
+ Requires-Dist: pyarrow>=16
19
+ Requires-Dist: pydantic>=2.7
20
+ Requires-Dist: pygments>=2.20.0
21
+ Requires-Dist: scikit-learn<2.0,>=1.4
22
+ Requires-Dist: scipy>=1.17.1
23
+ Requires-Dist: seaborn>=0.13.2
24
+ Requires-Dist: sse-starlette>=2.1
25
+ Requires-Dist: tomli-w>=1.0
26
+ Requires-Dist: tornado>=6.0
27
+ Requires-Dist: uvicorn[standard]>=0.30
28
+ Requires-Dist: vl-convert-python>=1.9.0.post1
29
+ Requires-Dist: xorq==0.3.26
30
+ Description-Content-Type: text/markdown
31
+
32
+ # tallyman-notebooks
33
+
34
+ Spike for the Tallyman London 2026 talk *"The Future of Notebooks in a Claude Code World"*.
35
+ The plan and proposal live in `plan.md` / `proposal.md`. This README covers the V0 spike only.
36
+
37
+ ## V0 scope
38
+
39
+ End-to-end: a Claude Code MCP tool that compiles a xorq expression, materializes
40
+ a parquet to a content-hashed catalog entry on disk, and pushes a live update
41
+ to a browser companion via SSE.
42
+
43
+ What's working:
44
+
45
+ - **MCP tools:**
46
+ - Catalog: `catalog_run`, `catalog_load_parquet`, `catalog_create`,
47
+ `catalog_revise`, `catalog_alias`, `catalog_rename`, `catalog_unalias`,
48
+ `catalog_list`, `catalog_diff`.
49
+ - Notebook: `notebook_reorder`, `notebook_remove`, `notebook_edit_markdown`.
50
+ - **Companion** (FastAPI on `:7860`):
51
+ - `/catalog`, `/catalog/<hash>` or `/catalog/<alias>` — entry list and detail
52
+ with V_n chips, forensic history, and a link to internal lineage.
53
+ - `/notebook` — curated narrative: cells anchored on aliases, vertical layout,
54
+ inline markdown editor, ↑/↓ reorder, × remove.
55
+ - `/lineage` and `/lineage/<hash>` — catalog DAG (cross-entry parents derived
56
+ from `from_catalog`) and per-entry internal expression DAG. Pure SVG, no
57
+ Cytoscape dep.
58
+ - `/diff/<alias>[/<va>/<vb>]` — version diff with code diff, schema diff,
59
+ per-column stats, key-joined side-by-side, and head() side-by-side.
60
+ - `/errors/<id>` — build-failure detail.
61
+ - `/api/{entries,aliases,errors,notebook,lineage,catalog_dag}` — JSON.
62
+ - `/api/sse` — live updates (`new_entry`, `build_failed`, `alias_changed`,
63
+ `notebook_changed`).
64
+ - **Buckaroo subprocess** — `tallyman run` spawns `python -m buckaroo.server`
65
+ on `:8700` (falls back to a random port if busy), watches for the
66
+ `BUCKAROO_PORT=...` handshake, and lazily creates per-entry sessions on
67
+ first view by POSTing the entry's `xorq_build/` dir to Buckaroo's
68
+ `/load_expr` endpoint (PR 776) — sort/search push down to the xorq
69
+ backend rather than paging over a materialised parquet. The build dir
70
+ is expanded into a tmp copy first so `${TALLYMAN_PROJECT_ROOT}`
71
+ placeholders are resolved before xorq's loader sees them. Sessions are
72
+ persisted under `catalog/buckaroo_sessions.json` and invalidated by
73
+ start-time when Buckaroo restarts; tmp dirs are cleaned on
74
+ `BuckarooManager.stop()`. Tear-down rides along with the companion.
75
+ Disable with `--no-buckaroo`.
76
+ - **Build artifacts are portable.** xorq's absolute filesystem paths are
77
+ rewritten to `${TALLYMAN_PROJECT_ROOT}` on write and expanded back on load.
78
+ - **`tallyman serve <project_dir>`** — read-only companion against a project
79
+ directory that may live anywhere on disk. Mutation routes return 403.
80
+
81
+ What's NOT yet implemented (see `TICKETS.md` for the full punchlist):
82
+
83
+ 1. SortableJS drag-reorder for the notebook (current ↑/↓ buttons are the
84
+ accessibility fallback; drag is the headline UX).
85
+ 2. Column-level lineage (xorq has the data; current view is op-level only).
86
+ 3. `tallyman pack` / `tallyman replay` (today's hand-off is `cp -r` / `tar`).
87
+ 4. ML training pipeline (storyboard beats 7-8).
88
+
89
+ ## Running the spike
90
+
91
+ ```sh
92
+ uv sync
93
+ uv run tallyman init spike # creates ~/.tallyman/projects/spike/ + fixture
94
+ uv run tallyman run --project spike # edit-mode companion on http://127.0.0.1:7860
95
+ ```
96
+
97
+ The companion's dataframe embed is a Vite React library in `packages/embed/`
98
+ that builds into `src/tallyman_companion/static/buckaroo-embed.{js,css}`. The
99
+ build artifact is **not** committed — Node + pnpm are install-time
100
+ prerequisites:
101
+
102
+ ```sh
103
+ cd packages/embed && pnpm install && pnpm build # one-time per checkout
104
+ # or, for active embed development:
105
+ cd packages/embed && pnpm dev # vite build --watch
106
+ ```
107
+
108
+ Bump `buckaroo-js-core` in `packages/embed/package.json` to match the
109
+ Python `buckaroo` pin in `pyproject.toml` and rebuild whenever either
110
+ side moves.
111
+
112
+ In another terminal, launch Claude Code from this directory; it picks up
113
+ `.mcp.json` and exposes the `tallyman` MCP server.
114
+
115
+ Recommended prompts:
116
+
117
+ > Use catalog_load_parquet to load `orders.parquet`.
118
+ >
119
+ > Now use catalog_create to make a named entry `shoe_sales` that groups orders
120
+ > by region and totals the price.
121
+ >
122
+ > Now revise shoe_sales to filter to category == "boots" only.
123
+
124
+ Watch the browser update live as each tool call lands. Named entries float to
125
+ the top of the catalog with a V_n chip; the previous version sticks around in
126
+ forensic history.
127
+
128
+ ### Serving a project as an artifact
129
+
130
+ Once you've authored a project, hand it off:
131
+
132
+ ```sh
133
+ tar czf my-project.tgz -C ~/.tallyman/projects spike
134
+ # colleague extracts somewhere
135
+ tar xzf my-project.tgz -C ~/projects/
136
+ uv run tallyman serve ~/projects/spike
137
+ ```
138
+
139
+ The companion runs read-only: same catalog, same forensic history, no edit
140
+ affordances. Mutation routes return 403.
141
+
142
+ ## Conventions worth knowing
143
+
144
+ - xorq 0.3.x reads use `xo.deferred_read_parquet` (NOT `xo.read_parquet` — that
145
+ resolves through ibis's backend loader and fails). Use
146
+ `import xorq.api as xo` and `import xorq.vendor.ibis as ibis`. Do NOT
147
+ `import ibis` directly.
148
+ - Prefer `from tallyman_xorq.io import from_project; t = from_project("name.parquet")`
149
+ over absolute paths — the catalog records project-relative intent and the
150
+ build is portable across machines/users.
151
+ - Content hash is xorq's build hash — same code + same inputs → same hash → same
152
+ entry dir (idempotent).
153
+ - All catalog state lives on disk. The MCP server holds no in-memory state; the
154
+ companion only holds the SSE subscriber list.
155
+ - `TALLYMAN_PROJECT_PATH` overrides `project_dir()` resolution for the active project.
156
+ Used by `tallyman serve` to point at a project directory anywhere on disk.
157
+
158
+ ## Tests
159
+
160
+ ```sh
161
+ uv run pytest # 76 tests, ~5s
162
+ uv run pytest tests/test_portable.py # the portability proof
163
+ ```
@@ -0,0 +1,132 @@
1
+ # tallyman-notebooks
2
+
3
+ Spike for the Tallyman London 2026 talk *"The Future of Notebooks in a Claude Code World"*.
4
+ The plan and proposal live in `plan.md` / `proposal.md`. This README covers the V0 spike only.
5
+
6
+ ## V0 scope
7
+
8
+ End-to-end: a Claude Code MCP tool that compiles a xorq expression, materializes
9
+ a parquet to a content-hashed catalog entry on disk, and pushes a live update
10
+ to a browser companion via SSE.
11
+
12
+ What's working:
13
+
14
+ - **MCP tools:**
15
+ - Catalog: `catalog_run`, `catalog_load_parquet`, `catalog_create`,
16
+ `catalog_revise`, `catalog_alias`, `catalog_rename`, `catalog_unalias`,
17
+ `catalog_list`, `catalog_diff`.
18
+ - Notebook: `notebook_reorder`, `notebook_remove`, `notebook_edit_markdown`.
19
+ - **Companion** (FastAPI on `:7860`):
20
+ - `/catalog`, `/catalog/<hash>` or `/catalog/<alias>` — entry list and detail
21
+ with V_n chips, forensic history, and a link to internal lineage.
22
+ - `/notebook` — curated narrative: cells anchored on aliases, vertical layout,
23
+ inline markdown editor, ↑/↓ reorder, × remove.
24
+ - `/lineage` and `/lineage/<hash>` — catalog DAG (cross-entry parents derived
25
+ from `from_catalog`) and per-entry internal expression DAG. Pure SVG, no
26
+ Cytoscape dep.
27
+ - `/diff/<alias>[/<va>/<vb>]` — version diff with code diff, schema diff,
28
+ per-column stats, key-joined side-by-side, and head() side-by-side.
29
+ - `/errors/<id>` — build-failure detail.
30
+ - `/api/{entries,aliases,errors,notebook,lineage,catalog_dag}` — JSON.
31
+ - `/api/sse` — live updates (`new_entry`, `build_failed`, `alias_changed`,
32
+ `notebook_changed`).
33
+ - **Buckaroo subprocess** — `tallyman run` spawns `python -m buckaroo.server`
34
+ on `:8700` (falls back to a random port if busy), watches for the
35
+ `BUCKAROO_PORT=...` handshake, and lazily creates per-entry sessions on
36
+ first view by POSTing the entry's `xorq_build/` dir to Buckaroo's
37
+ `/load_expr` endpoint (PR 776) — sort/search push down to the xorq
38
+ backend rather than paging over a materialised parquet. The build dir
39
+ is expanded into a tmp copy first so `${TALLYMAN_PROJECT_ROOT}`
40
+ placeholders are resolved before xorq's loader sees them. Sessions are
41
+ persisted under `catalog/buckaroo_sessions.json` and invalidated by
42
+ start-time when Buckaroo restarts; tmp dirs are cleaned on
43
+ `BuckarooManager.stop()`. Tear-down rides along with the companion.
44
+ Disable with `--no-buckaroo`.
45
+ - **Build artifacts are portable.** xorq's absolute filesystem paths are
46
+ rewritten to `${TALLYMAN_PROJECT_ROOT}` on write and expanded back on load.
47
+ - **`tallyman serve <project_dir>`** — read-only companion against a project
48
+ directory that may live anywhere on disk. Mutation routes return 403.
49
+
50
+ What's NOT yet implemented (see `TICKETS.md` for the full punchlist):
51
+
52
+ 1. SortableJS drag-reorder for the notebook (current ↑/↓ buttons are the
53
+ accessibility fallback; drag is the headline UX).
54
+ 2. Column-level lineage (xorq has the data; current view is op-level only).
55
+ 3. `tallyman pack` / `tallyman replay` (today's hand-off is `cp -r` / `tar`).
56
+ 4. ML training pipeline (storyboard beats 7-8).
57
+
58
+ ## Running the spike
59
+
60
+ ```sh
61
+ uv sync
62
+ uv run tallyman init spike # creates ~/.tallyman/projects/spike/ + fixture
63
+ uv run tallyman run --project spike # edit-mode companion on http://127.0.0.1:7860
64
+ ```
65
+
66
+ The companion's dataframe embed is a Vite React library in `packages/embed/`
67
+ that builds into `src/tallyman_companion/static/buckaroo-embed.{js,css}`. The
68
+ build artifact is **not** committed — Node + pnpm are install-time
69
+ prerequisites:
70
+
71
+ ```sh
72
+ cd packages/embed && pnpm install && pnpm build # one-time per checkout
73
+ # or, for active embed development:
74
+ cd packages/embed && pnpm dev # vite build --watch
75
+ ```
76
+
77
+ Bump `buckaroo-js-core` in `packages/embed/package.json` to match the
78
+ Python `buckaroo` pin in `pyproject.toml` and rebuild whenever either
79
+ side moves.
80
+
81
+ In another terminal, launch Claude Code from this directory; it picks up
82
+ `.mcp.json` and exposes the `tallyman` MCP server.
83
+
84
+ Recommended prompts:
85
+
86
+ > Use catalog_load_parquet to load `orders.parquet`.
87
+ >
88
+ > Now use catalog_create to make a named entry `shoe_sales` that groups orders
89
+ > by region and totals the price.
90
+ >
91
+ > Now revise shoe_sales to filter to category == "boots" only.
92
+
93
+ Watch the browser update live as each tool call lands. Named entries float to
94
+ the top of the catalog with a V_n chip; the previous version sticks around in
95
+ forensic history.
96
+
97
+ ### Serving a project as an artifact
98
+
99
+ Once you've authored a project, hand it off:
100
+
101
+ ```sh
102
+ tar czf my-project.tgz -C ~/.tallyman/projects spike
103
+ # colleague extracts somewhere
104
+ tar xzf my-project.tgz -C ~/projects/
105
+ uv run tallyman serve ~/projects/spike
106
+ ```
107
+
108
+ The companion runs read-only: same catalog, same forensic history, no edit
109
+ affordances. Mutation routes return 403.
110
+
111
+ ## Conventions worth knowing
112
+
113
+ - xorq 0.3.x reads use `xo.deferred_read_parquet` (NOT `xo.read_parquet` — that
114
+ resolves through ibis's backend loader and fails). Use
115
+ `import xorq.api as xo` and `import xorq.vendor.ibis as ibis`. Do NOT
116
+ `import ibis` directly.
117
+ - Prefer `from tallyman_xorq.io import from_project; t = from_project("name.parquet")`
118
+ over absolute paths — the catalog records project-relative intent and the
119
+ build is portable across machines/users.
120
+ - Content hash is xorq's build hash — same code + same inputs → same hash → same
121
+ entry dir (idempotent).
122
+ - All catalog state lives on disk. The MCP server holds no in-memory state; the
123
+ companion only holds the SSE subscriber list.
124
+ - `TALLYMAN_PROJECT_PATH` overrides `project_dir()` resolution for the active project.
125
+ Used by `tallyman serve` to point at a project directory anywhere on disk.
126
+
127
+ ## Tests
128
+
129
+ ```sh
130
+ uv run pytest # 76 tests, ~5s
131
+ uv run pytest tests/test_portable.py # the portability proof
132
+ ```