stipple 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stipple-0.1.0/.gitignore +32 -0
- stipple-0.1.0/LICENSE +21 -0
- stipple-0.1.0/PKG-INFO +87 -0
- stipple-0.1.0/README.md +42 -0
- stipple-0.1.0/TESTING.md +52 -0
- stipple-0.1.0/examples/dedup.ipynb +163 -0
- stipple-0.1.0/examples/embeddings.ipynb +176 -0
- stipple-0.1.0/examples/g0_smoke.ipynb +70 -0
- stipple-0.1.0/examples/g1_smoke.ipynb +80 -0
- stipple-0.1.0/examples/g2_smoke.ipynb +110 -0
- stipple-0.1.0/examples/g3_smoke.ipynb +152 -0
- stipple-0.1.0/examples/g4_smoke.ipynb +155 -0
- stipple-0.1.0/js/package-lock.json +1289 -0
- stipple-0.1.0/js/package.json +19 -0
- stipple-0.1.0/js/src/index.ts +701 -0
- stipple-0.1.0/js/src/shaders.ts +127 -0
- stipple-0.1.0/js/tsconfig.json +21 -0
- stipple-0.1.0/js/vite.config.ts +23 -0
- stipple-0.1.0/pyproject.toml +67 -0
- stipple-0.1.0/scripts/probe_examples.py +403 -0
- stipple-0.1.0/scripts/probe_g0.py +190 -0
- stipple-0.1.0/scripts/probe_g1.py +209 -0
- stipple-0.1.0/scripts/probe_g2.py +244 -0
- stipple-0.1.0/scripts/probe_g3.py +218 -0
- stipple-0.1.0/scripts/probe_g4.py +273 -0
- stipple-0.1.0/src/stipple/__init__.py +4 -0
- stipple-0.1.0/src/stipple/_static/index.js +11426 -0
- stipple-0.1.0/src/stipple/_static/index.js.map +1 -0
- stipple-0.1.0/src/stipple/widget.py +153 -0
- stipple-0.1.0/uv.lock +3020 -0
stipple-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.pytest_cache/
|
|
6
|
+
.ruff_cache/
|
|
7
|
+
.venv/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
|
|
11
|
+
# Node
|
|
12
|
+
node_modules/
|
|
13
|
+
*.tsbuildinfo
|
|
14
|
+
|
|
15
|
+
# Vite output is a build artifact, but it is tracked because anywidget loads from it
|
|
16
|
+
# Uncomment to exclude from git if we switch to a release-only build:
|
|
17
|
+
# src/stipple/_static/
|
|
18
|
+
|
|
19
|
+
# IDE / OS
|
|
20
|
+
.DS_Store
|
|
21
|
+
.vscode/
|
|
22
|
+
.idea/
|
|
23
|
+
|
|
24
|
+
# Jupyter
|
|
25
|
+
.ipynb_checkpoints/
|
|
26
|
+
.jupyter-probe/
|
|
27
|
+
|
|
28
|
+
# Probe diagnostic screenshots — regenerable via scripts/probe_*.py
|
|
29
|
+
scripts/*.png
|
|
30
|
+
scripts/g*-result.png
|
|
31
|
+
scripts/ex-*.png
|
|
32
|
+
scripts/sticky-*.png
|
stipple-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Steve Ledbetter
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
stipple-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stipple
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: WebGPU scatter widget for Jupyter — render millions of points, lasso, return row indices to Python.
|
|
5
|
+
Project-URL: Homepage, https://github.com/smledbetter/stipple
|
|
6
|
+
Project-URL: Repository, https://github.com/smledbetter/stipple
|
|
7
|
+
Project-URL: Issues, https://github.com/smledbetter/stipple/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/smledbetter/stipple/releases
|
|
9
|
+
Author-email: Steve Ledbetter <smledbetter@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: anywidget,jupyter,lasso,ml,scatter,visualization,webgpu
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Framework :: Jupyter
|
|
15
|
+
Classifier: Framework :: Jupyter :: JupyterLab
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
26
|
+
Requires-Python: >=3.10
|
|
27
|
+
Requires-Dist: anywidget>=0.9
|
|
28
|
+
Requires-Dist: numpy>=1.26
|
|
29
|
+
Requires-Dist: pyarrow>=15
|
|
30
|
+
Requires-Dist: traitlets>=5
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
33
|
+
Requires-Dist: ipywidgets>=8.1; extra == 'dev'
|
|
34
|
+
Requires-Dist: jupyterlab>=4.4; extra == 'dev'
|
|
35
|
+
Requires-Dist: pandas>=2.2; extra == 'dev'
|
|
36
|
+
Requires-Dist: playwright>=1.55; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
38
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
39
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
40
|
+
Provides-Extra: examples
|
|
41
|
+
Requires-Dist: ipywidgets>=8.1; extra == 'examples'
|
|
42
|
+
Requires-Dist: jupyterlab>=4.4; extra == 'examples'
|
|
43
|
+
Requires-Dist: scikit-learn>=1.4; extra == 'examples'
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
|
|
46
|
+
# Stipple
|
|
47
|
+
|
|
48
|
+
WebGPU scatter widget for Jupyter. Render millions of points, lasso a region, get the row indices back in Python.
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from stipple import Stipple
|
|
52
|
+
w = Stipple(df, x="umap_0", y="umap_1", color="label")
|
|
53
|
+
w
|
|
54
|
+
# ... user lassos in widget ...
|
|
55
|
+
w.selected_indices # numpy array
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Status
|
|
59
|
+
|
|
60
|
+
Pre-release. Gate ladder:
|
|
61
|
+
|
|
62
|
+
- [x] **G0** — Package skeleton + 100-point WebGPU render
|
|
63
|
+
- [ ] **G1** — Arrow-over-comm transport (1M rows)
|
|
64
|
+
- [ ] **G2** — 1M scatter with pan/zoom + discrete color
|
|
65
|
+
- [ ] **G3** — 10M scale via chunked buffers
|
|
66
|
+
- [ ] **G4** — Polygon lasso → `selected_indices`
|
|
67
|
+
- [ ] **G5** — Example notebooks + cross-env test matrix
|
|
68
|
+
- [ ] **G6** — Wheel packaging
|
|
69
|
+
|
|
70
|
+
## Dev setup
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
uv sync --extra dev --extra examples
|
|
74
|
+
cd js && npm install && npm run build && cd ..
|
|
75
|
+
uv run jupyter lab examples/
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Architecture
|
|
79
|
+
|
|
80
|
+
- Python: `anywidget.AnyWidget` subclass with `traitlets`. Arrow IPC over the comm channel for tensor/DataFrame transport.
|
|
81
|
+
- JS: Vite-built ESM bundle at `src/stipple/_static/index.js`. WebGPU scatter pipeline with instanced unit quads.
|
|
82
|
+
|
|
83
|
+
See `Projects/Research/Stipple/JUPYTER-WIDGET-RESEARCH.md` in the upstream Thinking vault for design rationale.
|
|
84
|
+
|
|
85
|
+
## License
|
|
86
|
+
|
|
87
|
+
MIT
|
stipple-0.1.0/README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Stipple
|
|
2
|
+
|
|
3
|
+
WebGPU scatter widget for Jupyter. Render millions of points, lasso a region, get the row indices back in Python.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
from stipple import Stipple
|
|
7
|
+
w = Stipple(df, x="umap_0", y="umap_1", color="label")
|
|
8
|
+
w
|
|
9
|
+
# ... user lassos in widget ...
|
|
10
|
+
w.selected_indices # numpy array
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Status
|
|
14
|
+
|
|
15
|
+
Pre-release. Gate ladder:
|
|
16
|
+
|
|
17
|
+
- [x] **G0** — Package skeleton + 100-point WebGPU render
|
|
18
|
+
- [ ] **G1** — Arrow-over-comm transport (1M rows)
|
|
19
|
+
- [ ] **G2** — 1M scatter with pan/zoom + discrete color
|
|
20
|
+
- [ ] **G3** — 10M scale via chunked buffers
|
|
21
|
+
- [ ] **G4** — Polygon lasso → `selected_indices`
|
|
22
|
+
- [ ] **G5** — Example notebooks + cross-env test matrix
|
|
23
|
+
- [ ] **G6** — Wheel packaging
|
|
24
|
+
|
|
25
|
+
## Dev setup
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
uv sync --extra dev --extra examples
|
|
29
|
+
cd js && npm install && npm run build && cd ..
|
|
30
|
+
uv run jupyter lab examples/
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Architecture
|
|
34
|
+
|
|
35
|
+
- Python: `anywidget.AnyWidget` subclass with `traitlets`. Arrow IPC over the comm channel for tensor/DataFrame transport.
|
|
36
|
+
- JS: Vite-built ESM bundle at `src/stipple/_static/index.js`. WebGPU scatter pipeline with instanced unit quads.
|
|
37
|
+
|
|
38
|
+
See `Projects/Research/Stipple/JUPYTER-WIDGET-RESEARCH.md` in the upstream Thinking vault for design rationale.
|
|
39
|
+
|
|
40
|
+
## License
|
|
41
|
+
|
|
42
|
+
MIT
|
stipple-0.1.0/TESTING.md
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Stipple env support matrix
|
|
2
|
+
|
|
3
|
+
The automated probes in `scripts/probe_g*.py` cover **JupyterLab on M4
|
|
4
|
+
Chromium with WebGPU flags** end-to-end. The matrix below is the manual
|
|
5
|
+
verification grid for the remaining environments. Fill in as tested.
|
|
6
|
+
|
|
7
|
+
| Environment | Status | Notes |
|
|
8
|
+
|---|---|---|
|
|
9
|
+
| JupyterLab / Chromium (probe) | ✓ verified | `scripts/probe_g4.py` green; FPS 60+ at 10M |
|
|
10
|
+
| JupyterLab / real Chrome 134+ | _pending_ | Open `examples/embeddings.ipynb`; check the lasso |
|
|
11
|
+
| JupyterLab / real Safari 26+ | _pending_ | Safari 26 ships WebGPU on Apple Silicon |
|
|
12
|
+
| VSCode notebook | _pending_ | Webview is Chromium-based; expected to work |
|
|
13
|
+
| Google Colab | _pending_ | Cross-origin output iframe — may fail; not load-bearing |
|
|
14
|
+
| JupyterHub | _pending_ | `anywidget` must be installed in the kernel env, not just the launch env |
|
|
15
|
+
|
|
16
|
+
## How to fill in a row manually
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# Install the dev and examples extras (jupyterlab + sklearn + ipywidgets, etc.)
|
|
20
|
+
uv sync --extra dev --extra examples
|
|
21
|
+
# Build the JS bundle if you haven't yet
|
|
22
|
+
cd js && npm install && npm run build && cd ..
|
|
23
|
+
# Launch
|
|
24
|
+
uv run jupyter lab examples/embeddings.ipynb
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Then in the browser:
|
|
28
|
+
1. Run all cells
|
|
29
|
+
2. Shift+drag in the widget canvas
|
|
30
|
+
3. Run the inspection cell — `w.selected_indices` should be populated
|
|
31
|
+
|
|
32
|
+
For Safari, navigate manually to the JupyterLab URL printed in the terminal.
|
|
33
|
+
|
|
34
|
+
For Colab: run `%pip install stipple` inside a Colab cell, then upload the example
|
|
35
|
+
notebook. Per the upstream research note this row is not load-bearing
|
|
36
|
+
for "covers most ML researchers"; treat any failure as expected and document
|
|
37
|
+
the symptom.
|
|
38
|
+
|
|
39
|
+
For VSCode: open the example notebook directly in VSCode (Python +
|
|
40
|
+
Jupyter extensions installed), select the `.venv` interpreter, run cells.
|
|
41
|
+
|
|
42
|
+
## Known caveats
|
|
43
|
+
|
|
44
|
+
- `widget model not found` console errors during initial widget setup are
|
|
45
|
+
benign — JupyterLab's manager probes for the model before the comm
|
|
46
|
+
channel completes registration. The final view is properly connected,
|
|
47
|
+
proven by the bidirectional trait sync.
|
|
48
|
+
- WebGPU is gated behind a permissions banner in some Chrome
|
|
49
|
+
configurations; if no adapter is found, check `chrome://flags` for
|
|
50
|
+
`#enable-unsafe-webgpu`; Linux users may also need `#enable-vulkan`.
|
|
51
|
+
- The widget requires HTTPS or `localhost` — WebGPU is blocked on plain
|
|
52
|
+
HTTP origins outside localhost.
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"id": "422f3f70",
|
|
6
|
+
"metadata": {},
|
|
7
|
+
"source": [
|
|
8
|
+
"# Near-duplicate discovery with Stipple\n",
|
|
9
|
+
"\n",
|
|
10
|
+
"**Recipe:** render an embedding of your dataset, look for tight micro-clusters in the projection, lasso them to recover the row indices of suspected near-duplicates.\n",
|
|
11
|
+
"\n",
|
|
12
|
+
"This example uses **synthetic** 2D embeddings with planted near-duplicate clusters so the visualization is reproducible. In real use you'd substitute MinHash-LSH projections, sentence-embedding projections, etc.\n",
|
|
13
|
+
"\n",
|
|
14
|
+
"## Setup\n",
|
|
15
|
+
"200k synthetic 'documents' embedded in 2D. 195k are diffuse Gaussian noise; 5k form 50 tight planted near-duplicate clusters of size ~100, scattered across the embedding space."
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"cell_type": "code",
|
|
20
|
+
"execution_count": 1,
|
|
21
|
+
"id": "74795c66",
|
|
22
|
+
"metadata": {},
|
|
23
|
+
"outputs": [],
|
|
24
|
+
"source": [
|
|
25
|
+
"import numpy as np\n",
|
|
26
|
+
"from stipple import Stipple"
|
|
27
|
+
]
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"cell_type": "code",
|
|
31
|
+
"execution_count": 2,
|
|
32
|
+
"id": "5b1051e5",
|
|
33
|
+
"metadata": {},
|
|
34
|
+
"outputs": [
|
|
35
|
+
{
|
|
36
|
+
"name": "stdout",
|
|
37
|
+
"output_type": "stream",
|
|
38
|
+
"text": [
|
|
39
|
+
"200,000 total points · 195,000 background · 5,000 planted duplicates in 50 clusters\n",
|
|
40
|
+
"Planted-dup indices: [195,000, 200,000)\n"
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
],
|
|
44
|
+
"source": [
|
|
45
|
+
"rng = np.random.default_rng(0)\n",
|
|
46
|
+
"n_noise = 195_000\n",
|
|
47
|
+
"n_clusters = 50\n",
|
|
48
|
+
"per_cluster = 100\n",
|
|
49
|
+
"n_dups = n_clusters * per_cluster\n",
|
|
50
|
+
"n = n_noise + n_dups\n",
|
|
51
|
+
"\n",
|
|
52
|
+
"# Diffuse 'unique' documents (background)\n",
|
|
53
|
+
"x_noise = rng.standard_normal(n_noise).astype(np.float32) * 3.0\n",
|
|
54
|
+
"y_noise = rng.standard_normal(n_noise).astype(np.float32) * 3.0\n",
|
|
55
|
+
"\n",
|
|
56
|
+
"# Planted near-duplicate clusters: 50 cluster centers in the same range,\n",
|
|
57
|
+
"# each producing 100 points within a tight (std 0.04) Gaussian.\n",
|
|
58
|
+
"centers = rng.standard_normal((n_clusters, 2)).astype(np.float32) * 3.0\n",
|
|
59
|
+
"x_dups = np.empty(n_dups, dtype=np.float32)\n",
|
|
60
|
+
"y_dups = np.empty(n_dups, dtype=np.float32)\n",
|
|
61
|
+
"for k in range(n_clusters):\n",
|
|
62
|
+
" sl = slice(k * per_cluster, (k + 1) * per_cluster)\n",
|
|
63
|
+
" x_dups[sl] = centers[k, 0] + rng.standard_normal(per_cluster).astype(np.float32) * 0.04\n",
|
|
64
|
+
" y_dups[sl] = centers[k, 1] + rng.standard_normal(per_cluster).astype(np.float32) * 0.04\n",
|
|
65
|
+
"\n",
|
|
66
|
+
"x = np.concatenate([x_noise, x_dups])\n",
|
|
67
|
+
"y = np.concatenate([y_noise, y_dups])\n",
|
|
68
|
+
"labels = np.concatenate([\n",
|
|
69
|
+
" np.full(n_noise, 'noise'),\n",
|
|
70
|
+
" np.full(n_dups, 'planted-dup'),\n",
|
|
71
|
+
"])\n",
|
|
72
|
+
"\n",
|
|
73
|
+
"print(f'{n:,} total points · {n_noise:,} background · {n_dups:,} planted duplicates in {n_clusters} clusters')\n",
|
|
74
|
+
"print(f'Planted-dup indices: [{n_noise:,}, {n:,})')"
|
|
75
|
+
]
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"cell_type": "code",
|
|
79
|
+
"execution_count": 3,
|
|
80
|
+
"id": "f8b250d8",
|
|
81
|
+
"metadata": {},
|
|
82
|
+
"outputs": [
|
|
83
|
+
{
|
|
84
|
+
"data": {
|
|
85
|
+
"application/vnd.jupyter.widget-view+json": {
|
|
86
|
+
"model_id": "78b3bef30ddc4f1ca49b2f5421a706f6",
|
|
87
|
+
"version_major": 2,
|
|
88
|
+
"version_minor": 1
|
|
89
|
+
},
|
|
90
|
+
"text/plain": [
|
|
91
|
+
"<stipple.widget.Stipple object at 0x1096a0680>"
|
|
92
|
+
]
|
|
93
|
+
},
|
|
94
|
+
"execution_count": 3,
|
|
95
|
+
"metadata": {},
|
|
96
|
+
"output_type": "execute_result"
|
|
97
|
+
}
|
|
98
|
+
],
|
|
99
|
+
"source": [
|
|
100
|
+
"w = Stipple(x=x, y=y, color=labels)\n",
|
|
101
|
+
"w"
|
|
102
|
+
]
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"cell_type": "markdown",
|
|
106
|
+
"id": "d5e64cbe",
|
|
107
|
+
"metadata": {},
|
|
108
|
+
"source": [
|
|
109
|
+
"**Try:** zoom into a high-density spot (wheel up to zoom). The planted duplicate clusters appear as tight orange specks against the blue background haze. Shift+drag around one to lasso it."
|
|
110
|
+
]
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"cell_type": "code",
|
|
114
|
+
"execution_count": 4,
|
|
115
|
+
"id": "ae4ca0bb",
|
|
116
|
+
"metadata": {},
|
|
117
|
+
"outputs": [
|
|
118
|
+
{
|
|
119
|
+
"name": "stdout",
|
|
120
|
+
"output_type": "stream",
|
|
121
|
+
"text": [
|
|
122
|
+
"No selection yet. Zoom into a tight cluster and shift+drag to lasso it.\n"
|
|
123
|
+
]
|
|
124
|
+
}
|
|
125
|
+
],
|
|
126
|
+
"source": [
|
|
127
|
+
"idx = w.selected_indices\n",
|
|
128
|
+
"if len(idx) == 0:\n",
|
|
129
|
+
" print('No selection yet. Zoom into a tight cluster and shift+drag to lasso it.')\n",
|
|
130
|
+
"else:\n",
|
|
131
|
+
" planted = (idx >= n_noise).sum()\n",
|
|
132
|
+
" purity = planted / max(1, len(idx))\n",
|
|
133
|
+
" print(f'Selected {len(idx):,} points · {purity:.1%} from planted-dup region')\n",
|
|
134
|
+
" if planted > 0:\n",
|
|
135
|
+
" # Show which planted cluster(s) we hit\n",
|
|
136
|
+
" local = idx[idx >= n_noise] - n_noise\n",
|
|
137
|
+
" clusters = np.unique(local // per_cluster)\n",
|
|
138
|
+
" print(f'Planted clusters hit: {clusters.tolist()}')"
|
|
139
|
+
]
|
|
140
|
+
}
|
|
141
|
+
],
|
|
142
|
+
"metadata": {
|
|
143
|
+
"kernelspec": {
|
|
144
|
+
"display_name": "Python 3 (ipykernel)",
|
|
145
|
+
"language": "python",
|
|
146
|
+
"name": "python3"
|
|
147
|
+
},
|
|
148
|
+
"language_info": {
|
|
149
|
+
"codemirror_mode": {
|
|
150
|
+
"name": "ipython",
|
|
151
|
+
"version": 3
|
|
152
|
+
},
|
|
153
|
+
"file_extension": ".py",
|
|
154
|
+
"mimetype": "text/x-python",
|
|
155
|
+
"name": "python",
|
|
156
|
+
"nbconvert_exporter": "python",
|
|
157
|
+
"pygments_lexer": "ipython3",
|
|
158
|
+
"version": "3.12.13"
|
|
159
|
+
}
|
|
160
|
+
},
|
|
161
|
+
"nbformat": 4,
|
|
162
|
+
"nbformat_minor": 5
|
|
163
|
+
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"id": "82f186fa",
|
|
6
|
+
"metadata": {},
|
|
7
|
+
"source": [
|
|
8
|
+
"# Embedding visualization with Stipple\n",
|
|
9
|
+
"\n",
|
|
10
|
+
"Render an embedding projection, lasso a region of interest, get the row indices back in Python — the canonical ML-researcher workflow.\n",
|
|
11
|
+
"\n",
|
|
12
|
+
"This example uses the `sklearn` digits dataset (1797 samples, 8×8 grayscale digits 0–9). PCA → 2D for display. Replace with your own embedding + UMAP / t-SNE / PCA in real use.\n",
|
|
13
|
+
"\n",
|
|
14
|
+
"## Workflow\n",
|
|
15
|
+
"1. Compute (or load) a 2-D embedding\n",
|
|
16
|
+
"2. Pass `x`, `y`, `color` to `Stipple(...)`\n",
|
|
17
|
+
"3. Shift+drag in the canvas to lasso a region\n",
|
|
18
|
+
"4. Read `w.selected_indices` back in Python\n",
|
|
19
|
+
"5. Do something with those rows: inspect them, retrain on them, etc."
|
|
20
|
+
]
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"cell_type": "code",
|
|
24
|
+
"execution_count": 10,
|
|
25
|
+
"id": "6362f475",
|
|
26
|
+
"metadata": {},
|
|
27
|
+
"outputs": [],
|
|
28
|
+
"source": [
|
|
29
|
+
"import numpy as np\n",
|
|
30
|
+
"from sklearn.datasets import load_digits\n",
|
|
31
|
+
"from sklearn.decomposition import PCA\n",
|
|
32
|
+
"from stipple import Stipple"
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"cell_type": "code",
|
|
37
|
+
"execution_count": 11,
|
|
38
|
+
"id": "9baf56cf",
|
|
39
|
+
"metadata": {},
|
|
40
|
+
"outputs": [
|
|
41
|
+
{
|
|
42
|
+
"name": "stdout",
|
|
43
|
+
"output_type": "stream",
|
|
44
|
+
"text": [
|
|
45
|
+
"1,797 samples · 64 features → 2D projection\n"
|
|
46
|
+
]
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"source": [
|
|
50
|
+
"digits = load_digits()\n",
|
|
51
|
+
"X, y = digits.data, digits.target\n",
|
|
52
|
+
"proj = PCA(n_components=2).fit_transform(X)\n",
|
|
53
|
+
"labels = np.array([str(d) for d in y])\n",
|
|
54
|
+
"print(f'{X.shape[0]:,} samples · {X.shape[1]} features → 2D projection')"
|
|
55
|
+
]
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"cell_type": "code",
|
|
59
|
+
"execution_count": 12,
|
|
60
|
+
"id": "4c0ae8ac",
|
|
61
|
+
"metadata": {},
|
|
62
|
+
"outputs": [
|
|
63
|
+
{
|
|
64
|
+
"data": {
|
|
65
|
+
"application/vnd.jupyter.widget-view+json": {
|
|
66
|
+
"model_id": "9e0ae961241f43ae8f5207d354537922",
|
|
67
|
+
"version_major": 2,
|
|
68
|
+
"version_minor": 1
|
|
69
|
+
},
|
|
70
|
+
"text/plain": [
|
|
71
|
+
"<stipple.widget.Stipple object at 0x114711430>"
|
|
72
|
+
]
|
|
73
|
+
},
|
|
74
|
+
"execution_count": 12,
|
|
75
|
+
"metadata": {},
|
|
76
|
+
"output_type": "execute_result"
|
|
77
|
+
}
|
|
78
|
+
],
|
|
79
|
+
"source": [
|
|
80
|
+
"w = Stipple(x=proj[:, 0], y=proj[:, 1], color=labels)\n",
|
|
81
|
+
"w"
|
|
82
|
+
]
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"cell_type": "code",
|
|
86
|
+
"execution_count": 13,
|
|
87
|
+
"id": "039f6ecb-8488-486a-969a-09a82ca3bf2d",
|
|
88
|
+
"metadata": {},
|
|
89
|
+
"outputs": [
|
|
90
|
+
{
|
|
91
|
+
"name": "stdout",
|
|
92
|
+
"output_type": "stream",
|
|
93
|
+
"text": [
|
|
94
|
+
"len: 235 count: 1 id: 4637922352\n"
|
|
95
|
+
]
|
|
96
|
+
}
|
|
97
|
+
],
|
|
98
|
+
"source": [
|
|
99
|
+
" print(\"len:\", len(w.selected_indices), \"count:\", w.selection_count, \"id:\", id(w))"
|
|
100
|
+
]
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
"cell_type": "markdown",
|
|
104
|
+
"id": "8691db73",
|
|
105
|
+
"metadata": {},
|
|
106
|
+
"source": [
|
|
107
|
+
"Shift+drag in the canvas above to lasso a cluster of digits. Then run the next cell to inspect what you selected."
|
|
108
|
+
]
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"cell_type": "code",
|
|
112
|
+
"execution_count": 9,
|
|
113
|
+
"id": "8ed86bac",
|
|
114
|
+
"metadata": {},
|
|
115
|
+
"outputs": [
|
|
116
|
+
{
|
|
117
|
+
"name": "stdout",
|
|
118
|
+
"output_type": "stream",
|
|
119
|
+
"text": [
|
|
120
|
+
"No selection yet. Shift+drag in the widget above to lasso a region, then re-run this cell.\n"
|
|
121
|
+
]
|
|
122
|
+
}
|
|
123
|
+
],
|
|
124
|
+
"source": [
|
|
125
|
+
"import collections\n",
|
|
126
|
+
"idx = w.selected_indices\n",
|
|
127
|
+
"if len(idx) == 0:\n",
|
|
128
|
+
" print('No selection yet. Shift+drag in the widget above to lasso a region, then re-run this cell.')\n",
|
|
129
|
+
"else:\n",
|
|
130
|
+
" counts = collections.Counter(y[idx].tolist())\n",
|
|
131
|
+
" print(f'Selected {len(idx):,} samples')\n",
|
|
132
|
+
" print('Digit distribution:')\n",
|
|
133
|
+
" for digit, c in sorted(counts.items()):\n",
|
|
134
|
+
" bar = '█' * int(40 * c / max(counts.values()))\n",
|
|
135
|
+
" print(f' {digit}: {c:>4} {bar}')"
|
|
136
|
+
]
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"cell_type": "code",
|
|
140
|
+
"execution_count": null,
|
|
141
|
+
"id": "c80f9f41-ed32-467e-8500-50bf745c2108",
|
|
142
|
+
"metadata": {},
|
|
143
|
+
"outputs": [],
|
|
144
|
+
"source": []
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
"cell_type": "code",
|
|
148
|
+
"execution_count": null,
|
|
149
|
+
"id": "b104fd5c-ab21-4202-8dd4-e41c8fb064e9",
|
|
150
|
+
"metadata": {},
|
|
151
|
+
"outputs": [],
|
|
152
|
+
"source": []
|
|
153
|
+
}
|
|
154
|
+
],
|
|
155
|
+
"metadata": {
|
|
156
|
+
"kernelspec": {
|
|
157
|
+
"display_name": "Python 3 (ipykernel)",
|
|
158
|
+
"language": "python",
|
|
159
|
+
"name": "python3"
|
|
160
|
+
},
|
|
161
|
+
"language_info": {
|
|
162
|
+
"codemirror_mode": {
|
|
163
|
+
"name": "ipython",
|
|
164
|
+
"version": 3
|
|
165
|
+
},
|
|
166
|
+
"file_extension": ".py",
|
|
167
|
+
"mimetype": "text/x-python",
|
|
168
|
+
"name": "python",
|
|
169
|
+
"nbconvert_exporter": "python",
|
|
170
|
+
"pygments_lexer": "ipython3",
|
|
171
|
+
"version": "3.12.13"
|
|
172
|
+
}
|
|
173
|
+
},
|
|
174
|
+
"nbformat": 4,
|
|
175
|
+
"nbformat_minor": 5
|
|
176
|
+
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"id": "467aa9d1",
|
|
6
|
+
"metadata": {},
|
|
7
|
+
"source": [
|
|
8
|
+
"# Stipple G0 smoke test\n",
|
|
9
|
+
"\n",
|
|
10
|
+
"Verifies that the package skeleton and WebGPU pipeline render 100 points."
|
|
11
|
+
]
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"cell_type": "code",
|
|
15
|
+
"execution_count": null,
|
|
16
|
+
"id": "1607176b",
|
|
17
|
+
"metadata": {},
|
|
18
|
+
"outputs": [],
|
|
19
|
+
"source": [
|
|
20
|
+
"from stipple import Stipple\n",
|
|
21
|
+
"import stipple\n",
|
|
22
|
+
"print(f\"stipple {stipple.__version__}\")"
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"cell_type": "code",
|
|
27
|
+
"execution_count": null,
|
|
28
|
+
"id": "273f5a85",
|
|
29
|
+
"metadata": {},
|
|
30
|
+
"outputs": [],
|
|
31
|
+
"source": [
|
|
32
|
+
"w = Stipple()\n",
|
|
33
|
+
"w"
|
|
34
|
+
]
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"cell_type": "code",
|
|
38
|
+
"execution_count": null,
|
|
39
|
+
"id": "9e0c662c",
|
|
40
|
+
"metadata": {},
|
|
41
|
+
"outputs": [],
|
|
42
|
+
"source": [
|
|
43
|
+
"print(f\"status: {w.status}\")\n",
|
|
44
|
+
"print(f\"adapter_info: {w.adapter_info}\")\n",
|
|
45
|
+
"print(f\"error: {w.error}\")"
|
|
46
|
+
]
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"metadata": {
|
|
50
|
+
"kernelspec": {
|
|
51
|
+
"display_name": "Python 3 (ipykernel)",
|
|
52
|
+
"language": "python",
|
|
53
|
+
"name": "python3"
|
|
54
|
+
},
|
|
55
|
+
"language_info": {
|
|
56
|
+
"codemirror_mode": {
|
|
57
|
+
"name": "ipython",
|
|
58
|
+
"version": 3
|
|
59
|
+
},
|
|
60
|
+
"file_extension": ".py",
|
|
61
|
+
"mimetype": "text/x-python",
|
|
62
|
+
"name": "python",
|
|
63
|
+
"nbconvert_exporter": "python",
|
|
64
|
+
"pygments_lexer": "ipython3",
|
|
65
|
+
"version": "3.12.13"
|
|
66
|
+
}
|
|
67
|
+
},
|
|
68
|
+
"nbformat": 4,
|
|
69
|
+
"nbformat_minor": 5
|
|
70
|
+
}
|