tb-like 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tb_like-0.1.0/.gitignore +17 -0
- tb_like-0.1.0/.python-version +1 -0
- tb_like-0.1.0/LICENSE +21 -0
- tb_like-0.1.0/PKG-INFO +121 -0
- tb_like-0.1.0/README.md +92 -0
- tb_like-0.1.0/pyproject.toml +44 -0
- tb_like-0.1.0/tblike/__init__.py +8 -0
- tb_like-0.1.0/tblike/cli.py +201 -0
- tb_like-0.1.0/tblike/convert.py +264 -0
- tb_like-0.1.0/tblike/downsample.py +50 -0
- tb_like-0.1.0/tblike/events.py +195 -0
- tb_like-0.1.0/tblike/server.py +109 -0
- tb_like-0.1.0/tblike/static/app.js +893 -0
- tb_like-0.1.0/tblike/static/index.html +84 -0
- tb_like-0.1.0/tblike/static/style.css +195 -0
- tb_like-0.1.0/tblike/store.py +181 -0
- tb_like-0.1.0/tblike/watcher.py +102 -0
- tb_like-0.1.0/uv.lock +961 -0
tb_like-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
tb_like-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Misha Kindulov
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tb_like-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tb-like
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A faster, columnar TensorBoard-style scalar viewer for many series and many runs.
|
|
5
|
+
Project-URL: Homepage, https://github.com/b0nce/tb_like
|
|
6
|
+
Project-URL: Repository, https://github.com/b0nce/tb_like
|
|
7
|
+
Project-URL: Issues, https://github.com/b0nce/tb_like/issues
|
|
8
|
+
Author-email: Misha Kindulov <kindulov.ml@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: machine-learning,metrics,parquet,plotly,tensorboard,visualization
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Framework :: FastAPI
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Requires-Dist: fastapi>=0.137.1
|
|
20
|
+
Requires-Dist: joblib>=1.5.3
|
|
21
|
+
Requires-Dist: numpy>=1.26
|
|
22
|
+
Requires-Dist: plotly>=6.8.0
|
|
23
|
+
Requires-Dist: polars>=1.41.2
|
|
24
|
+
Requires-Dist: pyarrow>=24.0.0
|
|
25
|
+
Requires-Dist: tensorboard>=2.20.0
|
|
26
|
+
Requires-Dist: tqdm>=4.68.2
|
|
27
|
+
Requires-Dist: uvicorn[standard]>=0.49.0
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# tb_like
|
|
31
|
+
|
|
32
|
+
A faster, **columnar** TensorBoard-style scalar viewer — built for **many series and many runs**.
|
|
33
|
+
|
|
34
|
+
TensorBoard re-parses event files on demand and gets slow when a run has tens of
|
|
35
|
+
thousands of scalar series across hundreds of experiments. `tb_like` instead
|
|
36
|
+
converts TensorBoard event files into per-run **Parquet** once, then serves
|
|
37
|
+
downsampled series on demand to a fast Plotly dashboard. New events are picked
|
|
38
|
+
up incrementally in the background.
|
|
39
|
+
|
|
40
|
+
## Why it's fast
|
|
41
|
+
|
|
42
|
+
- **Convert once, read many.** Each run's `events.out.tfevents.*` files are parsed into
|
|
43
|
+
a columnar Parquet file sorted by `(tag, step)`, with row-group statistics so a
|
|
44
|
+
query for a few tags only touches the matching row groups — even when a run has
|
|
45
|
+
~18k series.
|
|
46
|
+
- **Incremental & idempotent.** Ingestion tracks each event file's size and record
|
|
47
|
+
count, so re-scans only parse new data. A background watcher keeps the cache in
|
|
48
|
+
sync; parsing is parallelized across event files with `joblib`.
|
|
49
|
+
- **Lazy, prioritized rendering.** The dashboard renders charts only as they
|
|
50
|
+
scroll into view, fetched through a priority queue (visible first, then
|
|
51
|
+
neighbors, biased toward the scroll direction).
|
|
52
|
+
- **LTTB downsampling** keeps long curves cheap to draw without losing their shape.
|
|
53
|
+
|
|
54
|
+
## Install
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install tb-like
|
|
58
|
+
# or
|
|
59
|
+
uv tool install tb-like
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Quick start
|
|
63
|
+
|
|
64
|
+
A "run" is a directory containing `events.out.tfevents.*` files. Point `tb_like`
|
|
65
|
+
at a directory of runs and open the dashboard — that's it:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
my_runs/
|
|
69
|
+
run_a/ events.out.tfevents.* config.yaml
|
|
70
|
+
run_b/ events.out.tfevents.*
|
|
71
|
+
...
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
tblike my_runs --port 8000 --jobs 8
|
|
76
|
+
# open http://127.0.0.1:8000
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
The background watcher discovers runs under the folder, converts any that
|
|
80
|
+
changed to Parquet (parsing event files across `--jobs` worker processes), keeps
|
|
81
|
+
the cache in sync, and serves them — no separate build step. The cache lives in
|
|
82
|
+
`<runs_dir>/.tblike_cache` by default (override with `--cache-dir`).
|
|
83
|
+
|
|
84
|
+
## Dashboard features
|
|
85
|
+
|
|
86
|
+
- Hierarchical, searchable **tag tree** (regex filter) with smart grouping:
|
|
87
|
+
path compression of `a.b.c` chains, numeric-enumeration collapsing
|
|
88
|
+
(`…expert_idx_∗`), and layer indices kept as their own levels.
|
|
89
|
+
- Multi-run overlay, unified hover, EMA smoothing, log-y, step vs. relative-time
|
|
90
|
+
x-axis, and **outlier clipping** by value percentiles.
|
|
91
|
+
- Collapsible per-group chart sections, resizable sidebar, and a one-click
|
|
92
|
+
**Refresh selected** that re-ingests from disk and rebuilds the plots.
|
|
93
|
+
|
|
94
|
+
## CLI
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
tblike <runs_dir> [--port P] [--host H] [--cache-dir D] [-j JOBS] [--no-watch]
|
|
98
|
+
# the main command: serve + auto-ingest
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Advanced / scriptable subcommands:
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
tblike convert RUN_DIR [RUN_ID] [-j JOBS] # ingest one run into Parquet (one-off)
|
|
105
|
+
tblike scan # one incremental ingest pass, no server
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## How it stores data
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
cache/<run_id>/
|
|
112
|
+
data/seg-00000.parquet # one immutable segment per ingest pass
|
|
113
|
+
index.json # tags, per-file ingest state, metadata
|
|
114
|
+
meta.json # tiny summary used for fast run listing
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
A read takes the union of all segments and de-duplicates rows by `(tag, step)`, keeping the one with the latest `wall_time`.
|
|
118
|
+
|
|
119
|
+
## License
|
|
120
|
+
|
|
121
|
+
MIT — see [LICENSE](LICENSE).
|
tb_like-0.1.0/README.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# tb_like
|
|
2
|
+
|
|
3
|
+
A faster, **columnar** TensorBoard-style scalar viewer — built for **many series and many runs**.
|
|
4
|
+
|
|
5
|
+
TensorBoard re-parses event files on demand and gets slow when a run has tens of
|
|
6
|
+
thousands of scalar series across hundreds of experiments. `tb_like` instead
|
|
7
|
+
converts TensorBoard event files into per-run **Parquet** once, then serves
|
|
8
|
+
downsampled series on demand to a fast Plotly dashboard. New events are picked
|
|
9
|
+
up incrementally in the background.
|
|
10
|
+
|
|
11
|
+
## Why it's fast
|
|
12
|
+
|
|
13
|
+
- **Convert once, read many.** Each run's `events.out.tfevents.*` files are parsed into
|
|
14
|
+
a columnar Parquet file sorted by `(tag, step)`, with row-group statistics so a
|
|
15
|
+
query for a few tags only touches the matching row groups — even when a run has
|
|
16
|
+
~18k series.
|
|
17
|
+
- **Incremental & idempotent.** Ingestion tracks each event file's size and record
|
|
18
|
+
count, so re-scans only parse new data. A background watcher keeps the cache in
|
|
19
|
+
sync; parsing is parallelized across event files with `joblib`.
|
|
20
|
+
- **Lazy, prioritized rendering.** The dashboard renders charts only as they
|
|
21
|
+
scroll into view, fetched through a priority queue (visible first, then
|
|
22
|
+
neighbors, biased toward the scroll direction).
|
|
23
|
+
- **LTTB downsampling** keeps long curves cheap to draw without losing their shape.
|
|
24
|
+
|
|
25
|
+
## Install
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install tb-like
|
|
29
|
+
# or
|
|
30
|
+
uv tool install tb-like
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Quick start
|
|
34
|
+
|
|
35
|
+
A "run" is a directory containing `events.out.tfevents.*` files. Point `tb_like`
|
|
36
|
+
at a directory of runs and open the dashboard — that's it:
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
my_runs/
|
|
40
|
+
run_a/ events.out.tfevents.* config.yaml
|
|
41
|
+
run_b/ events.out.tfevents.*
|
|
42
|
+
...
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
tblike my_runs --port 8000 --jobs 8
|
|
47
|
+
# open http://127.0.0.1:8000
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
The background watcher discovers runs under the folder, converts any that
|
|
51
|
+
changed to Parquet (parsing event files across `--jobs` worker processes), keeps
|
|
52
|
+
the cache in sync, and serves them — no separate build step. The cache lives in
|
|
53
|
+
`<runs_dir>/.tblike_cache` by default (override with `--cache-dir`).
|
|
54
|
+
|
|
55
|
+
## Dashboard features
|
|
56
|
+
|
|
57
|
+
- Hierarchical, searchable **tag tree** (regex filter) with smart grouping:
|
|
58
|
+
path compression of `a.b.c` chains, numeric-enumeration collapsing
|
|
59
|
+
(`…expert_idx_∗`), and layer indices kept as their own levels.
|
|
60
|
+
- Multi-run overlay, unified hover, EMA smoothing, log-y, step vs. relative-time
|
|
61
|
+
x-axis, and **outlier clipping** by value percentiles.
|
|
62
|
+
- Collapsible per-group chart sections, resizable sidebar, and a one-click
|
|
63
|
+
**Refresh selected** that re-ingests from disk and rebuilds the plots.
|
|
64
|
+
|
|
65
|
+
## CLI
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
tblike <runs_dir> [--port P] [--host H] [--cache-dir D] [-j JOBS] [--no-watch]
|
|
69
|
+
# the main command: serve + auto-ingest
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Advanced / scriptable subcommands:
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
tblike convert RUN_DIR [RUN_ID] [-j JOBS] # ingest one run into Parquet (one-off)
|
|
76
|
+
tblike scan # one incremental ingest pass, no server
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## How it stores data
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
cache/<run_id>/
|
|
83
|
+
data/seg-00000.parquet # one immutable segment per ingest pass
|
|
84
|
+
index.json # tags, per-file ingest state, metadata
|
|
85
|
+
meta.json # tiny summary used for fast run listing
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
A read takes the union of all segments and de-duplicates rows by `(tag, step)`, keeping the one with the latest `wall_time`.
|
|
89
|
+
|
|
90
|
+
## License
|
|
91
|
+
|
|
92
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "tb-like"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A faster, columnar TensorBoard-style scalar viewer for many series and many runs."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
authors = [{ name = "Misha Kindulov", email = "kindulov.ml@gmail.com" }]
|
|
10
|
+
keywords = ["tensorboard", "parquet", "plotly", "machine-learning", "visualization", "metrics"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 4 - Beta",
|
|
13
|
+
"Intended Audience :: Science/Research",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Topic :: Scientific/Engineering :: Visualization",
|
|
16
|
+
"Framework :: FastAPI",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"fastapi>=0.137.1",
|
|
21
|
+
"joblib>=1.5.3",
|
|
22
|
+
"numpy>=1.26",
|
|
23
|
+
"plotly>=6.8.0",
|
|
24
|
+
"polars>=1.41.2",
|
|
25
|
+
"pyarrow>=24.0.0",
|
|
26
|
+
"tensorboard>=2.20.0",
|
|
27
|
+
"tqdm>=4.68.2",
|
|
28
|
+
"uvicorn[standard]>=0.49.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/b0nce/tb_like"
|
|
33
|
+
Repository = "https://github.com/b0nce/tb_like"
|
|
34
|
+
Issues = "https://github.com/b0nce/tb_like/issues"
|
|
35
|
+
|
|
36
|
+
[project.scripts]
|
|
37
|
+
tblike = "tblike.cli:main"
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["hatchling"]
|
|
41
|
+
build-backend = "hatchling.build"
|
|
42
|
+
|
|
43
|
+
[tool.hatch.build.targets.wheel]
|
|
44
|
+
packages = ["tblike"]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""tb_like: a faster, columnar TensorBoard-style scalar viewer.
|
|
2
|
+
|
|
3
|
+
Pipeline: TensorBoard event files -> per-run columnar Parquet -> on-demand,
|
|
4
|
+
downsampled reads served to a Plotly dashboard. New events are picked up
|
|
5
|
+
incrementally in a background watcher.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Command line entrypoints for tb_like.
|
|
2
|
+
|
|
3
|
+
Primary usage — just point it at a folder of runs and open the dashboard:
|
|
4
|
+
|
|
5
|
+
tblike <runs_dir> [--port 8000] [--jobs 8]
|
|
6
|
+
|
|
7
|
+
The background watcher converts runs to Parquet automatically and keeps the
|
|
8
|
+
cache in sync. Advanced/scriptable subcommands are also available:
|
|
9
|
+
|
|
10
|
+
tblike convert <run_dir> [run_id] # ingest one run -> parquet (one-off)
|
|
11
|
+
tblike scan # one incremental ingest pass, no server
|
|
12
|
+
tblike build-runs --count 200 # make symlinked test runs (dev)
|
|
13
|
+
tblike clone <src_run_id> --count N # fan a converted run out (dev)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import shutil
|
|
22
|
+
import sys
|
|
23
|
+
import time
|
|
24
|
+
|
|
25
|
+
from .convert import convert_run, load_index, meta_from_index
|
|
26
|
+
from .watcher import Watcher, discover_runs
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def cmd_build_runs(args: argparse.Namespace) -> None:
    """Create numbered symlinks in ``args.runs_dir`` that all point at ``args.source``.

    Link names are ``f"{args.prefix}{i:03d}"`` for ``i`` in ``range(args.count)``.
    An existing symlink with the same name (including a dangling one) is
    replaced. A path that exists but is *not* a symlink is left untouched and
    reported on stderr, so real run directories are never clobbered.

    Args:
        args: parsed CLI namespace providing ``source``, ``runs_dir``,
            ``count`` and ``prefix``.
    """
    src = os.path.abspath(args.source)
    os.makedirs(args.runs_dir, exist_ok=True)

    created = 0
    for i in range(args.count):
        link = os.path.join(args.runs_dir, f"{args.prefix}{i:03d}")
        if os.path.islink(link):
            # Stale (possibly dangling) link from a previous invocation.
            os.remove(link)
        elif os.path.exists(link):
            # A real file/directory lives here: keep it and say so instead of
            # silently counting it as created.
            print(f"skipped {link}: exists and is not a symlink", file=sys.stderr)
            continue
        os.symlink(src, link)
        created += 1

    print(f"created {created} symlinked runs in {args.runs_dir}/ -> {src}")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _file_progress(prefix: str):
    """Build an ``on_file(done, total, name)`` callback that drives a tqdm bar.

    The bar is created lazily on the first callback (that is when ``total``
    becomes known), labelled with ``prefix``. Each call sets the bar position
    to ``done`` and redraws it; the bar is closed once ``done >= total``.
    The ``name`` argument is accepted but not displayed.
    """
    from tqdm import tqdm

    progress = None  # the tqdm instance, created on first use

    def cb(done: int, total: int, name: str) -> None:
        nonlocal progress
        if progress is None:
            progress = tqdm(total=total, desc=prefix, unit="file", dynamic_ncols=True)
        # Set the absolute position rather than incrementing.
        progress.n = done
        progress.refresh()
        if done >= total:
            progress.close()

    return cb
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def cmd_convert(args: argparse.Namespace) -> None:
    """Ingest one run directory via ``convert_run`` and print a one-line summary.

    The run id is ``args.run_id`` when given, otherwise the last path component
    of ``args.run_dir``. Output goes to ``<args.cache_dir>/<run_id>``, and
    per-file progress is shown through a tqdm-backed callback.
    """
    if args.run_id:
        run_id = args.run_id
    else:
        # normpath strips a trailing slash so basename is never empty.
        run_id = os.path.basename(os.path.normpath(args.run_dir))
    target_dir = os.path.join(args.cache_dir, run_id)

    t0 = time.time()
    res = convert_run(
        args.run_dir,
        target_dir,
        run_id,
        on_file=_file_progress(run_id),
        n_jobs=args.jobs,
    )
    dt = time.time() - t0

    summary = (
        f"[{run_id}] +{res.new_rows:,} rows (total {res.total_rows:,}), "
        f"{res.num_tags} tags, segment={res.segment} in {dt:.1f}s ({args.jobs} jobs)"
    )
    print(summary)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def cmd_clone(args: argparse.Namespace) -> None:
    """Duplicate one converted run into ``args.count`` cache entries (dev tool).

    Each clone ``<prefix>NNN`` gets its own cache directory whose ``data/``
    segments are hardlinks to the source run's Parquet segments (falling back
    to a real copy when hardlinking fails), plus its own ``index.json`` and
    ``meta.json`` with a distinct run id, display name and source dir.

    Exits with status 1 if the source run has no index yet.
    """
    source_root = os.path.join(args.cache_dir, args.src_run_id)
    src_index = load_index(source_root)
    if not src_index:
        print(f"source run {args.src_run_id!r} not converted yet", file=sys.stderr)
        sys.exit(1)

    source_data = os.path.join(source_root, "data")
    segments = src_index.get("segments", [])
    # Same for every clone, so resolve it once.
    base = src_index.get("display_name", args.src_run_id)

    for i in range(args.count):
        rid = f"{args.prefix}{i:03d}"
        clone_root = os.path.join(args.cache_dir, rid)
        clone_data = os.path.join(clone_root, "data")
        os.makedirs(clone_data, exist_ok=True)

        for seg in segments:
            origin = os.path.join(source_data, seg)
            target = os.path.join(clone_data, seg)
            if os.path.exists(target):
                os.remove(target)
            try:
                os.link(origin, target)  # hardlink, ~free
            except OSError:
                # Hardlink failed: fall back to a real copy.
                shutil.copy2(origin, target)

        # Shallow copy of the source index with the per-clone fields replaced.
        idx = {
            **src_index,
            "run_id": rid,
            "display_name": f"{base} #{i:03d}",
            "source_dir": os.path.abspath(os.path.join(args.runs_dir, rid)),
        }
        with open(os.path.join(clone_root, "index.json"), "w") as fh:
            json.dump(idx, fh)
        with open(os.path.join(clone_root, "meta.json"), "w") as fh:
            json.dump(meta_from_index(idx), fh)

    print(f"cloned {args.src_run_id} -> {args.count} runs ({len(segments)} segments each)")
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def cmd_scan(args: argparse.Namespace) -> None:
    """Run a single ``Watcher.scan_once()`` pass and print its result and duration.

    Also prints how many runs ``discover_runs`` finds under ``args.runs_dir``
    before the scan starts. No server is started.
    """
    watcher = Watcher(args.runs_dir, args.cache_dir)
    runs = discover_runs(args.runs_dir)  # only used for the count below
    print(f"discovered {len(runs)} runs; scanning...")

    t0 = time.time()
    res = watcher.scan_once()
    print(f"scan done in {time.time()-t0:.1f}s: {res}")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Default number of parallel parse workers: one fewer than the CPU count, but
# never below 1. os.cpu_count() may return None, in which case 2 is assumed.
DEFAULT_JOBS = max(1, (os.cpu_count() or 2) - 1)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def run_serve(argv: list[str]) -> None:
    """Parse the default ``tblike <runs_dir>`` command line and start the server.

    The resolved settings are exported as ``TBLIKE_*`` environment variables
    before uvicorn is launched on ``tblike.server:app`` (presumably the server
    module reads them at startup -- confirm against ``server.py``).

    Exits through ``argparse`` with a usage error if ``runs_dir`` is not an
    existing directory.
    """
    import uvicorn

    parser = argparse.ArgumentParser(
        prog="tblike",
        description="Serve the tb_like dashboard for a folder of runs (auto-ingests in the background).",
    )
    parser.add_argument(
        "runs_dir",
        help="folder containing run subdirs with events.out.tfevents.* files",
    )
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument(
        "--cache-dir",
        default=None,
        help="Parquet cache directory (default: <runs_dir>/.tblike_cache)",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=DEFAULT_JOBS,
        help=f"parallel parse workers for background conversion (default: {DEFAULT_JOBS})",
    )
    parser.add_argument(
        "--interval",
        type=float,
        default=10.0,
        help="watcher poll interval, seconds",
    )
    parser.add_argument(
        "--no-watch",
        action="store_true",
        help="serve only; do not ingest in the background",
    )
    args = parser.parse_args(argv)

    if not os.path.isdir(args.runs_dir):
        parser.error(f"runs_dir not found: {args.runs_dir}")

    # Without --cache-dir the cache lives inside the runs folder.
    cache = args.cache_dir or os.path.join(args.runs_dir, ".tblike_cache")

    os.environ.update(
        {
            "TBLIKE_RUNS": os.path.abspath(args.runs_dir),
            "TBLIKE_CACHE": os.path.abspath(cache),
            "TBLIKE_WATCH": "0" if args.no_watch else "1",
            "TBLIKE_INTERVAL": str(args.interval),
            "TBLIKE_JOBS": str(args.jobs),
        }
    )

    print(f"tb_like → http://{args.host}:{args.port} runs={args.runs_dir} cache={cache}")
    uvicorn.run("tblike.server:app", host=args.host, port=args.port, reload=False)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def run_advanced(argv: list[str]) -> None:
    """Parse and dispatch the power-user subcommands.

    Subcommands: ``convert``, ``scan``, ``build-runs`` and ``clone``. The
    selected handler is stored as ``args.func`` and called with the parsed
    namespace.

    ``--runs-dir`` and ``--cache-dir`` are accepted both before and after the
    subcommand name. ``main()`` only routes here when the subcommand is the
    first token, so the options can only ever follow it; they were previously
    registered on the top-level parser alone, which made argparse reject them
    as unrecognized arguments.

    Args:
        argv: command-line tokens, without the program name.
    """
    p = argparse.ArgumentParser(prog="tblike")
    p.add_argument("--runs-dir", default="runs",
                   help="folder containing the runs (default: runs)")
    p.add_argument("--cache-dir", default="cache",
                   help="Parquet cache directory (default: cache)")

    # The same two options, inherited by every subparser. SUPPRESS keeps a
    # subparser from overwriting the top-level value/default when the option
    # is not given after the subcommand.
    common = argparse.ArgumentParser(add_help=False)
    common.add_argument("--runs-dir", default=argparse.SUPPRESS,
                        help="folder containing the runs (default: runs)")
    common.add_argument("--cache-dir", default=argparse.SUPPRESS,
                        help="Parquet cache directory (default: cache)")

    sub = p.add_subparsers(dest="cmd", required=True)

    c = sub.add_parser("convert", parents=[common], help="ingest one run into parquet")
    c.add_argument("run_dir")
    c.add_argument("run_id", nargs="?")
    c.add_argument("-j", "--jobs", type=int, default=DEFAULT_JOBS,
                   help="parallel worker processes for event-file parsing")
    c.set_defaults(func=cmd_convert)

    s = sub.add_parser("scan", parents=[common], help="one incremental ingest pass, no server")
    s.set_defaults(func=cmd_scan)

    b = sub.add_parser("build-runs", parents=[common], help="create symlinked test runs (dev)")
    b.add_argument("--source", default="data")
    b.add_argument("--count", type=int, default=200)
    b.add_argument("--prefix", default="run_")
    b.set_defaults(func=cmd_build_runs)

    cl = sub.add_parser("clone", parents=[common], help="fan a converted run out into N runs (dev)")
    cl.add_argument("src_run_id")
    cl.add_argument("--count", type=int, default=200)
    cl.add_argument("--prefix", default="run_")
    cl.set_defaults(func=cmd_clone)

    args = p.parse_args(argv)
    args.func(args)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# Subcommand names that main() routes to run_advanced(); any other first
# argument is handled by the default serve command.
ADVANCED = {"convert", "scan", "build-runs", "clone"}
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: route to a subcommand or to the default serve command.

    Args:
        argv: command-line tokens without the program name; ``sys.argv[1:]``
            is used when this is ``None``.

    Routing is decided by the first token only: a name in ``ADVANCED`` goes to
    ``run_advanced``; no arguments, ``-h`` or ``--help`` shows the serve
    command's help; anything else is passed to ``run_serve`` unchanged.
    """
    tokens = list(sys.argv[1:]) if argv is None else list(argv)
    first = tokens[0] if tokens else None

    if first in ADVANCED:
        # power-user subcommands
        run_advanced(tokens)
        return

    if first is None or first in ("-h", "--help"):
        # default command's help
        run_serve(["--help"])
        return

    # `tblike <runs_dir> [opts]`
    run_serve(tokens)


if __name__ == "__main__":
    main()
|