PyPI - tlog-ml - Versions diffs - 0.1.0__py3-none-any.whl - Mend

tlog-ml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

tlog/__init__.py +89 -0
tlog/cli.py +183 -0
tlog/console.py +71 -0
tlog/export.py +93 -0
tlog/frontend/app.js +494 -0
tlog/frontend/index.html +29 -0
tlog/frontend/style.css +181 -0
tlog/frontend/vendor/uplot.min.css +1 -0
tlog/frontend/vendor/uplot.min.js +2 -0
tlog/media.py +113 -0
tlog/meta.py +152 -0
tlog/payload.py +72 -0
tlog/run.py +282 -0
tlog/server.py +133 -0
tlog/store.py +354 -0
tlog/system.py +132 -0
tlog/tui.py +446 -0
tlog/writer.py +95 -0
tlog_ml-0.1.0.dist-info/METADATA +266 -0
tlog_ml-0.1.0.dist-info/RECORD +24 -0
tlog_ml-0.1.0.dist-info/WHEEL +5 -0
tlog_ml-0.1.0.dist-info/entry_points.txt +2 -0
tlog_ml-0.1.0.dist-info/licenses/LICENSE +21 -0
tlog_ml-0.1.0.dist-info/top_level.txt +1 -0

tlog/__init__.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""tlog — lightweight, local-first experiment logger for neural net training.
+Drop-in wandb-shaped API:
+    import tlog
+    run = tlog.init(project="vitok", name="vae-L", config=vars(args))
+    tlog.log({"loss/total": 0.41, "training/lr": 3e-4}, step=step)
+    tlog.log_images("eval/recon", [orig, recon], step=step)
+    tlog.finish()
+View runs with `tlog watch` (terminal), `tlog serve` (browser via port
+forward), or `tlog export -o report.html` (single shareable file).
+"""
+from __future__ import annotations
+import os
+from typing import Any
+from .run import NoopRun, Run
+__version__ = "0.1.0"
+# Names exported by `from tlog import *`: the module-level API plus run types.
+__all__ = ["init", "log", "log_images", "finish", "run", "Run", "NoopRun"]
+# Module-level handle shared by init()/log()/log_images()/finish(); stays None
+# until init() assigns it.
+run: Run | NoopRun | None = None  # the active run, set by init()
+def init(
+    project: str = "default",
+    name: str | None = None,
+    config: dict | None = None,
+    dir: str | None = None,
+    id: str | None = None,
+    resume: str = "auto",
+    capture_console: bool = True,
+    system_metrics: bool = True,
+    rank_zero_only: bool = True,
+) -> Run | NoopRun:
+    """Create (or resume) the module-level active run and return it.
+    When rank_zero_only is true and the RANK env var (set by torchrun/SLURM)
+    parses to a non-zero integer, a NoopRun is installed and returned instead.
+    Any real run that is still active is finished before the new one starts.
+    resume: "auto"  — resume iff an explicit `id` is given or this process is a
+                      SLURM requeue (SLURM_RESTART_COUNT > 0) of a job that
+                      already created a run; otherwise start fresh.
+            "must"  — resume an existing run or raise.
+            "never" — always start a fresh run.
+    All remaining arguments are forwarded unchanged to Run.
+    """
+    global run
+    if rank_zero_only:
+        # An unset or empty RANK counts as rank 0.
+        rank = int(os.environ.get("RANK", "0") or 0)
+        if rank != 0:
+            run = NoopRun()
+            return run
+    previous = run
+    if not (previous is None or isinstance(previous, NoopRun)):
+        previous.finish()
+    run = Run(
+        project=project,
+        name=name,
+        config=config,
+        dir=dir,
+        id=id,
+        resume=resume,
+        capture_console=capture_console,
+        system_metrics=system_metrics,
+    )
+    suffix = " (resumed)" if run.resumed else ""
+    print(f"tlog: logging to {run.dir}" + suffix)
+    return run
+def _require_run() -> Run | NoopRun:
+    """Return the active run; raise RuntimeError when init() has not set one."""
+    active = run
+    if active is not None:
+        return active
+    raise RuntimeError("tlog.init() must be called before logging")
+def log(metrics: dict[str, Any], step: int | None = None) -> None:
+    """Forward a dict of scalar metrics for one training step to the active run.
+    Raises RuntimeError if init() has not been called.
+    """
+    active = _require_run()
+    active.log(metrics, step=step)
+def log_images(key: str, images: Any, step: int | None = None, caption: str | None = None) -> None:
+    """Forward one image or a list of images (PIL / torch tensor / numpy array)
+    to the active run under `key`.
+    Raises RuntimeError if init() has not been called.
+    """
+    active = _require_run()
+    active.log_images(key, images, step=step, caption=caption)
+def finish() -> None:
+    """Call finish() on the active run; do nothing if init() was never called.
+    The module-level `run` handle is left pointing at the finished run.
+    """
+    active = run
+    if active is None:
+        return
+    active.finish()

tlog/cli.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""`tlog` command line: watch (default), ls, tail, export, serve, rm."""
+from __future__ import annotations
+import argparse
+import datetime
+import os
+import shutil
+import sys
+from pathlib import Path
+from . import store
+# ANSI SGR escape sequences for terminal styling.
+DIM = "\x1b[2m"
+BOLD = "\x1b[1m"
+RESET = "\x1b[0m"  # clears all styling set by the sequences above
+# 256-colour foreground escape used for the status dot, keyed by run status.
+_STATUS_COLOR = {"running": "\x1b[38;5;114m", "finished": "\x1b[38;5;75m", "dead": "\x1b[38;5;203m"}
+def default_root() -> str:
+    """Return the runs root: the TLOG_DIR env var when set, else "./runs"."""
+    env_root = os.environ.get("TLOG_DIR")
+    return "./runs" if env_root is None else env_root
+def _resolve_or_die(spec: str | None, root: str) -> store.RunInfo:
+    """Look up a run under `root`, exiting the process with a message on failure.
+    spec: what to pass to store.resolve_run; None selects store.latest_run.
+    """
+    if spec is not None:
+        found = store.resolve_run(spec, root)
+        if found is None:
+            sys.exit(f"tlog: no run matching {spec!r} under {root!r}")
+        return found
+    newest = store.latest_run(root)
+    if newest is None:
+        sys.exit(f"tlog: no runs found under {root!r} (set --dir or TLOG_DIR)")
+    return newest
+def cmd_ls(args: argparse.Namespace) -> None:
+    """Print an aligned table of every run under the runs root.
+    The root is the positional `root` argument, falling back to default_root().
+    Status dots are coloured only when stdout is a terminal. Column widths are
+    computed from the visible text alone, so the header and the data rows line
+    up identically with and without colour (previously the header was shifted
+    one column to the left whenever colour was enabled).
+    """
+    root = args.root or default_root()
+    runs = store.find_runs(root)
+    if not runs:
+        print(f"no runs under {root!r}")
+        return
+    color = sys.stdout.isatty()
+    header = ("", "PROJECT/NAME", "ID", "STEP", "LAST LOSS", "STARTED", "SLURM", "STATUS")
+    rows = [header]  # visible text only; this is what column widths are measured on
+    dots = [""]  # what is actually printed in column 0 (may carry escape codes)
+    def has_loss(rec) -> bool:
+        return any(k.startswith("loss") for k in rec)
+    for r in runs:
+        metrics_path = r.path / "metrics.jsonl"
+        last = store.last_record(metrics_path) or {}
+        step = last.get("_step")
+        loss_rec = store.last_record(metrics_path, predicate=has_loss) or {}
+        # Prefer the newest "loss*" value; otherwise fall back to the first
+        # non-underscore key of the last record, or None when there is none.
+        loss = next(
+            (v for k, v in loss_rec.items() if k.startswith("loss")),
+            next((v for k, v in last.items() if not k.startswith("_")), None),
+        )
+        started = datetime.datetime.fromtimestamp(r.created_at).strftime("%m-%d %H:%M")
+        slurm = r.meta.get("env", {}).get("slurm", {}).get("SLURM_JOB_ID", "")
+        status = r.status
+        dots.append(_STATUS_COLOR.get(status, "") + "●" + RESET if color else "●")
+        rows.append(
+            (
+                "●",
+                f"{r.project}/{r.name}",
+                r.id,
+                f"{step:,}" if step is not None else "-",
+                f"{loss:.4g}" if isinstance(loss, (int, float)) else "-",
+                started,
+                slurm,
+                status,
+            )
+        )
+    widths = [max(len(str(row[i])) for row in rows) for i in range(len(header))]
+    for row, dot in zip(rows, dots):
+        cells = [str(c).ljust(widths[i]) for i, c in enumerate(row)]
+        # Pad the printed dot by the width of its visible text, not by the
+        # length of the escape-laden string.
+        cells[0] = dot + " " * (widths[0] - len(row[0]))
+        print("  ".join(cells).rstrip())
+def cmd_watch(args: argparse.Namespace) -> None:
+    """Open the live terminal dashboard for the selected (or latest) run."""
+    from .tui import watch
+    info = _resolve_or_die(args.run, args.dir or default_root())
+    watch(info, interval=args.interval, ncols=args.cols)
+def cmd_tail(args: argparse.Namespace) -> None:
+    """Print up to `args.lines` lines of the selected run's console log."""
+    root = args.dir or default_root()
+    info = _resolve_or_die(args.run, root)
+    for text in store.read_console(info, max_lines=args.lines):
+        print(text)
+def cmd_export(args: argparse.Namespace) -> None:
+    """Write an HTML report for the named runs, or for every run under the root
+    when none are named, then print the output path and size."""
+    from .export import export_html
+    root = args.dir or default_root()
+    if args.runs:
+        runs = [_resolve_or_die(spec, root) for spec in args.runs]
+    else:
+        runs = store.find_runs(root)
+        if not runs:
+            sys.exit(f"tlog: no runs under {root!r}")
+    out = export_html(runs, Path(args.output), max_image_px=args.max_image_px)
+    size_kb = out.stat().st_size / 1024
+    plural = "" if len(runs) == 1 else "s"
+    print(f"wrote {out} ({size_kb:,.0f} KB, {len(runs)} run{plural})")
+def cmd_serve(args: argparse.Namespace) -> None:
+    """Start the web dashboard for the runs root on the given host and port."""
+    from .server import serve
+    root = args.root or default_root()
+    serve(root, host=args.host, port=args.port)
+def cmd_rm(args: argparse.Namespace) -> None:
+    """Delete the selected run's directory tree.
+    Unless `args.yes` is set, the user is prompted first and anything other
+    than "y"/"yes" (case-insensitive) aborts without deleting.
+    """
+    info = _resolve_or_die(args.run, args.dir or default_root())
+    confirmed = args.yes
+    if not confirmed:
+        reply = input(f"delete {info.path}? [y/N] ").strip().lower()
+        confirmed = reply in ("y", "yes")
+    if not confirmed:
+        print("aborted")
+        return
+    shutil.rmtree(info.path)
+    print(f"deleted {info.path}")
+def main(argv: list[str] | None = None) -> None:
+    """Entry point for the `tlog` command line.
+    argv: arguments without the program name; None means use sys.argv[1:].
+    Builds one sub-parser per command (watch, ls, tail, export, serve, rm) and
+    dispatches to the matching cmd_* function. When no subcommand is given the
+    arguments are re-parsed as `tlog watch ...`, so a bare `tlog` opens the
+    dashboard for the latest run. An explicitly passed empty list is honoured
+    as "no arguments" rather than falling back to the process's own sys.argv.
+    """
+    parser = argparse.ArgumentParser(
+        prog="tlog",
+        description="lightweight local experiment logger — view training runs in "
+        "the terminal, a browser, or a self-contained HTML file",
+    )
+    sub = parser.add_subparsers(dest="command")
+    p_watch = sub.add_parser("watch", help="live terminal dashboard (default command)")
+    p_watch.add_argument("run", nargs="?", help="run dir, id, or name (default: latest run)")
+    p_watch.add_argument("--dir", help="runs root (default: $TLOG_DIR or ./runs)")
+    p_watch.add_argument("--interval", type=float, default=2.0, help="refresh seconds")
+    p_watch.add_argument(
+        "--cols", type=int, default=None,
+        help="chart columns (default: auto from pane width; keys 1-9/0 at runtime)",
+    )
+    p_watch.set_defaults(func=cmd_watch)
+    p_ls = sub.add_parser("ls", help="list runs")
+    p_ls.add_argument("root", nargs="?", help="runs root (default: $TLOG_DIR or ./runs)")
+    p_ls.set_defaults(func=cmd_ls)
+    p_tail = sub.add_parser("tail", help="show a run's captured console log")
+    p_tail.add_argument("run", nargs="?", help="run dir, id, or name (default: latest)")
+    p_tail.add_argument("-n", "--lines", type=int, default=50)
+    p_tail.add_argument("--dir", help="runs root")
+    p_tail.set_defaults(func=cmd_tail)
+    p_export = sub.add_parser("export", help="write a self-contained HTML report")
+    p_export.add_argument("runs", nargs="*", help="runs to include (default: all)")
+    p_export.add_argument("-o", "--output", default="tlog_report.html")
+    p_export.add_argument("--dir", help="runs root")
+    p_export.add_argument(
+        "--max-image-px", type=int, default=512,
+        help="downscale embedded images to this max side (0 = keep original)",
+    )
+    p_export.set_defaults(func=cmd_export)
+    p_serve = sub.add_parser("serve", help="live web dashboard (port-forward friendly)")
+    p_serve.add_argument("root", nargs="?", help="runs root (default: $TLOG_DIR or ./runs)")
+    p_serve.add_argument("-p", "--port", type=int, default=8585)
+    p_serve.add_argument("--host", default="127.0.0.1")
+    p_serve.set_defaults(func=cmd_serve)
+    p_rm = sub.add_parser("rm", help="delete a run directory")
+    p_rm.add_argument("run", help="run dir, id, or name")
+    p_rm.add_argument("-y", "--yes", action="store_true", help="skip confirmation")
+    p_rm.add_argument("--dir", help="runs root")
+    p_rm.set_defaults(func=cmd_rm)
+    args = parser.parse_args(argv)
+    if args.command is None:  # bare `tlog` -> watch latest
+        # Compare against None, not truthiness: main([]) must not pick up
+        # whatever arguments the host process happened to be started with.
+        rest = sys.argv[1:] if argv is None else list(argv)
+        args = parser.parse_args(["watch", *rest])
+    args.func(args)
+if __name__ == "__main__":
+    main()

tlog/console.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""Tee stdout/stderr of the training process into the run directory."""
+from __future__ import annotations
+import sys
+import threading
+from pathlib import Path
+from typing import TextIO
+class _Tee:
+    """Stream proxy that sends every write to the wrapped stream and,
+    best-effort, to a log file guarded by a shared lock. Attributes not defined
+    here are looked up on the wrapped stream, which covers what
+    print/tqdm/logging expect of a text stream."""
+    def __init__(self, stream: TextIO, logfile: TextIO, lock: threading.Lock):
+        self._stream = stream
+        self._logfile = logfile
+        self._lock = lock
+    def write(self, data: str) -> int:
+        # The real stream goes first, and its return value is what callers see.
+        written = self._stream.write(data)
+        with self._lock:
+            log = self._logfile
+            if log.closed:
+                return written
+            try:
+                log.write(data)
+            except (OSError, ValueError):
+                pass  # a failing log file must never break the real stream
+        return written
+    def flush(self) -> None:
+        self._stream.flush()
+        with self._lock:
+            log = self._logfile
+            if log.closed:
+                return
+            try:
+                log.flush()
+            except (OSError, ValueError):
+                pass
+    def isatty(self) -> bool:
+        return self._stream.isatty()
+    def fileno(self) -> int:
+        return self._stream.fileno()
+    @property
+    def encoding(self):
+        # Falls back to "utf-8" only when the wrapped stream has no such attribute.
+        return getattr(self._stream, "encoding", "utf-8")
+    def __getattr__(self, name):
+        # Reached only for names not found on the tee itself.
+        return getattr(self._stream, name)
+class ConsoleCapture:
+    """Replace sys.stdout/sys.stderr with tees that also append to `path`,
+    until stop() puts the original streams back and closes the file."""
+    def __init__(self, path: Path):
+        # line-buffered so `tlog tail`/viewers see output promptly
+        self._logfile = open(path, "a", buffering=1, encoding="utf-8", errors="replace")
+        self._lock = threading.Lock()
+        self._orig_stdout, self._orig_stderr = sys.stdout, sys.stderr
+        # Both tees share one file and one lock so their writes do not interleave
+        # mid-call.
+        sys.stdout = _Tee(self._orig_stdout, self._logfile, self._lock)
+        sys.stderr = _Tee(self._orig_stderr, self._logfile, self._lock)
+    def stop(self) -> None:
+        # Only undo the redirection while a tee is still installed.
+        for attr, original in (("stdout", self._orig_stdout), ("stderr", self._orig_stderr)):
+            if isinstance(getattr(sys, attr), _Tee):
+                setattr(sys, attr, original)
+        with self._lock:
+            if self._logfile.closed:
+                return
+            self._logfile.flush()
+            self._logfile.close()

tlog/export.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""`tlog export` — render runs into one self-contained HTML file.
+Everything (frontend, uPlot, metric data, images as base64) is inlined, so the
+file can be opened in VS Code's preview, scp'd to a laptop, or attached to a
+message with no server and no internet access.
+"""
+from __future__ import annotations
+import base64
+import datetime
+import io
+import json
+from pathlib import Path
+from .payload import run_media, run_metrics, run_summary
+from .store import RunInfo, read_console
+# Directory, next to this module, holding the files render_template() reads
+# (index.html, style.css, app.js and the vendor/ uPlot assets).
+FRONTEND = Path(__file__).parent / "frontend"
+def _data_uri(png_path: Path, max_px: int) -> str | None:
+    """Return the file's bytes as a `data:image/png;base64,` URI, or None when
+    the file cannot be read.
+    max_px: when > 0 and PIL can open the image, an image whose longer side
+    exceeds max_px is shrunk to fit and re-encoded as PNG. Any failure in that
+    step (including PIL not being installed) leaves the original bytes in use.
+    """
+    try:
+        payload = png_path.read_bytes()
+    except OSError:
+        return None
+    if max_px > 0:
+        try:  # downscale with PIL if available to keep the report small
+            from PIL import Image
+            picture = Image.open(io.BytesIO(payload))
+            if max(picture.size) > max_px:
+                picture.thumbnail((max_px, max_px))
+                shrunk = io.BytesIO()
+                picture.save(shrunk, format="PNG")
+                payload = shrunk.getvalue()
+        except Exception:  # also covers ImportError when PIL is absent
+            pass
+    encoded = base64.b64encode(payload).decode("ascii")
+    return "data:image/png;base64," + encoded
+def build_data(runs: list[RunInfo], max_image_px: int = 512) -> dict:
+    """Assemble the JSON-serialisable payload embedded in an exported report.
+    For each run: its summary, metrics, media records with files replaced by
+    inline data URIs (records with no readable file are dropped), and up to
+    300 lines of console log joined with newlines. The result also carries a
+    `generated_at` local timestamp.
+    """
+    payload_runs = []
+    for info in runs:
+        entry = run_summary(info)
+        entry["metrics"] = run_metrics(info)
+        embedded = []
+        for rec in run_media(info):
+            uris = [
+                _data_uri(info.path / "media" / rel, max_image_px)
+                for rel in rec["files"]
+            ]
+            uris = [uri for uri in uris if uri]
+            if uris:
+                # Copy the record so the source record keeps its file names.
+                embedded.append(dict(rec, files=uris))
+        entry["media"] = embedded
+        entry["console"] = "\n".join(read_console(info, max_lines=300))
+        payload_runs.append(entry)
+    stamp = datetime.datetime.now().isoformat(timespec="seconds")
+    return {
+        "generated_at": stamp,
+        "runs": payload_runs,
+    }
+def render_template(mode: str, data: dict | None, title: str = "tlog") -> str:
+    """Fill the placeholders of frontend/index.html and return the page.
+    mode:  text substituted for {{MODE}}.
+    data:  payload substituted for {{DATA}} as compact JSON ("null" when None);
+           "</" is written as "<\\/" so the JSON cannot close a script element.
+    title: text substituted for {{TITLE}}, HTML-escaped.
+    All placeholders are replaced in a single pass over the template, so text
+    that merely looks like a placeholder inside the data, the title or an
+    inlined asset is left untouched. Frontend files are read as UTF-8 to match
+    the encoding the exported report is written with.
+    """
+    import html as _html
+    import re
+    def _read(*parts: str) -> str:
+        # Explicit encoding: the platform default is not UTF-8 everywhere.
+        return FRONTEND.joinpath(*parts).read_text(encoding="utf-8")
+    data_json = "null" if data is None else json.dumps(
+        data, separators=(",", ":")
+    ).replace("</", "<\\/")
+    values = {
+        # NOTE(review): assumes {{TITLE}} sits in HTML text/attribute context in
+        # index.html (e.g. inside <title>) — confirm against the template.
+        "TITLE": _html.escape(title),
+        "UPLOT_CSS": _read("vendor", "uplot.min.css"),
+        "CSS": _read("style.css"),
+        "UPLOT_JS": _read("vendor", "uplot.min.js"),
+        "MODE": mode,
+        "DATA": data_json,
+        "APP_JS": _read("app.js"),
+    }
+    placeholder = re.compile(r"\{\{(" + "|".join(values) + r")\}\}")
+    # A function replacement keeps backslashes in the inserted text literal.
+    return placeholder.sub(lambda m: values[m.group(1)], _read("index.html"))
+def export_html(
+    runs: list[RunInfo], output: Path, max_image_px: int = 512
+) -> Path:
+    """Render `runs` into one HTML file at `output` and return that path.
+    The page title lists the first three run names, followed by " +N" when
+    N further runs are included.
+    """
+    data = build_data(runs, max_image_px=max_image_px)
+    shown = ", ".join(r.name for r in runs[:3])
+    overflow = len(runs) - 3
+    extra = f" +{overflow}" if overflow > 0 else ""
+    title = "tlog — " + shown + extra
+    target = Path(output)
+    target.write_text(render_template("export", data, title), encoding="utf-8")
+    return target