verlet 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ publish:
9
+ runs-on: ubuntu-latest
10
+ environment: pypi
11
+ permissions:
12
+ id-token: write
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.12"
18
+ - run: pip install build
19
+ - run: python -m build
20
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,10 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .eggs/
7
+ *.egg
8
+ .venv/
9
+ venv/
10
+ .env
verlet-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Verlet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
verlet-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: verlet
3
+ Version: 0.1.0
4
+ Summary: Download and explore Verlet datasets
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: click>=8.0
9
+ Requires-Dist: httpx>=0.24
10
+ Requires-Dist: rich>=13.0
11
+ Description-Content-Type: text/markdown
12
+
13
+ # verlet
14
+
15
+ Download and explore Verlet datasets from the command line.
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ curl -sSL https://raw.githubusercontent.com/verlet-robotics/verlet-cli/main/install.sh | bash
21
+ ```
22
+
23
+ This creates an isolated venv at `~/.verlet/venv` and symlinks `verlet` into `~/.local/bin`.
24
+
25
+ Or install manually:
26
+
27
+ ```bash
28
+ pip install verlet
29
+ ```
30
+
31
+ For development:
32
+
33
+ ```bash
34
+ git clone https://github.com/verlet-robotics/verlet-cli.git
35
+ cd verlet-cli
36
+ pip install -e .
37
+ ```
38
+
39
+ ## Quick Start
40
+
41
+ ```bash
42
+ # Authenticate with your access code
43
+ verlet login
44
+
45
+ # Browse available data
46
+ verlet ego list
47
+ verlet ego list --detailed --category Kitchen
48
+
49
+ # Download data
50
+ verlet ego download -o ./data --category Kitchen
51
+
52
+ # Check segment details
53
+ verlet ego info station-1__episode_042_seg5
54
+ ```
55
+
56
+ ## Commands
57
+
58
+ ### Authentication
59
+
60
+ ```bash
61
+ verlet login # Prompt for access code, store JWT
62
+ verlet logout # Remove stored credentials
63
+ ```
64
+
65
+ ### EgoDex Hand Pose Data (`verlet ego`)
66
+
67
+ ```bash
68
+ # List categories and segment counts
69
+ verlet ego list
70
+ verlet ego list --task station-1
71
+ verlet ego list --category Kitchen
72
+ verlet ego list --detailed
73
+
74
+ # Download segments
75
+ verlet ego download # all segments to ./verlet-data/ego/
76
+ verlet ego download -o ./data # custom output directory
77
+ verlet ego download --category Kitchen # filter by category
78
+ verlet ego download --include "*.hdf5,*.mp4" # only specific file types
79
+ verlet ego download --exclude "*.egorec,*.rrd" # skip large files
80
+ verlet ego download --parallel 16 # concurrency (default 8)
81
+ verlet ego download --dry-run # show download plan only
82
+
83
+ # Segment info
84
+ verlet ego info SEGMENT_ID
85
+ ```
86
+
87
+ ## Data Layout
88
+
89
+ Downloaded data is organized by station and episode:
90
+
91
+ ```
92
+ verlet-data/ego/
93
+ station-1/
94
+ episode_042_seg5/
95
+ segment.egorec
96
+ hands.npz
97
+ overlay.mp4
98
+ recording.rrd
99
+ egodex/manipulation/0.hdf5
100
+ egodex/manipulation/0.mp4
101
+ ```
verlet-0.1.0/README.md ADDED
@@ -0,0 +1,89 @@
1
+ # verlet
2
+
3
+ Download and explore Verlet datasets from the command line.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ curl -sSL https://raw.githubusercontent.com/verlet-robotics/verlet-cli/main/install.sh | bash
9
+ ```
10
+
11
+ This creates an isolated venv at `~/.verlet/venv` and symlinks `verlet` into `~/.local/bin`.
12
+
13
+ Or install manually:
14
+
15
+ ```bash
16
+ pip install verlet
17
+ ```
18
+
19
+ For development:
20
+
21
+ ```bash
22
+ git clone https://github.com/verlet-robotics/verlet-cli.git
23
+ cd verlet-cli
24
+ pip install -e .
25
+ ```
26
+
27
+ ## Quick Start
28
+
29
+ ```bash
30
+ # Authenticate with your access code
31
+ verlet login
32
+
33
+ # Browse available data
34
+ verlet ego list
35
+ verlet ego list --detailed --category Kitchen
36
+
37
+ # Download data
38
+ verlet ego download -o ./data --category Kitchen
39
+
40
+ # Check segment details
41
+ verlet ego info station-1__episode_042_seg5
42
+ ```
43
+
44
+ ## Commands
45
+
46
+ ### Authentication
47
+
48
+ ```bash
49
+ verlet login # Prompt for access code, store JWT
50
+ verlet logout # Remove stored credentials
51
+ ```
52
+
53
+ ### EgoDex Hand Pose Data (`verlet ego`)
54
+
55
+ ```bash
56
+ # List categories and segment counts
57
+ verlet ego list
58
+ verlet ego list --task station-1
59
+ verlet ego list --category Kitchen
60
+ verlet ego list --detailed
61
+
62
+ # Download segments
63
+ verlet ego download # all segments to ./verlet-data/ego/
64
+ verlet ego download -o ./data # custom output directory
65
+ verlet ego download --category Kitchen # filter by category
66
+ verlet ego download --include "*.hdf5,*.mp4" # only specific file types
67
+ verlet ego download --exclude "*.egorec,*.rrd" # skip large files
68
+ verlet ego download --parallel 16 # concurrency (default 8)
69
+ verlet ego download --dry-run # show download plan only
70
+
71
+ # Segment info
72
+ verlet ego info SEGMENT_ID
73
+ ```
74
+
75
+ ## Data Layout
76
+
77
+ Downloaded data is organized by station and episode:
78
+
79
+ ```
80
+ verlet-data/ego/
81
+ station-1/
82
+ episode_042_seg5/
83
+ segment.egorec
84
+ hands.npz
85
+ overlay.mp4
86
+ recording.rrd
87
+ egodex/manipulation/0.hdf5
88
+ egodex/manipulation/0.mp4
89
+ ```
@@ -0,0 +1,74 @@
1
#!/usr/bin/env bash
# Install verlet CLI into ~/.verlet/venv and symlink to ~/.local/bin/verlet
# Usage: curl -sSL https://raw.githubusercontent.com/verlet-robotics/verlet-cli/main/install.sh | bash
set -euo pipefail

INSTALL_DIR="$HOME/.verlet"
VENV_DIR="$INSTALL_DIR/venv"
BIN_DIR="$HOME/.local/bin"
PACKAGE="verlet"
REPO="https://github.com/verlet-robotics/verlet-cli.git"

# Small logging helpers; err prints to stderr and aborts the install.
info()  { printf '\033[1;34m==>\033[0m %s\n' "$*"; }
ok()    { printf '\033[1;32m==>\033[0m %s\n' "$*"; }
err()   { printf '\033[1;31merror:\033[0m %s\n' "$*" >&2; exit 1; }

# --- Check python ---
# Pick the first interpreter on PATH that reports version 3.10 or newer.
PYTHON=""
for cmd in python3 python; do
    if command -v "$cmd" &>/dev/null; then
        version=$("$cmd" -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>/dev/null || true)
        major=$(echo "$version" | cut -d. -f1)
        minor=$(echo "$version" | cut -d. -f2)
        if [ "$major" = "3" ] && [ "$minor" -ge 10 ]; then
            PYTHON="$cmd"
            break
        fi
    fi
done
[ -n "$PYTHON" ] || err "Python 3.10+ is required but not found. Install it first."
info "Using $PYTHON ($version)"

# --- Create venv ---
# Always start from a clean environment so re-running the installer upgrades.
if [ -d "$VENV_DIR" ]; then
    info "Removing existing venv..."
    rm -rf "$VENV_DIR"
fi

info "Creating virtual environment at $VENV_DIR..."
"$PYTHON" -m venv "$VENV_DIR"

# --- Install ---
# Try the package index first; fall back to installing from the git repository.
info "Installing verlet..."
"$VENV_DIR/bin/pip" install --quiet --upgrade pip
"$VENV_DIR/bin/pip" install --quiet "$PACKAGE" 2>/dev/null \
    || "$VENV_DIR/bin/pip" install --quiet "git+${REPO}"

# --- Symlink ---
mkdir -p "$BIN_DIR"
ln -sf "$VENV_DIR/bin/verlet" "$BIN_DIR/verlet"

# --- Check PATH ---
# -F: compare $BIN_DIR literally (it contains dots, which are regex wildcards).
if ! echo "$PATH" | tr ':' '\n' | grep -qxF -- "$BIN_DIR"; then
    # SHELL may be unset (containers, cron); default to empty so that
    # `set -u` does not abort the script after a successful install.
    SHELL_NAME="${SHELL:-}"
    SHELL_NAME="${SHELL_NAME##*/}"
    case "$SHELL_NAME" in
        zsh)  RC="$HOME/.zshrc" ;;
        bash) RC="$HOME/.bashrc" ;;
        fish) RC="$HOME/.config/fish/config.fish" ;;
        *)    RC="" ;;
    esac

    if [ -n "$RC" ]; then
        if [ "$SHELL_NAME" = "fish" ]; then
            LINE="fish_add_path $BIN_DIR"
        else
            LINE="export PATH=\"$BIN_DIR:\$PATH\""
        fi
        # The rc file's directory may not exist yet (e.g. ~/.config/fish).
        mkdir -p "$(dirname "$RC")"
        # Only append once, so repeated installs do not pile up duplicates.
        if [ -f "$RC" ] && grep -qxF -- "$LINE" "$RC"; then
            info "$BIN_DIR is already configured in $RC"
        else
            echo "$LINE" >> "$RC"
            ok "Added $BIN_DIR to PATH in $RC"
        fi
        info "Restart your shell or run: export PATH=\"$BIN_DIR:\$PATH\""
    else
        info "Add $BIN_DIR to your PATH manually."
    fi
fi

ok "Installed! Run 'verlet login' to get started."
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "verlet"
7
+ version = "0.1.0"
8
+ description = "Download and explore Verlet datasets"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ dependencies = [
13
+ "click>=8.0",
14
+ "httpx>=0.24",
15
+ "rich>=13.0",
16
+ ]
17
+
18
+ [project.scripts]
19
+ verlet = "verlet.cli:cli"
20
+
21
+ [tool.hatch.build.targets.wheel]
22
+ packages = ["src/verlet"]
@@ -0,0 +1,3 @@
1
+ """Verlet CLI — download and explore Verlet datasets."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,102 @@
1
+ """Authentication: login, logout, token validation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import json
7
+ import time
8
+
9
+ import click
10
+ import httpx
11
+
12
+ from verlet.config import DEFAULT_API_BASE, clear_config, load_config, save_config
13
+
14
+
15
def _decode_jwt_payload(token: str) -> dict:
    """Return the decoded JSON claims from a JWT's middle segment.

    The signature is not verified here; the server stays authoritative on
    whether the token is valid.

    Raises:
        ValueError: If the token does not have exactly three dot-separated parts.
    """
    segments = token.split(".")
    if len(segments) != 3:
        raise ValueError("Invalid JWT format")
    _, body, _ = segments
    # JWT segments omit base64 padding; restore it to a multiple of four.
    missing = -len(body) % 4
    raw = base64.urlsafe_b64decode(body + "=" * missing)
    return json.loads(raw)
23
+
24
+
25
def get_auth_headers() -> dict[str, str]:
    """Build the headers for authenticated API requests from the stored session.

    Raises:
        click.ClickException: If no token is stored, or the stored expiry
            timestamp is already in the past.
    """
    stored = load_config()
    token = stored.get("token")
    if not token:
        raise click.ClickException("Not logged in. Run 'verlet login' first.")

    # A missing expiry defaults to 0, which always counts as expired.
    if stored.get("expires_at", 0) < time.time():
        raise click.ClickException(
            "Session expired. Run 'verlet login' to re-authenticate."
        )

    # The same token is sent both as a cookie and as a bearer credential.
    headers = {"Cookie": f"access_token={token}"}
    headers["Authorization"] = f"Bearer {token}"
    return headers
42
+
43
+
44
def get_api_base() -> str:
    """Return the API base URL saved in the config, or the built-in default."""
    stored = load_config()
    return stored.get("api_base", DEFAULT_API_BASE)
46
+
47
+
48
def login(api_base: str | None = None) -> None:
    """Prompt for an access code, exchange it for a token, and store the session.

    Args:
        api_base: Base URL of the API. Falls back to ``DEFAULT_API_BASE`` when
            omitted or empty.

    Raises:
        click.ClickException: If the request cannot be sent, the server rejects
            the code, the response is not a JSON object, or no token is
            returned.
    """
    # Strip trailing slashes so the request URL never contains "//api/auth".
    api_base = (api_base or DEFAULT_API_BASE).rstrip("/")
    code = click.prompt("Access code", hide_input=True)

    try:
        resp = httpx.post(f"{api_base}/api/auth", json={"code": code}, timeout=15)
    except httpx.RequestError as e:
        raise click.ClickException(f"Connection failed: {e}") from e

    # Error pages from proxies or gateways are often not JSON; parse defensively
    # so the user gets a clean message instead of a traceback.
    try:
        body = resp.json()
    except ValueError:
        body = None

    if resp.status_code != 200:
        detail = body.get("error", resp.text) if isinstance(body, dict) else resp.text
        raise click.ClickException(f"Authentication failed: {detail}")
    if not isinstance(body, dict):
        raise click.ClickException("Unexpected response from server.")

    customer = body.get("customer", "unknown")

    # Get token from response body (preferred) or Set-Cookie header (fallback).
    token = body.get("token")
    if not token:
        # Inspect each Set-Cookie header separately: the cookie's name=value
        # pair is the first ';'-separated field of its own header, so this also
        # finds access_token when the server sets other cookies before it.
        for header in resp.headers.get_list("set-cookie"):
            pair = header.split(";", 1)[0].strip()
            if pair.startswith("access_token="):
                token = pair.split("=", 1)[1]
                break

    if not token:
        raise click.ClickException("No token received from server.")

    # Decode expiry from the JWT; fall back to 7 days when the token cannot be
    # decoded or carries no numeric "exp" claim.
    fallback_expiry = time.time() + 604800
    try:
        exp = _decode_jwt_payload(token).get("exp")
    except (ValueError, AttributeError):
        exp = None
    expires_at = exp if isinstance(exp, (int, float)) else fallback_expiry

    save_config({
        "token": token,
        "customer": customer,
        "api_base": api_base,
        "expires_at": expires_at,
    })

    click.echo(f"Logged in as {click.style(customer, bold=True)}")
97
+
98
+
99
def logout() -> None:
    """Delete the saved session and print a confirmation."""
    clear_config()
    click.echo("Logged out.")
@@ -0,0 +1,39 @@
1
+ """Top-level CLI group — registers auth commands and modality subgroups."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+
7
+ from verlet import __version__
8
+ from verlet.auth import login, logout
9
+ from verlet.config import DEFAULT_API_BASE
10
+ from verlet.ego.commands import ego_group
11
+
12
+
13
# Root command group of the CLI. The --version flag reports the package's
# __version__ under the program name "verlet". The docstring below is the
# help text click shows to users.
@click.group()
@click.version_option(__version__, prog_name="verlet")
def cli():
    """Verlet CLI — download and explore Verlet datasets."""
17
+
18
+
19
# Name the command explicitly. The group stores a command under the name it has
# at registration time, so renaming the Command object after the decorator has
# run does not change the key used for lookup; on click versions that derive
# "login-cmd" from the function name, `verlet login` would then not be found.
@cli.command("login")
@click.option("--api-base", default=None, help=f"API base URL (default: {DEFAULT_API_BASE}).")
def login_cmd(api_base: str | None):
    """Authenticate with your access code."""
    login(api_base)
28
+
29
+
30
# Name the command explicitly. The group stores a command under the name it has
# at registration time, so renaming the Command object afterwards would leave
# it reachable only under the function-derived name on older click versions.
@cli.command("logout")
def logout_cmd():
    """Remove stored credentials."""
    logout()
37
+
38
+ # Register modality subgroups
39
+ cli.add_command(ego_group)
@@ -0,0 +1,33 @@
1
+ """Config file management for ~/.verlet/config.json."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ CONFIG_DIR = Path.home() / ".verlet"
10
+ CONFIG_FILE = CONFIG_DIR / "config.json"
11
+
12
+ DEFAULT_API_BASE = "https://ego.verlet.co"
13
+
14
+
15
def ensure_config_dir() -> Path:
    """Create the config directory (with parents) if needed and return its path."""
    target = CONFIG_DIR
    target.mkdir(parents=True, exist_ok=True)
    return target
18
+
19
+
20
def load_config() -> dict[str, Any]:
    """Read the saved config file.

    Returns:
        The parsed config mapping, or an empty dict when the file is missing,
        cannot be decoded as JSON, or does not hold a JSON object. Returning
        an empty dict lets callers treat a damaged file like "not logged in"
        instead of crashing with a traceback.
    """
    try:
        # Read directly instead of checking exists() first, so a file removed
        # between the check and the read cannot raise.
        data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
    except ValueError:
        # Covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}
24
+
25
+
26
def save_config(data: dict[str, Any]) -> None:
    """Write *data* to the config file as indented JSON.

    The file holds the access token, so it is created readable and writable
    by the owner only.

    Args:
        data: JSON-serialisable config mapping to persist.
    """
    import os

    ensure_config_dir()
    payload = json.dumps(data, indent=2) + "\n"
    # Create with mode 0600 up front so the token is never briefly exposed
    # under a permissive umask.
    fd = os.open(CONFIG_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        fh.write(payload)
    # The mode passed to os.open only applies when the file is created;
    # tighten a file left over from an earlier run as well.
    os.chmod(CONFIG_FILE, 0o600)
29
+
30
+
31
def clear_config() -> None:
    """Delete the config file if it exists; do nothing otherwise."""
    if not CONFIG_FILE.exists():
        return
    CONFIG_FILE.unlink()
@@ -0,0 +1,79 @@
1
+ """Shared Rich display utilities for tables and formatting."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+
8
+ console = Console()
9
+
10
+
11
def format_duration(seconds: float) -> str:
    """Render a duration in seconds as ``Ns``, ``Mm SSs`` or ``Hh MMm``.

    Values under a minute are rounded to whole seconds; longer values are
    truncated to whole seconds (or whole minutes once an hour is reached).
    """
    if seconds < 60:
        return f"{seconds:.0f}s"

    whole_minutes, leftover = divmod(seconds, 60)
    whole_minutes = int(whole_minutes)
    if whole_minutes >= 60:
        hours, mins = divmod(whole_minutes, 60)
        return f"{hours}h {mins:02d}m"
    return f"{whole_minutes}m {int(leftover):02d}s"
22
+
23
+
24
def format_size(nbytes: int) -> str:
    """Render a byte count with one decimal and a 1024-based unit (B up to PB)."""
    units = ("B", "KB", "MB", "GB", "TB")
    value = nbytes
    idx = 0
    while idx < len(units):
        if value < 1024:
            return f"{value:.1f} {units[idx]}"
        value = value / 1024
        idx += 1
    # Anything that is still >= 1024 TB is reported in petabytes.
    return f"{value:.1f} PB"
31
+
32
+
33
def category_table(categories: list[dict]) -> Table:
    """Build the Rich table that summarises categories and their subcategories.

    Each subcategory gets its own row, with the category name shown only on
    the first one. A category without subcategories gets a single row with
    "-" as the subcategory and its own totals.
    """
    table = Table(title="EgoDex Catalog")
    column_specs = (
        ("Category", {"style": "bold cyan"}),
        ("Subcategory", {"style": "white"}),
        ("Segments", {"justify": "right", "style": "green"}),
        ("Duration", {"justify": "right", "style": "yellow"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    for cat in categories:
        subs = cat.get("subcategories", [])
        for position, sub in enumerate(subs):
            # Only the first row of a group repeats the category label.
            label = "" if position else cat["category"]
            table.add_row(
                label,
                sub["subcategory"],
                str(sub["segmentCount"]),
                format_duration(sub["totalDurationSec"]),
            )
        if not subs:
            table.add_row(
                cat["category"],
                "-",
                str(cat.get("segmentCount", 0)),
                format_duration(cat.get("totalDurationSec", 0)),
            )

    return table
60
+
61
+
62
def segment_table(segments: list[dict]) -> Table:
    """Build the Rich table that lists individual segments, one row each."""
    table = Table(title="Segments")
    column_specs = (
        ("ID", {"style": "bold"}),
        ("Category", {"style": "cyan"}),
        ("Subcategory", {"style": "white"}),
        ("Station", {"style": "magenta"}),
        ("Duration", {"justify": "right", "style": "yellow"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    for seg in segments:
        # Only "id" is required; the other fields fall back to blank / zero.
        duration = format_duration(seg.get("durationSec", 0))
        row = [seg["id"]]
        row.extend(seg.get(field, "") for field in ("category", "subcategory", "station"))
        table.add_row(*row, duration)
    return table
@@ -0,0 +1 @@
1
+ """Ego modality commands."""
@@ -0,0 +1,142 @@
1
+ """Fetch and filter the EgoDex catalog from the API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fnmatch import fnmatch
6
+
7
+ import click
8
+ import httpx
9
+
10
+ from verlet.auth import get_api_base, get_auth_headers
11
+
12
+ # Known files per segment in the output bucket.
13
+ # Key format: {station}/{episode}_seg{id}/{filename}
14
+ SEGMENT_FILES = [
15
+ "segment.egorec",
16
+ "hands.npz",
17
+ "overlay.mp4",
18
+ "recording.rrd",
19
+ "egodex/manipulation/0.hdf5",
20
+ "egodex/manipulation/0.mp4",
21
+ ]
22
+
23
+
24
def fetch_catalog() -> dict:
    """Fetch the full catalog from the API.

    Returns:
        The decoded JSON body of ``GET {api_base}/api/catalog``.

    Raises:
        click.ClickException: If the user is not logged in (raised by
            ``get_auth_headers``), the request cannot be sent, the server
            answers with a non-200 status, or the body is not valid JSON.
    """
    base = get_api_base()
    headers = get_auth_headers()
    try:
        resp = httpx.get(f"{base}/api/catalog", headers=headers, timeout=30)
    except httpx.RequestError as e:
        raise click.ClickException(f"Connection failed: {e}") from e

    if resp.status_code == 401:
        raise click.ClickException("Session expired. Run 'verlet login' to re-authenticate.")
    if resp.status_code != 200:
        raise click.ClickException(f"Catalog request failed ({resp.status_code}): {resp.text}")

    # A 200 with a non-JSON body (e.g. an HTML page from a proxy) should be a
    # clean CLI error, not a traceback.
    try:
        return resp.json()
    except ValueError as e:
        raise click.ClickException("Catalog response was not valid JSON.") from e
39
+
40
+
41
def flatten_segments(catalog: dict) -> list[dict]:
    """Collect every segment from all categories and subcategories, in order."""
    return [
        seg
        for cat in catalog.get("categories", [])
        for sub in cat.get("subcategories", [])
        for seg in sub.get("segments", [])
    ]
49
+
50
+
51
+ def filter_segments(
52
+ segments: list[dict],
53
+ *,
54
+ task: str | None = None,
55
+ category: str | None = None,
56
+ ) -> list[dict]:
57
+ """Filter segments by task (station) and/or category."""
58
+ out = segments
59
+ if task:
60
+ task_lower = task.lower()
61
+ out = [s for s in out if s.get("station", "").lower() == task_lower]
62
+ if category:
63
+ cat_lower = category.lower()
64
+ out = [s for s in out if s.get("category", "").lower() == cat_lower]
65
+ return out
66
+
67
+
68
+ def filter_categories(
69
+ categories: list[dict],
70
+ *,
71
+ task: str | None = None,
72
+ category: str | None = None,
73
+ ) -> list[dict]:
74
+ """Filter category groups. Returns a new list with matching entries."""
75
+ out = categories
76
+ if category:
77
+ cat_lower = category.lower()
78
+ out = [c for c in out if c["category"].lower() == cat_lower]
79
+ if task:
80
+ # Filter segments within each category/subcategory by station
81
+ task_lower = task.lower()
82
+ filtered = []
83
+ for cat in out:
84
+ new_subs = []
85
+ for sub in cat.get("subcategories", []):
86
+ matching = [
87
+ s for s in sub.get("segments", [])
88
+ if s.get("station", "").lower() == task_lower
89
+ ]
90
+ if matching:
91
+ new_subs.append({
92
+ **sub,
93
+ "segments": matching,
94
+ "segmentCount": len(matching),
95
+ "totalDurationSec": sum(s.get("durationSec", 0) for s in matching),
96
+ })
97
+ if new_subs:
98
+ filtered.append({
99
+ **cat,
100
+ "subcategories": new_subs,
101
+ "segmentCount": sum(s["segmentCount"] for s in new_subs),
102
+ "totalDurationSec": sum(s["totalDurationSec"] for s in new_subs),
103
+ })
104
+ out = filtered
105
+ return out
106
+
107
+
108
def segment_r2_prefix(seg: dict) -> str:
    """Turn a segment's ID into its output-bucket prefix.

    The ID is split at the first double underscore, so
    'station-1__episode_042_seg5' becomes 'station-1/episode_042_seg5'.
    An ID without a double underscore is returned unchanged.
    """
    station, separator, remainder = seg["id"].partition("__")
    if separator:
        return f"{station}/{remainder}"
    return station
119
+
120
+
121
def segment_file_list(
    seg: dict,
    *,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
) -> list[str]:
    """List the R2 keys of a segment's known files, honouring include/exclude globs.

    Patterns are matched against the final path component only. A key is kept
    when it matches at least one include pattern (if any are given) and no
    exclude pattern.
    """
    prefix = segment_r2_prefix(seg)

    def _matches(key: str, patterns: list[str]) -> bool:
        basename = key.rsplit("/", 1)[-1]
        return any(fnmatch(basename, pat) for pat in patterns)

    selected = []
    for name in SEGMENT_FILES:
        key = f"{prefix}/{name}"
        if include and not _matches(key, include):
            continue
        if exclude and _matches(key, exclude):
            continue
        selected.append(key)
    return selected
@@ -0,0 +1,151 @@
1
+ """Click commands for the ego modality."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from verlet.display import category_table, console, format_duration, segment_table
11
+ from verlet.ego.catalog import (
12
+ fetch_catalog,
13
+ filter_categories,
14
+ filter_segments,
15
+ flatten_segments,
16
+ segment_file_list,
17
+ segment_r2_prefix,
18
+ )
19
+
20
+
21
# Command group registered under the name "ego"; the list, info and download
# commands in this module attach to it. The docstring below is the help text
# click shows to users.
@click.group("ego")
def ego_group():
    """EgoDex hand-pose data."""
24
+
25
+
26
@ego_group.command("list")
@click.option("--task", default=None, help="Filter by station/task name.")
@click.option("--category", default=None, help="Filter by category.")
@click.option("--detailed", is_flag=True, help="Show individual segments.")
def list_cmd(task: str | None, category: str | None, detailed: bool):
    """List available EgoDex data."""
    catalog = fetch_catalog()

    if not detailed:
        # Summary view: one row per category/subcategory plus overall totals.
        groups = filter_categories(catalog.get("categories", []), task=task, category=category)
        if not groups:
            console.print("No categories found matching filters.")
            return
        console.print(category_table(groups))
        segment_total = sum(g.get("segmentCount", 0) for g in groups)
        duration_total = sum(g.get("totalDurationSec", 0) for g in groups)
        console.print(f"\n[bold]{segment_total}[/bold] segments, {format_duration(duration_total)} total")
        return

    # Detailed view: one row per segment.
    rows = filter_segments(flatten_segments(catalog), task=task, category=category)
    if not rows:
        console.print("No segments found matching filters.")
        return
    console.print(segment_table(rows))
    duration_total = sum(r.get("durationSec", 0) for r in rows)
    console.print(f"\n[bold]{len(rows)}[/bold] segments, "
                  f"{format_duration(duration_total)} total")
53
+
54
+
55
@ego_group.command("info")
@click.argument("segment_id")
def info_cmd(segment_id: str):
    """Show details for a specific segment."""
    # Linear scan of the flattened catalog for the first exact ID match.
    seg = None
    for candidate in flatten_segments(fetch_catalog()):
        if candidate["id"] == segment_id:
            seg = candidate
            break
    if not seg:
        raise click.ClickException(f"Segment '{segment_id}' not found.")

    console.print(f"[bold]Segment:[/bold]     {seg['id']}")
    console.print(f"[bold]Station:[/bold]     {seg.get('station', 'N/A')}")
    console.print(f"[bold]Category:[/bold]    {seg.get('category', 'N/A')}")
    console.print(f"[bold]Subcategory:[/bold] {seg.get('subcategory', 'N/A')}")
    duration = format_duration(seg.get('durationSec', 0))
    console.print(f"[bold]Duration:[/bold]    {duration}")
    start_sec = seg.get('startSec', 0)
    end_sec = seg.get('endSec', 0)
    console.print(f"[bold]Time range:[/bold]  {start_sec:.1f}s — {end_sec:.1f}s")

    # Camera intrinsics are printed only when the segment carries them.
    cam = seg.get("cameraInfo")
    if cam:
        console.print(f"[bold]Camera:[/bold]      fx={cam['fx']:.1f} fy={cam['fy']:.1f} "
                      f"ppx={cam['ppx']:.1f} ppy={cam['ppy']:.1f}")

    console.print(f"[bold]R2 prefix:[/bold]   {segment_r2_prefix(seg)}/")

    console.print("\n[bold]Files:[/bold]")
    for key in segment_file_list(seg):
        console.print(f"  {key}")
84
+
85
+
86
@ego_group.command("download")
@click.option("-o", "--output", default="./verlet-data", type=click.Path(), help="Output directory.")
@click.option("--task", default=None, help="Filter by station/task name.")
@click.option("--category", default=None, help="Filter by category.")
@click.option("--include", default=None, help="Comma-separated glob patterns for files to include.")
@click.option("--exclude", default=None, help="Comma-separated glob patterns for files to exclude.")
# At least one worker is required: the value sizes the download semaphore, so
# zero would leave every download waiting forever.
@click.option("--parallel", default=8, type=click.IntRange(min=1), help="Number of concurrent downloads.")
@click.option("--dry-run", is_flag=True, help="Show download plan without downloading.")
def download_cmd(
    output: str,
    task: str | None,
    category: str | None,
    include: str | None,
    exclude: str | None,
    parallel: int,
    dry_run: bool,
):
    """Download EgoDex data segments."""
    catalog = fetch_catalog()
    segments = flatten_segments(catalog)
    segments = filter_segments(segments, task=task, category=category)

    if not segments:
        console.print("No segments found matching filters.")
        return

    include_pats = [p.strip() for p in include.split(",")] if include else None
    exclude_pats = [p.strip() for p in exclude.split(",")] if exclude else None

    # Build download plan: list of (r2_key, local_path).
    # Keys look like "station-1/episode_042_seg5/hands.npz" and double as the
    # path relative to the output directory.
    base_dir = Path(output) / "ego"
    root = base_dir.resolve()
    file_plan: list[tuple[str, Path]] = []

    for seg in segments:
        for key in segment_file_list(seg, include=include_pats, exclude=exclude_pats):
            local_path = base_dir / key
            # Keys are built from server-supplied segment IDs; refuse any that
            # would resolve outside the output directory ("..", absolute path).
            if not local_path.resolve().is_relative_to(root):
                raise click.ClickException(
                    f"Refusing to write outside {root}: {key}"
                )
            file_plan.append((key, local_path))

    # Files that already exist are still planned; their size is compared with
    # the remote size during download.
    existing = sum(1 for _, p in file_plan if p.exists())

    console.print(f"[bold]{len(segments)}[/bold] segments, "
                  f"[bold]{len(file_plan)}[/bold] files to download")
    if existing:
        console.print(f"  ({existing} files already exist locally — will verify sizes)")
    console.print(f"  Output: {root}")

    if dry_run:
        console.print("\n[bold]Download plan:[/bold]")
        for key, local in file_plan:
            status = "[green]exists[/]" if local.exists() else "[dim]pending[/]"
            console.print(f"  {status}  {key}")
        return

    # Imported here so list/info/dry-run do not load the download engine.
    from verlet.ego.download import download_files

    ok, fail = asyncio.run(download_files(file_plan, parallel=parallel))

    console.print(f"\n[bold green]{ok}[/] downloaded", end="")
    if fail:
        console.print(f", [bold red]{fail}[/] failed", end="")
    console.print()
@@ -0,0 +1,145 @@
1
+ """Async download engine with presigned URLs and Rich progress."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from pathlib import Path
7
+
8
+ import httpx
9
+ from rich.progress import (
10
+ BarColumn,
11
+ DownloadColumn,
12
+ Progress,
13
+ TaskID,
14
+ TextColumn,
15
+ TimeRemainingColumn,
16
+ TransferSpeedColumn,
17
+ )
18
+
19
+ from verlet.auth import get_api_base, get_auth_headers
20
+ from verlet.display import console
21
+
22
+ CHUNK_SIZE = 256 * 1024 # 256 KB
23
+
24
+
25
async def _presign(
    client: httpx.AsyncClient,
    api_base: str,
    headers: dict[str, str],
    r2_key: str,
) -> str:
    """Ask the API for a presigned URL for *r2_key* in the "output" bucket.

    Raises:
        RuntimeError: If the API answers 401.
        httpx.HTTPStatusError: For any other error status.
    """
    query = {"key": r2_key, "bucket": "output"}
    resp = await client.get(f"{api_base}/api/presign", params=query, headers=headers)
    if resp.status_code == 401:
        raise RuntimeError("Session expired. Run 'verlet login' to re-authenticate.")
    resp.raise_for_status()
    payload = resp.json()
    return payload["url"]
41
+
42
+
43
async def _download_file(
    client: httpx.AsyncClient,
    api_base: str,
    auth_headers: dict[str, str],
    r2_key: str,
    local_path: Path,
    semaphore: asyncio.Semaphore,
    progress: Progress,
    overall_task: TaskID,
    retries: int = 3,
) -> bool:
    """Download a single file with retries and progress tracking.

    The body is streamed to ``<local_path>.tmp`` and moved into place once
    complete, so an interrupted transfer never leaves a partial file under the
    final name. A local file whose size equals the remote content-length is
    treated as already downloaded.

    Args:
        client: Shared HTTP client used for presigning and for the transfer.
        api_base: API base URL passed to the presign call.
        auth_headers: Auth headers passed to the presign call.
        r2_key: Bucket key of the file.
        local_path: Final destination on disk.
        semaphore: Limits how many files are processed at once.
        progress: Rich progress display to report into.
        overall_task: ID of the aggregate progress task.
        retries: Maximum number of attempts.

    Returns:
        True when the file was downloaded or skipped as already present,
        False when every attempt failed.
    """
    tmp_path = local_path.with_suffix(local_path.suffix + ".tmp")

    for attempt in range(1, retries + 1):
        try:
            # The slot is held for presigning and the transfer, so at most
            # `parallel` files are in flight; the back-off sleep below runs
            # outside of it.
            async with semaphore:
                url = await _presign(client, api_base, auth_headers, r2_key)

                async with client.stream("GET", url) as resp:
                    resp.raise_for_status()
                    total = int(resp.headers.get("content-length", 0))

                    # Skip if local file matches remote size
                    if total > 0 and local_path.exists() and local_path.stat().st_size == total:
                        progress.advance(overall_task, total)
                        return True

                    local_path.parent.mkdir(parents=True, exist_ok=True)

                    file_task = progress.add_task(
                        f"  {r2_key.rsplit('/', 1)[-1]}",
                        total=total or None,
                    )
                    try:
                        with open(tmp_path, "wb") as f:
                            async for chunk in resp.aiter_bytes(CHUNK_SIZE):
                                f.write(chunk)
                                progress.advance(file_task, len(chunk))
                                progress.advance(overall_task, len(chunk))
                    finally:
                        # Remove the per-file row on failure too, so failed
                        # attempts do not leave stale rows in the display.
                        progress.remove_task(file_task)

                # replace() overwrites an existing destination on every
                # platform; rename() raises on Windows if the target exists.
                tmp_path.replace(local_path)
                return True

        except Exception as e:
            tmp_path.unlink(missing_ok=True)
            if attempt == retries:
                console.print(f"[red]Failed[/] {r2_key}: {e}")
                return False
            # Exponential back-off: 2s, 4s, ...
            await asyncio.sleep(2 ** attempt)

    return False
99
+
100
+
101
async def download_files(
    file_plan: list[tuple[str, Path]],
    parallel: int = 8,
) -> tuple[int, int]:
    """Download a list of (r2_key, local_path) pairs concurrently.

    Args:
        file_plan: Pairs of bucket key and local destination path.
        parallel: Maximum number of files processed at once. Values below 1
            are treated as 1.

    Returns:
        (success_count, fail_count).
    """
    if not file_plan:
        console.print("Nothing to download.")
        return 0, 0

    api_base = get_api_base()
    auth_headers = get_auth_headers()
    # A semaphore created with 0 would block every download forever, and a
    # negative value is rejected by asyncio; clamp to at least one slot.
    semaphore = asyncio.Semaphore(max(1, parallel))

    with Progress(
        TextColumn("[bold blue]{task.description}"),
        BarColumn(),
        DownloadColumn(),
        TransferSpeedColumn(),
        TimeRemainingColumn(),
        console=console,
    ) as progress:
        # File sizes are only known once each transfer starts, so the
        # aggregate task is created without a known total.
        overall_task = progress.add_task("Downloading", total=0)

        # URLs are presigned per file, right before its transfer.
        async with httpx.AsyncClient(timeout=httpx.Timeout(60, connect=15)) as client:
            tasks = [
                _download_file(
                    client, api_base, auth_headers,
                    r2_key, local_path,
                    semaphore, progress, overall_task,
                )
                for r2_key, local_path in file_plan
            ]
            results = await asyncio.gather(*tasks)

    ok = sum(1 for r in results if r)
    fail = len(results) - ok
    return ok, fail