verlet 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- verlet-0.1.0/.github/workflows/publish.yml +20 -0
- verlet-0.1.0/.gitignore +10 -0
- verlet-0.1.0/LICENSE +21 -0
- verlet-0.1.0/PKG-INFO +101 -0
- verlet-0.1.0/README.md +89 -0
- verlet-0.1.0/install.sh +74 -0
- verlet-0.1.0/pyproject.toml +22 -0
- verlet-0.1.0/src/verlet/__init__.py +3 -0
- verlet-0.1.0/src/verlet/auth.py +102 -0
- verlet-0.1.0/src/verlet/cli.py +39 -0
- verlet-0.1.0/src/verlet/config.py +33 -0
- verlet-0.1.0/src/verlet/display.py +79 -0
- verlet-0.1.0/src/verlet/ego/__init__.py +1 -0
- verlet-0.1.0/src/verlet/ego/catalog.py +142 -0
- verlet-0.1.0/src/verlet/ego/commands.py +151 -0
- verlet-0.1.0/src/verlet/ego/download.py +145 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Publish the package to PyPI via trusted publishing (OIDC) whenever a
# GitHub release is published.
name: Publish to PyPI

on:
  release:
    types: [published]

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write  # required for PyPI trusted publishing
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - run: pip install build
      - run: python -m build
      - uses: pypa/gh-action-pypi-publish@release/v1
|
verlet-0.1.0/.gitignore
ADDED
verlet-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Verlet
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
verlet-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: verlet
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Download and explore Verlet datasets
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: click>=8.0
|
|
9
|
+
Requires-Dist: httpx>=0.24
|
|
10
|
+
Requires-Dist: rich>=13.0
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# verlet
|
|
14
|
+
|
|
15
|
+
Download and explore Verlet datasets from the command line.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
curl -sSL https://raw.githubusercontent.com/verlet/verlet-cli/main/install.sh | bash
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
This creates an isolated venv at `~/.verlet/venv` and symlinks `verlet` into `~/.local/bin`.
|
|
24
|
+
|
|
25
|
+
Or install manually:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install verlet
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
For development:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
git clone https://github.com/verlet-robotics/verlet-cli.git
|
|
35
|
+
cd verlet-cli
|
|
36
|
+
pip install -e .
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Authenticate with your access code
|
|
43
|
+
verlet login
|
|
44
|
+
|
|
45
|
+
# Browse available data
|
|
46
|
+
verlet ego list
|
|
47
|
+
verlet ego list --detailed --category Kitchen
|
|
48
|
+
|
|
49
|
+
# Download data
|
|
50
|
+
verlet ego download -o ./data --category Kitchen
|
|
51
|
+
|
|
52
|
+
# Check segment details
|
|
53
|
+
verlet ego info station-1__episode_042_seg5
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Commands
|
|
57
|
+
|
|
58
|
+
### Authentication
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
verlet login # Prompt for access code, store JWT
|
|
62
|
+
verlet logout # Remove stored credentials
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### EgoDex Hand Pose Data (`verlet ego`)
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# List categories and segment counts
|
|
69
|
+
verlet ego list
|
|
70
|
+
verlet ego list --task station-1
|
|
71
|
+
verlet ego list --category Kitchen
|
|
72
|
+
verlet ego list --detailed
|
|
73
|
+
|
|
74
|
+
# Download segments
|
|
75
|
+
verlet ego download # all segments to ./verlet-data/ego/
|
|
76
|
+
verlet ego download -o ./data # custom output directory
|
|
77
|
+
verlet ego download --category Kitchen # filter by category
|
|
78
|
+
verlet ego download --include "*.hdf5,*.mp4" # only specific file types
|
|
79
|
+
verlet ego download --exclude "*.egorec,*.rrd" # skip large files
|
|
80
|
+
verlet ego download --parallel 16 # concurrency (default 8)
|
|
81
|
+
verlet ego download --dry-run # show download plan only
|
|
82
|
+
|
|
83
|
+
# Segment info
|
|
84
|
+
verlet ego info SEGMENT_ID
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Data Layout
|
|
88
|
+
|
|
89
|
+
Downloaded data is organized by station and episode:
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
verlet-data/ego/
|
|
93
|
+
station-1/
|
|
94
|
+
episode_042_seg5/
|
|
95
|
+
segment.egorec
|
|
96
|
+
hands.npz
|
|
97
|
+
overlay.mp4
|
|
98
|
+
recording.rrd
|
|
99
|
+
egodex/manipulation/0.hdf5
|
|
100
|
+
egodex/manipulation/0.mp4
|
|
101
|
+
```
|
verlet-0.1.0/README.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# verlet
|
|
2
|
+
|
|
3
|
+
Download and explore Verlet datasets from the command line.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
curl -sSL https://raw.githubusercontent.com/verlet-robotics/verlet-cli/main/install.sh | bash
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
This creates an isolated venv at `~/.verlet/venv` and symlinks `verlet` into `~/.local/bin`.
|
|
12
|
+
|
|
13
|
+
Or install manually:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install verlet
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
For development:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
git clone https://github.com/verlet-robotics/verlet-cli.git
|
|
23
|
+
cd verlet-cli
|
|
24
|
+
pip install -e .
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quick Start
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
# Authenticate with your access code
|
|
31
|
+
verlet login
|
|
32
|
+
|
|
33
|
+
# Browse available data
|
|
34
|
+
verlet ego list
|
|
35
|
+
verlet ego list --detailed --category Kitchen
|
|
36
|
+
|
|
37
|
+
# Download data
|
|
38
|
+
verlet ego download -o ./data --category Kitchen
|
|
39
|
+
|
|
40
|
+
# Check segment details
|
|
41
|
+
verlet ego info station-1__episode_042_seg5
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Commands
|
|
45
|
+
|
|
46
|
+
### Authentication
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
verlet login # Prompt for access code, store JWT
|
|
50
|
+
verlet logout # Remove stored credentials
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### EgoDex Hand Pose Data (`verlet ego`)
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# List categories and segment counts
|
|
57
|
+
verlet ego list
|
|
58
|
+
verlet ego list --task station-1
|
|
59
|
+
verlet ego list --category Kitchen
|
|
60
|
+
verlet ego list --detailed
|
|
61
|
+
|
|
62
|
+
# Download segments
|
|
63
|
+
verlet ego download # all segments to ./verlet-data/ego/
|
|
64
|
+
verlet ego download -o ./data # custom output directory
|
|
65
|
+
verlet ego download --category Kitchen # filter by category
|
|
66
|
+
verlet ego download --include "*.hdf5,*.mp4" # only specific file types
|
|
67
|
+
verlet ego download --exclude "*.egorec,*.rrd" # skip large files
|
|
68
|
+
verlet ego download --parallel 16 # concurrency (default 8)
|
|
69
|
+
verlet ego download --dry-run # show download plan only
|
|
70
|
+
|
|
71
|
+
# Segment info
|
|
72
|
+
verlet ego info SEGMENT_ID
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Data Layout
|
|
76
|
+
|
|
77
|
+
Downloaded data is organized by station and episode:
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
verlet-data/ego/
|
|
81
|
+
station-1/
|
|
82
|
+
episode_042_seg5/
|
|
83
|
+
segment.egorec
|
|
84
|
+
hands.npz
|
|
85
|
+
overlay.mp4
|
|
86
|
+
recording.rrd
|
|
87
|
+
egodex/manipulation/0.hdf5
|
|
88
|
+
egodex/manipulation/0.mp4
|
|
89
|
+
```
|
verlet-0.1.0/install.sh
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Install the verlet CLI into an isolated venv at ~/.verlet/venv and expose
# it through a symlink at ~/.local/bin/verlet.
# Usage: curl -sSL https://raw.githubusercontent.com/verlet-robotics/verlet-cli/main/install.sh | bash
set -euo pipefail

INSTALL_DIR="$HOME/.verlet"
VENV_DIR="$INSTALL_DIR/venv"
BIN_DIR="$HOME/.local/bin"
PACKAGE="verlet"
REPO="https://github.com/verlet-robotics/verlet-cli.git"

info() { printf '\033[1;34m==>\033[0m %s\n' "$*"; }
ok() { printf '\033[1;32m==>\033[0m %s\n' "$*"; }
err() { printf '\033[1;31merror:\033[0m %s\n' "$*" >&2; exit 1; }

# --- Locate a Python >= 3.10 interpreter ---
PYTHON=""
for cmd in python3 python; do
    if command -v "$cmd" &>/dev/null; then
        version=$("$cmd" -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')" 2>/dev/null || true)
        major=$(echo "$version" | cut -d. -f1)
        minor=$(echo "$version" | cut -d. -f2)
        if [ "$major" = "3" ] && [ "$minor" -ge 10 ]; then
            PYTHON="$cmd"
            break
        fi
    fi
done
[ -n "$PYTHON" ] || err "Python 3.10+ is required but not found. Install it first."
info "Using $PYTHON ($version)"

# --- Recreate the venv from scratch so stale installs never linger ---
if [ -d "$VENV_DIR" ]; then
    info "Removing existing venv..."
    rm -rf "$VENV_DIR"
fi

info "Creating virtual environment at $VENV_DIR..."
"$PYTHON" -m venv "$VENV_DIR"

# --- Install from PyPI, falling back to the git repository ---
info "Installing verlet..."
"$VENV_DIR/bin/pip" install --quiet --upgrade pip
"$VENV_DIR/bin/pip" install --quiet "$PACKAGE" 2>/dev/null \
    || "$VENV_DIR/bin/pip" install --quiet "git+${REPO}"

# --- Expose the entry point ---
mkdir -p "$BIN_DIR"
ln -sf "$VENV_DIR/bin/verlet" "$BIN_DIR/verlet"

# --- Make sure ~/.local/bin is on PATH; append to the shell rc if not ---
if ! echo "$PATH" | tr ':' '\n' | grep -qx "$BIN_DIR"; then
    SHELL_NAME=$(basename "$SHELL")
    case "$SHELL_NAME" in
        zsh) RC="$HOME/.zshrc" ;;
        bash) RC="$HOME/.bashrc" ;;
        fish) RC="$HOME/.config/fish/config.fish" ;;
        *) RC="" ;;
    esac

    if [ -n "$RC" ]; then
        if [ "$SHELL_NAME" = "fish" ]; then
            echo "fish_add_path $BIN_DIR" >> "$RC"
        else
            echo "export PATH=\"$BIN_DIR:\$PATH\"" >> "$RC"
        fi
        ok "Added $BIN_DIR to PATH in $RC"
        info "Restart your shell or run: export PATH=\"$BIN_DIR:\$PATH\""
    else
        info "Add $BIN_DIR to your PATH manually."
    fi
fi

ok "Installed! Run 'verlet login' to get started."
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Packaging metadata — built with hatchling (PEP 517/621).
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "verlet"
version = "0.1.0"
description = "Download and explore Verlet datasets"
readme = "README.md"
license = "MIT"
requires-python = ">=3.10"
dependencies = [
    "click>=8.0",
    "httpx>=0.24",
    "rich>=13.0",
]

# Console entry point: the `verlet` command runs verlet.cli:cli.
[project.scripts]
verlet = "verlet.cli:cli"

[tool.hatch.build.targets.wheel]
packages = ["src/verlet"]
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Authentication: login, logout, token validation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from verlet.config import DEFAULT_API_BASE, clear_config, load_config, save_config
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _decode_jwt_payload(token: str) -> dict:
|
|
16
|
+
"""Decode JWT payload (no signature verification — server is authoritative)."""
|
|
17
|
+
parts = token.split(".")
|
|
18
|
+
if len(parts) != 3:
|
|
19
|
+
raise ValueError("Invalid JWT format")
|
|
20
|
+
# Add padding
|
|
21
|
+
payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4)
|
|
22
|
+
return json.loads(base64.urlsafe_b64decode(payload_b64))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_auth_headers() -> dict[str, str]:
    """Return headers for authenticated API requests, or raise click.ClickException."""
    cfg = load_config()
    token = cfg.get("token")
    if not token:
        raise click.ClickException(
            "Not logged in. Run 'verlet login' first."
        )
    # A config with no recorded expiry counts as expired (default 0).
    if time.time() > cfg.get("expires_at", 0):
        raise click.ClickException(
            "Session expired. Run 'verlet login' to re-authenticate."
        )
    # Provide the token both as a cookie and as a bearer header.
    return {
        "Cookie": f"access_token={token}",
        "Authorization": f"Bearer {token}",
    }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_api_base() -> str:
    """Return the configured API base URL, falling back to the default."""
    cfg = load_config()
    return cfg.get("api_base", DEFAULT_API_BASE)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def login(api_base: str | None = None) -> None:
    """Prompt for an access code, exchange it for a JWT, and persist it.

    Args:
        api_base: Override for the API base URL; defaults to DEFAULT_API_BASE.

    Raises:
        click.ClickException: on connection failure, an auth rejection, or a
            successful response that carries no token.
    """
    api_base = api_base or DEFAULT_API_BASE
    code = click.prompt("Access code", hide_input=True)

    try:
        resp = httpx.post(
            f"{api_base}/api/auth",
            json={"code": code},
            timeout=15,
        )
    except httpx.RequestError as e:
        raise click.ClickException(f"Connection failed: {e}")

    if resp.status_code != 200:
        # The error body is not guaranteed to be JSON (proxies / HTML error
        # pages); fall back to the raw text instead of crashing on decode.
        try:
            detail = resp.json().get("error", resp.text)
        except (ValueError, AttributeError):
            detail = resp.text
        raise click.ClickException(f"Authentication failed: {detail}")

    body = resp.json()
    customer = body.get("customer", "unknown")

    # Get token from response body (preferred) or Set-Cookie header (fallback)
    token = body.get("token")
    if not token:
        cookie_header = resp.headers.get("set-cookie", "")
        for part in cookie_header.split(";"):
            part = part.strip()
            if part.startswith("access_token="):
                token = part.split("=", 1)[1]
                break

    if not token:
        raise click.ClickException("No token received from server.")

    # Decode expiry from the JWT payload; tolerate undecodable tokens.
    try:
        payload = _decode_jwt_payload(token)
        expires_at = payload.get("exp", time.time() + 604800)
    except Exception:
        expires_at = time.time() + 604800  # fallback: 7 days

    save_config({
        "token": token,
        "customer": customer,
        "api_base": api_base,
        "expires_at": expires_at,
    })

    click.echo(f"Logged in as {click.style(customer, bold=True)}")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def logout() -> None:
    """Remove stored credentials."""
    # All credentials live in the local config file; deleting it logs us out.
    clear_config()
    click.echo("Logged out.")
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Top-level CLI group — registers auth commands and modality subgroups."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from verlet import __version__
|
|
8
|
+
from verlet.auth import login, logout
|
|
9
|
+
from verlet.config import DEFAULT_API_BASE
|
|
10
|
+
from verlet.ego.commands import ego_group
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Root command group; auth commands and modality subgroups attach to this.
@click.group()
@click.version_option(__version__, prog_name="verlet")
def cli():
    """Verlet CLI — download and explore Verlet datasets."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Register explicitly as "login": assigning `login_cmd.name` after
# @cli.command() has already run leaves the group's command table keyed by
# the auto-derived name, so `verlet login` would fail to resolve.
@cli.command("login")
@click.option("--api-base", default=None, help=f"API base URL (default: {DEFAULT_API_BASE}).")
def login_cmd(api_base: str | None):
    """Authenticate with your access code."""
    login(api_base)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Register explicitly as "logout" (see the note on login_cmd: renaming after
# registration does not update the group's lookup table).
@cli.command("logout")
def logout_cmd():
    """Remove stored credentials."""
    logout()
|
|
37
|
+
|
|
38
|
+
# Register modality subgroups
|
|
39
|
+
cli.add_command(ego_group)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Config file management for ~/.verlet/config.json."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
CONFIG_DIR = Path.home() / ".verlet"
|
|
10
|
+
CONFIG_FILE = CONFIG_DIR / "config.json"
|
|
11
|
+
|
|
12
|
+
DEFAULT_API_BASE = "https://ego.verlet.co"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def ensure_config_dir() -> Path:
    """Create ~/.verlet (including parents) if missing and return its path."""
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    return CONFIG_DIR
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def load_config() -> dict[str, Any]:
    """Parse the config file, returning an empty dict when it does not exist."""
    try:
        return json.loads(CONFIG_FILE.read_text())
    except FileNotFoundError:
        return {}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def save_config(data: dict[str, Any]) -> None:
    """Write *data* to the config file as pretty-printed JSON."""
    ensure_config_dir()
    serialized = json.dumps(data, indent=2)
    CONFIG_FILE.write_text(serialized + "\n")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def clear_config() -> None:
    """Delete the config file; a missing file is a no-op."""
    CONFIG_FILE.unlink(missing_ok=True)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Shared Rich display utilities for tables and formatting."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def format_duration(seconds: float) -> str:
    """Render a duration in seconds as e.g. '42s', '3m 05s', or '2h 07m'."""
    if seconds < 60:
        return f"{seconds:.0f}s"
    total_minutes, remainder = divmod(int(seconds), 60)
    if total_minutes < 60:
        return f"{total_minutes}m {remainder:02d}s"
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours}h {minutes:02d}m"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def format_size(nbytes: int) -> str:
    """Render a byte count with a binary-scaled unit suffix (B..PB)."""
    size = float(nbytes)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if size < 1024:
            return f"{size:.1f} {unit}"
        size /= 1024
    # Anything >= 1024 TB falls through to petabytes.
    return f"{size:.1f} PB"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def category_table(categories: list[dict]) -> Table:
    """Build a Rich table summarizing categories and their subcategories."""
    table = Table(title="EgoDex Catalog")
    table.add_column("Category", style="bold cyan")
    table.add_column("Subcategory", style="white")
    table.add_column("Segments", justify="right", style="green")
    table.add_column("Duration", justify="right", style="yellow")

    for cat in categories:
        subs = cat.get("subcategories", [])
        # Show the category name only on its first row.
        for idx, sub in enumerate(subs):
            table.add_row(
                cat["category"] if idx == 0 else "",
                sub["subcategory"],
                str(sub["segmentCount"]),
                format_duration(sub["totalDurationSec"]),
            )
        if not subs:
            # Categories without subcategories get a single summary row.
            table.add_row(
                cat["category"],
                "-",
                str(cat.get("segmentCount", 0)),
                format_duration(cat.get("totalDurationSec", 0)),
            )

    return table
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def segment_table(segments: list[dict]) -> Table:
    """Build a Rich table with one row per segment."""
    table = Table(title="Segments")
    for header, kwargs in (
        ("ID", {"style": "bold"}),
        ("Category", {"style": "cyan"}),
        ("Subcategory", {"style": "white"}),
        ("Station", {"style": "magenta"}),
        ("Duration", {"justify": "right", "style": "yellow"}),
    ):
        table.add_column(header, **kwargs)

    for seg in segments:
        table.add_row(
            seg["id"],
            seg.get("category", ""),
            seg.get("subcategory", ""),
            seg.get("station", ""),
            format_duration(seg.get("durationSec", 0)),
        )
    return table
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Ego modality commands."""
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Fetch and filter the EgoDex catalog from the API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from fnmatch import fnmatch
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from verlet.auth import get_api_base, get_auth_headers
|
|
11
|
+
|
|
12
|
+
# Known files per segment in the output bucket.
# Key format: {station}/{episode}_seg{id}/{file} — presumably one of the
# entries below; confirm against the bucket layout if files are added.
SEGMENT_FILES = [
    "segment.egorec",
    "hands.npz",
    "overlay.mp4",
    "recording.rrd",
    "egodex/manipulation/0.hdf5",
    "egodex/manipulation/0.mp4",
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def fetch_catalog() -> dict:
    """Fetch the full catalog from the API."""
    url = f"{get_api_base()}/api/catalog"
    try:
        resp = httpx.get(url, headers=get_auth_headers(), timeout=30)
    except httpx.RequestError as e:
        raise click.ClickException(f"Connection failed: {e}")

    # 401 gets a friendlier message than other failures.
    if resp.status_code == 401:
        raise click.ClickException("Session expired. Run 'verlet login' to re-authenticate.")
    if resp.status_code != 200:
        raise click.ClickException(f"Catalog request failed ({resp.status_code}): {resp.text}")

    return resp.json()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def flatten_segments(catalog: dict) -> list[dict]:
    """Extract a flat list of segments from the catalog (category order preserved)."""
    return [
        seg
        for cat in catalog.get("categories", [])
        for sub in cat.get("subcategories", [])
        for seg in sub.get("segments", [])
    ]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def filter_segments(
|
|
52
|
+
segments: list[dict],
|
|
53
|
+
*,
|
|
54
|
+
task: str | None = None,
|
|
55
|
+
category: str | None = None,
|
|
56
|
+
) -> list[dict]:
|
|
57
|
+
"""Filter segments by task (station) and/or category."""
|
|
58
|
+
out = segments
|
|
59
|
+
if task:
|
|
60
|
+
task_lower = task.lower()
|
|
61
|
+
out = [s for s in out if s.get("station", "").lower() == task_lower]
|
|
62
|
+
if category:
|
|
63
|
+
cat_lower = category.lower()
|
|
64
|
+
out = [s for s in out if s.get("category", "").lower() == cat_lower]
|
|
65
|
+
return out
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def filter_categories(
|
|
69
|
+
categories: list[dict],
|
|
70
|
+
*,
|
|
71
|
+
task: str | None = None,
|
|
72
|
+
category: str | None = None,
|
|
73
|
+
) -> list[dict]:
|
|
74
|
+
"""Filter category groups. Returns a new list with matching entries."""
|
|
75
|
+
out = categories
|
|
76
|
+
if category:
|
|
77
|
+
cat_lower = category.lower()
|
|
78
|
+
out = [c for c in out if c["category"].lower() == cat_lower]
|
|
79
|
+
if task:
|
|
80
|
+
# Filter segments within each category/subcategory by station
|
|
81
|
+
task_lower = task.lower()
|
|
82
|
+
filtered = []
|
|
83
|
+
for cat in out:
|
|
84
|
+
new_subs = []
|
|
85
|
+
for sub in cat.get("subcategories", []):
|
|
86
|
+
matching = [
|
|
87
|
+
s for s in sub.get("segments", [])
|
|
88
|
+
if s.get("station", "").lower() == task_lower
|
|
89
|
+
]
|
|
90
|
+
if matching:
|
|
91
|
+
new_subs.append({
|
|
92
|
+
**sub,
|
|
93
|
+
"segments": matching,
|
|
94
|
+
"segmentCount": len(matching),
|
|
95
|
+
"totalDurationSec": sum(s.get("durationSec", 0) for s in matching),
|
|
96
|
+
})
|
|
97
|
+
if new_subs:
|
|
98
|
+
filtered.append({
|
|
99
|
+
**cat,
|
|
100
|
+
"subcategories": new_subs,
|
|
101
|
+
"segmentCount": sum(s["segmentCount"] for s in new_subs),
|
|
102
|
+
"totalDurationSec": sum(s["totalDurationSec"] for s in new_subs),
|
|
103
|
+
})
|
|
104
|
+
out = filtered
|
|
105
|
+
return out
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def segment_r2_prefix(seg: dict) -> str:
    """Build the output-bucket prefix for a segment.

    Segment IDs are like 'station-1__episode_042_seg5'; the bucket lays the
    same data out as 'station-1/episode_042_seg5'. IDs without the '__'
    separator are returned unchanged.
    """
    station, sep, episode = seg["id"].partition("__")
    return f"{station}/{episode}" if sep else seg["id"]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def segment_file_list(
    seg: dict,
    *,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
) -> list[str]:
    """Return list of R2 keys for a segment, after include/exclude filtering.

    Patterns are fnmatch globs matched against the basename of each key only,
    not the full path.
    """
    prefix = segment_r2_prefix(seg)
    keys = [f"{prefix}/{name}" for name in SEGMENT_FILES]

    def matches(key: str, patterns: list[str]) -> bool:
        basename = key.rsplit("/", 1)[-1]
        return any(fnmatch(basename, pat) for pat in patterns)

    if include:
        keys = [k for k in keys if matches(k, include)]
    if exclude:
        keys = [k for k in keys if not matches(k, exclude)]

    return keys
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Click commands for the ego modality."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from verlet.display import category_table, console, format_duration, segment_table
|
|
11
|
+
from verlet.ego.catalog import (
|
|
12
|
+
fetch_catalog,
|
|
13
|
+
filter_categories,
|
|
14
|
+
filter_segments,
|
|
15
|
+
flatten_segments,
|
|
16
|
+
segment_file_list,
|
|
17
|
+
segment_r2_prefix,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Subgroup exposed as `verlet ego` (attached to the root group in cli.py).
@click.group("ego")
def ego_group():
    """EgoDex hand-pose data."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@ego_group.command("list")
@click.option("--task", default=None, help="Filter by station/task name.")
@click.option("--category", default=None, help="Filter by category.")
@click.option("--detailed", is_flag=True, help="Show individual segments.")
def list_cmd(task: str | None, category: str | None, detailed: bool):
    """List available EgoDex data."""
    catalog = fetch_catalog()

    if detailed:
        # Per-segment view.
        segs = filter_segments(flatten_segments(catalog), task=task, category=category)
        if not segs:
            console.print("No segments found matching filters.")
            return
        console.print(segment_table(segs))
        total = sum(s.get("durationSec", 0) for s in segs)
        console.print(f"\n[bold]{len(segs)}[/bold] segments, "
                      f"{format_duration(total)} total")
        return

    # Aggregated category view.
    cats = filter_categories(catalog.get("categories", []), task=task, category=category)
    if not cats:
        console.print("No categories found matching filters.")
        return
    console.print(category_table(cats))
    total_segs = sum(c.get("segmentCount", 0) for c in cats)
    total_dur = sum(c.get("totalDurationSec", 0) for c in cats)
    console.print(f"\n[bold]{total_segs}[/bold] segments, {format_duration(total_dur)} total")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@ego_group.command("info")
@click.argument("segment_id")
def info_cmd(segment_id: str):
    """Show details for a specific segment."""
    segments = flatten_segments(fetch_catalog())

    for seg in segments:
        if seg["id"] == segment_id:
            break
    else:
        raise click.ClickException(f"Segment '{segment_id}' not found.")

    console.print(f"[bold]Segment:[/bold] {seg['id']}")
    console.print(f"[bold]Station:[/bold] {seg.get('station', 'N/A')}")
    console.print(f"[bold]Category:[/bold] {seg.get('category', 'N/A')}")
    console.print(f"[bold]Subcategory:[/bold] {seg.get('subcategory', 'N/A')}")
    console.print(f"[bold]Duration:[/bold] {format_duration(seg.get('durationSec', 0))}")
    console.print(f"[bold]Time range:[/bold] {seg.get('startSec', 0):.1f}s — {seg.get('endSec', 0):.1f}s")

    # Camera intrinsics are optional in the catalog payload.
    cam = seg.get("cameraInfo")
    if cam:
        console.print(f"[bold]Camera:[/bold] fx={cam['fx']:.1f} fy={cam['fy']:.1f} "
                      f"ppx={cam['ppx']:.1f} ppy={cam['ppy']:.1f}")

    console.print(f"[bold]R2 prefix:[/bold] {segment_r2_prefix(seg)}/")

    console.print("\n[bold]Files:[/bold]")
    for key in segment_file_list(seg):
        console.print(f"  {key}")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@ego_group.command("download")
@click.option("-o", "--output", default="./verlet-data", type=click.Path(), help="Output directory.")
@click.option("--task", default=None, help="Filter by station/task name.")
@click.option("--category", default=None, help="Filter by category.")
@click.option("--include", default=None, help="Comma-separated glob patterns for files to include.")
@click.option("--exclude", default=None, help="Comma-separated glob patterns for files to exclude.")
@click.option("--parallel", default=8, type=int, help="Number of concurrent downloads.")
@click.option("--dry-run", is_flag=True, help="Show download plan without downloading.")
def download_cmd(
    output: str,
    task: str | None,
    category: str | None,
    include: str | None,
    exclude: str | None,
    parallel: int,
    dry_run: bool,
):
    """Download EgoDex data segments.

    Fetches the catalog, applies task/category and include/exclude filters,
    builds a (r2_key, local_path) plan under ``<output>/ego``, and either
    prints the plan (``--dry-run``) or downloads it concurrently.
    """
    catalog = fetch_catalog()
    segments = flatten_segments(catalog)
    segments = filter_segments(segments, task=task, category=category)

    if not segments:
        console.print("No segments found matching filters.")
        return

    def _split_patterns(raw: str | None) -> list[str] | None:
        """Split a comma-separated pattern list, dropping empty entries.

        Guards against stray commas (e.g. "a,,b" or "a,") producing
        empty-string glob patterns that would silently match nothing.
        """
        if not raw:
            return None
        patterns = [p.strip() for p in raw.split(",") if p.strip()]
        return patterns or None

    include_pats = _split_patterns(include)
    exclude_pats = _split_patterns(exclude)

    # Build download plan: list of (r2_key, local_path).
    # Keys returned by segment_file_list are already relative paths like
    # "station-1/episode_042_seg5/hands.npz", so they map directly under base_dir.
    base_dir = Path(output) / "ego"
    file_plan: list[tuple[str, Path]] = []

    for seg in segments:
        keys = segment_file_list(seg, include=include_pats, exclude=exclude_pats)
        for key in keys:
            file_plan.append((key, base_dir / key))

    # Count pre-existing files for reporting only; actual size verification
    # (and skipping) happens during download.
    existing = sum(1 for _, p in file_plan if p.exists())

    console.print(f"[bold]{len(segments)}[/bold] segments, "
                  f"[bold]{len(file_plan)}[/bold] files to download")
    if existing:
        console.print(f" ({existing} files already exist locally — will verify sizes)")
    console.print(f" Output: {base_dir.resolve()}")

    if dry_run:
        console.print("\n[bold]Download plan:[/bold]")
        for key, local in file_plan:
            status = "[green]exists[/]" if local.exists() else "[dim]pending[/]"
            console.print(f" {status} {key}")
        return

    # Imported lazily so `--dry-run` and plan errors never pay the httpx/rich cost.
    from verlet.ego.download import download_files

    ok, fail = asyncio.run(download_files(file_plan, parallel=parallel))

    console.print(f"\n[bold green]{ok}[/] downloaded", end="")
    if fail:
        console.print(f", [bold red]{fail}[/] failed", end="")
    console.print()
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Async download engine with presigned URLs and Rich progress."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
from rich.progress import (
|
|
10
|
+
BarColumn,
|
|
11
|
+
DownloadColumn,
|
|
12
|
+
Progress,
|
|
13
|
+
TaskID,
|
|
14
|
+
TextColumn,
|
|
15
|
+
TimeRemainingColumn,
|
|
16
|
+
TransferSpeedColumn,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
from verlet.auth import get_api_base, get_auth_headers
|
|
20
|
+
from verlet.display import console
|
|
21
|
+
|
|
22
|
+
CHUNK_SIZE = 256 * 1024 # 256 KB
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def _presign(
    client: httpx.AsyncClient,
    api_base: str,
    headers: dict[str, str],
    r2_key: str,
) -> str:
    """Ask the API for a short-lived presigned URL for *r2_key*.

    Raises RuntimeError on a 401 (stale session) and propagates any other
    HTTP error via ``raise_for_status``.
    """
    endpoint = f"{api_base}/api/presign"
    query = {"key": r2_key, "bucket": "output"}
    resp = await client.get(endpoint, params=query, headers=headers)
    if resp.status_code == 401:
        raise RuntimeError("Session expired. Run 'verlet login' to re-authenticate.")
    resp.raise_for_status()
    payload = resp.json()
    return payload["url"]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
async def _download_file(
    client: httpx.AsyncClient,
    api_base: str,
    auth_headers: dict[str, str],
    r2_key: str,
    local_path: Path,
    semaphore: asyncio.Semaphore,
    progress: Progress,
    overall_task: TaskID,
    retries: int = 3,
) -> bool:
    """Download a single file with retries and progress tracking.

    Streams the object to ``<local_path>.tmp`` and renames on success so a
    partially written file is never left at the final path. Skips the
    download entirely when the local file already matches the remote size.

    Returns True on success (or skip), False after *retries* failed attempts.
    """
    tmp_path = local_path.with_suffix(local_path.suffix + ".tmp")

    for attempt in range(1, retries + 1):
        # Per-attempt progress bookkeeping so a failed attempt can be rolled
        # back; otherwise a retry would double-count bytes on overall_task
        # and leak the per-file task row in the progress display.
        file_task: TaskID | None = None
        credited = 0  # bytes already advanced on overall_task this attempt
        try:
            async with semaphore:
                url = await _presign(client, api_base, auth_headers, r2_key)

                async with client.stream("GET", url) as resp:
                    resp.raise_for_status()
                    total = int(resp.headers.get("content-length", 0))

                    # Skip if local file matches remote size (resume support).
                    if local_path.exists() and total > 0 and local_path.stat().st_size == total:
                        progress.advance(overall_task, total)
                        return True

                    local_path.parent.mkdir(parents=True, exist_ok=True)

                    file_task = progress.add_task(
                        f" {r2_key.rsplit('/', 1)[-1]}",
                        total=total or None,  # None → indeterminate when server omits length
                    )

                    with open(tmp_path, "wb") as f:
                        async for chunk in resp.aiter_bytes(CHUNK_SIZE):
                            f.write(chunk)
                            progress.advance(file_task, len(chunk))
                            progress.advance(overall_task, len(chunk))
                            credited += len(chunk)

                    progress.remove_task(file_task)
                    file_task = None

                    # Atomic rename: readers never observe a partial file.
                    tmp_path.rename(local_path)
                    return True

        except Exception as e:
            # Undo this attempt's accounting before retrying.
            if credited:
                progress.advance(overall_task, -credited)
            if file_task is not None:
                progress.remove_task(file_task)
            if tmp_path.exists():
                tmp_path.unlink()
            if attempt == retries:
                console.print(f"[red]Failed[/] {r2_key}: {e}")
                return False
            # Exponential backoff: 2s, 4s, 8s, ...
            await asyncio.sleep(2 ** attempt)

    return False
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
async def download_files(
    file_plan: list[tuple[str, Path]],
    parallel: int = 8,
) -> tuple[int, int]:
    """Download a list of (r2_key, local_path) pairs concurrently.

    Args:
        file_plan: Pairs of (R2 object key, destination path).
        parallel: Maximum number of simultaneous downloads.

    Returns (success_count, fail_count).
    """
    if not file_plan:
        console.print("Nothing to download.")
        return 0, 0

    api_base = get_api_base()
    auth_headers = get_auth_headers()
    semaphore = asyncio.Semaphore(parallel)

    with Progress(
        TextColumn("[bold blue]{task.description}"),
        BarColumn(),
        DownloadColumn(),
        TransferSpeedColumn(),
        TimeRemainingColumn(),
        console=console,
    ) as progress:
        # Total bytes are only known once each object is presigned and
        # streamed, so the overall bar must be indeterminate (total=None).
        # total=0 would render as already complete and break ETA display.
        overall_task = progress.add_task("Downloading", total=None)

        async with httpx.AsyncClient(timeout=httpx.Timeout(60, connect=15)) as client:
            tasks = [
                _download_file(
                    client, api_base, auth_headers,
                    r2_key, local_path,
                    semaphore, progress, overall_task,
                )
                for r2_key, local_path in file_plan
            ]
            # _download_file handles its own exceptions and returns bool,
            # so gather never raises from an individual download.
            results = await asyncio.gather(*tasks)

        ok = sum(1 for r in results if r)
        fail = len(results) - ok
        return ok, fail
|