trossen-cloud-cli 0.1.2__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/.github/workflows/publish.yml +17 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/PKG-INFO +1 -1
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/pyproject.toml +1 -1
- trossen_cloud_cli-1.2.0/scripts/release.sh +70 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/__init__.py +3 -1
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/datasets.py +35 -2
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/config.py +2 -2
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/download.py +4 -1
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/types.py +0 -2
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/upload.py +43 -19
- trossen_cloud_cli-1.2.0/src/trossen_cloud_cli/validators/__init__.py +26 -0
- trossen_cloud_cli-1.2.0/src/trossen_cloud_cli/validators/lerobot.py +308 -0
- trossen_cloud_cli-1.2.0/src/trossen_cloud_cli/validators/mcap.py +175 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_api_endpoints.py +9 -9
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_config.py +3 -3
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_types.py +0 -2
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_upload_scale.py +3 -0
- trossen_cloud_cli-1.2.0/tests/test_upload_truncation.py +202 -0
- trossen_cloud_cli-1.2.0/tests/test_validators.py +617 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/.gitignore +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/LICENSE +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/README.md +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/docs/configuration.md +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/__main__.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/api_client.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/auth.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/cli.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/__init__.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/auth.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/config.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/models.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/training_jobs.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/output.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/progress.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/__init__.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_cli.py +0 -0
- {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_progress.py +0 -0
|
@@ -17,7 +17,24 @@ on:
|
|
|
17
17
|
default: testpypi
|
|
18
18
|
|
|
19
19
|
jobs:
|
|
20
|
+
test:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
|
24
|
+
with:
|
|
25
|
+
ref: ${{ inputs.release_tag }}
|
|
26
|
+
|
|
27
|
+
- name: Install uv
|
|
28
|
+
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
|
29
|
+
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: uv sync --frozen --extra dev
|
|
32
|
+
|
|
33
|
+
- name: Run checks
|
|
34
|
+
run: make check
|
|
35
|
+
|
|
20
36
|
build:
|
|
37
|
+
needs: test
|
|
21
38
|
runs-on: ubuntu-latest
|
|
22
39
|
steps:
|
|
23
40
|
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: trossen_cloud_cli
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: CLI for interacting with Trossen Cloud APIs
|
|
5
5
|
Project-URL: Homepage, https://github.com/TrossenRobotics/trossen_cloud_cli
|
|
6
6
|
Project-URL: Repository, https://github.com/TrossenRobotics/trossen_cloud_cli
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# Cut a release: bump pyproject.toml, commit, tag, push, create a GitHub Release.
|
|
4
|
+
# Prints the command for triggering publish.yml manually after the release is up.
|
|
5
|
+
#
|
|
6
|
+
# Usage: scripts/release.sh v<MAJOR.MINOR.PATCH> [--dry-run]
|
|
7
|
+
|
|
8
|
+
set -euo pipefail
|
|
9
|
+
|
|
10
|
+
INPUT="${1:?usage: scripts/release.sh v<MAJOR.MINOR.PATCH> [--dry-run]}"
|
|
11
|
+
DRY_RUN=false
|
|
12
|
+
[[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true
|
|
13
|
+
|
|
14
|
+
# Strict vX.Y.Z only — no pre-release suffixes.
|
|
15
|
+
[[ "$INPUT" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] \
|
|
16
|
+
|| { echo "error: version must be vX.Y.Z (got '$INPUT')"; exit 1; }
|
|
17
|
+
TAG="$INPUT"
|
|
18
|
+
VERSION="${INPUT#v}" # strip 'v' for pyproject.toml (PEP 440 disallows it).
|
|
19
|
+
|
|
20
|
+
# Pre-flight: clean tree, on main, in sync, tag doesn't exist, version changed.
|
|
21
|
+
[[ -z "$(git status --porcelain)" ]] || { echo "error: working tree dirty"; exit 1; }
|
|
22
|
+
[[ "$(git rev-parse --abbrev-ref HEAD)" == "main" ]] || { echo "error: not on main"; exit 1; }
|
|
23
|
+
git fetch origin main --tags --quiet
|
|
24
|
+
[[ "$(git rev-parse HEAD)" == "$(git rev-parse origin/main)" ]] \
|
|
25
|
+
|| { echo "error: local main not in sync with origin/main"; exit 1; }
|
|
26
|
+
if git rev-parse "$TAG" >/dev/null 2>&1; then
|
|
27
|
+
echo "error: tag $TAG already exists"; exit 1
|
|
28
|
+
fi
|
|
29
|
+
|
|
30
|
+
CURRENT="$(grep -E '^version = ' pyproject.toml | head -1 | sed -E 's/version = "(.*)"/\1/')"
|
|
31
|
+
[[ "$CURRENT" != "$VERSION" ]] || { echo "error: pyproject.toml already at $VERSION"; exit 1; }
|
|
32
|
+
|
|
33
|
+
echo "Running make check..."
|
|
34
|
+
make check
|
|
35
|
+
|
|
36
|
+
# Preview release notes before committing anything irreversible.
|
|
37
|
+
PREV_TAG="$(git tag --list 'v*' --sort=version:refname | tail -1)"
|
|
38
|
+
NOTES_ARGS=(-f tag_name="$TAG" -f target_commitish="$(git rev-parse HEAD)")
|
|
39
|
+
[[ -n "$PREV_TAG" ]] && NOTES_ARGS+=(-f previous_tag_name="$PREV_TAG")
|
|
40
|
+
|
|
41
|
+
echo
|
|
42
|
+
echo "=== Auto-generated release notes preview ==="
|
|
43
|
+
gh api repos/{owner}/{repo}/releases/generate-notes "${NOTES_ARGS[@]}" --jq .body
|
|
44
|
+
echo "============================================"
|
|
45
|
+
echo
|
|
46
|
+
|
|
47
|
+
if $DRY_RUN; then
|
|
48
|
+
echo "Dry run: stopping before any changes."
|
|
49
|
+
exit 0
|
|
50
|
+
fi
|
|
51
|
+
|
|
52
|
+
read -rp "Proceed with release $TAG? [y/N] " ans
|
|
53
|
+
[[ "$ans" == "y" || "$ans" == "Y" ]] || { echo "aborted"; exit 1; }
|
|
54
|
+
|
|
55
|
+
# Bump, commit, tag, push.
|
|
56
|
+
sed -i.bak -E "s/^version = \".*\"/version = \"$VERSION\"/" pyproject.toml
|
|
57
|
+
rm pyproject.toml.bak
|
|
58
|
+
git add pyproject.toml
|
|
59
|
+
git commit -m "Release $TAG"
|
|
60
|
+
git tag -a "$TAG" -m "Release $TAG"
|
|
61
|
+
git push origin main "$TAG"
|
|
62
|
+
|
|
63
|
+
# Create the GitHub Release with auto-generated notes (the same ones previewed above).
|
|
64
|
+
gh release create "$TAG" --title "$TAG" --generate-notes
|
|
65
|
+
|
|
66
|
+
echo
|
|
67
|
+
echo "Release $TAG created. Smoke-test on testpypi first, then publish to pypi:"
|
|
68
|
+
echo " gh workflow run publish.yml -f release_tag=$TAG -f index=testpypi"
|
|
69
|
+
echo " gh workflow run publish.yml -f release_tag=$TAG -f index=pypi"
|
|
70
|
+
echo "Or run the workflow from the GitHub Actions UI."
|
{trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/datasets.py
RENAMED
|
@@ -14,9 +14,10 @@ from rich.table import Table
|
|
|
14
14
|
from ..api_client import ApiClient, ApiError
|
|
15
15
|
from ..auth import require_auth
|
|
16
16
|
from ..download import download_dataset
|
|
17
|
-
from ..output import console, print_error, print_info, print_success
|
|
17
|
+
from ..output import console, print_error, print_info, print_success, print_warning
|
|
18
18
|
from ..types import DatasetType, PrivacyLevel
|
|
19
19
|
from ..upload import UploadError, create_and_upload_dataset
|
|
20
|
+
from ..validators import validate_dataset
|
|
20
21
|
|
|
21
22
|
app = typer.Typer(help="Manage datasets")
|
|
22
23
|
|
|
@@ -57,7 +58,7 @@ def upload(
|
|
|
57
58
|
dataset_type: Annotated[
|
|
58
59
|
DatasetType,
|
|
59
60
|
typer.Option("--type", "-t", help="Dataset type"),
|
|
60
|
-
]
|
|
61
|
+
],
|
|
61
62
|
privacy: Annotated[
|
|
62
63
|
PrivacyLevel,
|
|
63
64
|
typer.Option("--privacy", "-p", help="Privacy level"),
|
|
@@ -66,6 +67,10 @@ def upload(
|
|
|
66
67
|
str | None,
|
|
67
68
|
typer.Option("--metadata", "-m", help="JSON metadata string"),
|
|
68
69
|
] = None,
|
|
70
|
+
force: Annotated[
|
|
71
|
+
bool,
|
|
72
|
+
typer.Option("--force", "-f", help="Skip validation confirmation prompt"),
|
|
73
|
+
] = False,
|
|
69
74
|
) -> None:
|
|
70
75
|
"""
|
|
71
76
|
Upload a dataset to Trossen Cloud.
|
|
@@ -81,6 +86,18 @@ def upload(
|
|
|
81
86
|
print_error("Invalid JSON metadata")
|
|
82
87
|
raise typer.Exit(1)
|
|
83
88
|
|
|
89
|
+
# Validate dataset before upload
|
|
90
|
+
validation_warnings = validate_dataset(path, dataset_type)
|
|
91
|
+
if validation_warnings:
|
|
92
|
+
console.print(
|
|
93
|
+
f"\n[warning]Found {len(validation_warnings)} validation warning(s):[/warning]"
|
|
94
|
+
)
|
|
95
|
+
for w in validation_warnings:
|
|
96
|
+
print_warning(w)
|
|
97
|
+
console.print()
|
|
98
|
+
if not force and not typer.confirm("Continue with upload?"):
|
|
99
|
+
raise typer.Exit(0)
|
|
100
|
+
|
|
84
101
|
try:
|
|
85
102
|
dataset = asyncio.run(
|
|
86
103
|
create_and_upload_dataset(
|
|
@@ -152,6 +169,10 @@ def import_hf(
|
|
|
152
169
|
bool,
|
|
153
170
|
typer.Option("--keep-local", help="Keep the downloaded files after upload"),
|
|
154
171
|
] = False,
|
|
172
|
+
force: Annotated[
|
|
173
|
+
bool,
|
|
174
|
+
typer.Option("--force", "-f", help="Skip validation confirmation prompt"),
|
|
175
|
+
] = False,
|
|
155
176
|
) -> None:
|
|
156
177
|
"""
|
|
157
178
|
Import a public HuggingFace dataset into Trossen Cloud.
|
|
@@ -201,6 +222,18 @@ def import_hf(
|
|
|
201
222
|
|
|
202
223
|
print_success(f"Downloaded to {local_path}")
|
|
203
224
|
|
|
225
|
+
# Validate dataset before upload
|
|
226
|
+
validation_warnings = validate_dataset(local_path, dataset_type)
|
|
227
|
+
if validation_warnings:
|
|
228
|
+
console.print(
|
|
229
|
+
f"\n[warning]Found {len(validation_warnings)} validation warning(s):[/warning]"
|
|
230
|
+
)
|
|
231
|
+
for w in validation_warnings:
|
|
232
|
+
print_warning(w)
|
|
233
|
+
console.print()
|
|
234
|
+
if not force and not typer.confirm("Continue with upload?"):
|
|
235
|
+
raise typer.Exit(0)
|
|
236
|
+
|
|
204
237
|
# Upload to Trossen Cloud
|
|
205
238
|
dataset = asyncio.run(
|
|
206
239
|
create_and_upload_dataset(
|
|
@@ -114,7 +114,10 @@ async def download_resource(
|
|
|
114
114
|
|
|
115
115
|
async with httpx.AsyncClient(
|
|
116
116
|
timeout=httpx.Timeout(300.0, connect=30.0),
|
|
117
|
-
limits=httpx.Limits(
|
|
117
|
+
limits=httpx.Limits(
|
|
118
|
+
max_connections=config.download.parallel_files,
|
|
119
|
+
max_keepalive_connections=config.download.parallel_files,
|
|
120
|
+
),
|
|
118
121
|
) as download_client:
|
|
119
122
|
if show_progress:
|
|
120
123
|
total_size = sum(f["size_bytes"] for f in raw_files)
|
|
@@ -135,6 +135,7 @@ UPLOAD_MAX_RETRIES = 5
|
|
|
135
135
|
STREAM_CHUNK_SIZE = 256 * 1024 # 256 KB chunks for streaming progress
|
|
136
136
|
BATCH_CHUNK_SIZE = 500 # Max files per batch API call
|
|
137
137
|
STATE_SAVE_INTERVAL = 10 # Save state every N part completions
|
|
138
|
+
MAX_UPLOAD_CONNECTIONS = 128 # Safety cap on the upload connection pool
|
|
138
139
|
|
|
139
140
|
|
|
140
141
|
async def upload_part(
|
|
@@ -167,39 +168,53 @@ async def upload_part(
|
|
|
167
168
|
offset = (part_number - 1) * part_size
|
|
168
169
|
chunk_size = min(part_size, file_size - offset)
|
|
169
170
|
|
|
170
|
-
def _read_data() -> bytes:
|
|
171
|
-
with open(file_path, "rb") as f:
|
|
172
|
-
f.seek(offset)
|
|
173
|
-
return f.read(chunk_size)
|
|
174
|
-
|
|
175
171
|
last_error: Exception | None = None
|
|
176
172
|
for attempt in range(UPLOAD_MAX_RETRIES):
|
|
177
|
-
|
|
173
|
+
# Re-check on every attempt: the file could be truncated/replaced between attempts,
|
|
174
|
+
# invalidating chunk_size and Content-Length.
|
|
175
|
+
current_size = file_path.stat().st_size
|
|
176
|
+
if current_size < offset + chunk_size:
|
|
177
|
+
raise UploadError(
|
|
178
|
+
f"file {file_path} truncated mid-upload "
|
|
179
|
+
f"(expected at least {offset + chunk_size} bytes, found {current_size})"
|
|
180
|
+
)
|
|
181
|
+
|
|
178
182
|
bytes_sent_this_attempt = 0
|
|
179
183
|
|
|
180
184
|
async def _streaming_body():
|
|
181
185
|
"""
|
|
182
|
-
|
|
186
|
+
Stream the part body from disk in small chunks to bound memory.
|
|
183
187
|
"""
|
|
184
188
|
nonlocal bytes_sent_this_attempt
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
189
|
+
with open(file_path, "rb") as f:
|
|
190
|
+
f.seek(offset)
|
|
191
|
+
remaining = chunk_size
|
|
192
|
+
while remaining > 0:
|
|
193
|
+
buf = f.read(min(STREAM_CHUNK_SIZE, remaining))
|
|
194
|
+
if not buf:
|
|
195
|
+
# Truncated mid-stream after the pre-flight check passed.
|
|
196
|
+
raise UploadError(
|
|
197
|
+
f"file {file_path} truncated during upload ({remaining} bytes short)"
|
|
198
|
+
)
|
|
199
|
+
yield buf
|
|
200
|
+
if progress:
|
|
201
|
+
progress.advance_file(filename, len(buf))
|
|
202
|
+
bytes_sent_this_attempt += len(buf)
|
|
203
|
+
remaining -= len(buf)
|
|
194
204
|
|
|
195
205
|
try:
|
|
196
206
|
response = await upload_client.put(
|
|
197
207
|
upload_url,
|
|
198
208
|
content=_streaming_body(),
|
|
199
|
-
headers={"Content-Length": str(
|
|
209
|
+
headers={"Content-Length": str(chunk_size)},
|
|
200
210
|
)
|
|
201
211
|
response.raise_for_status()
|
|
202
212
|
return response.headers.get("ETag", "")
|
|
213
|
+
except UploadError:
|
|
214
|
+
# Truncation isn't recoverable; rewind progress and bail without retrying.
|
|
215
|
+
if progress and bytes_sent_this_attempt > 0:
|
|
216
|
+
progress.advance_file(filename, -bytes_sent_this_attempt)
|
|
217
|
+
raise
|
|
203
218
|
except (httpx.ConnectError, httpx.ConnectTimeout, httpx.HTTPStatusError) as e:
|
|
204
219
|
if isinstance(e, httpx.HTTPStatusError) and e.response.status_code < 500:
|
|
205
220
|
raise
|
|
@@ -217,6 +232,7 @@ async def _upload_file_parts(
|
|
|
217
232
|
upload_client: httpx.AsyncClient,
|
|
218
233
|
file_path: str,
|
|
219
234
|
local_path: Path,
|
|
235
|
+
file_size: int,
|
|
220
236
|
part_urls: dict[int, str],
|
|
221
237
|
part_size: int,
|
|
222
238
|
progress: TransferProgress | None = None,
|
|
@@ -228,6 +244,9 @@ async def _upload_file_parts(
|
|
|
228
244
|
:param upload_client: Shared async HTTP client for storage requests.
|
|
229
245
|
:param file_path: The relative path of the file within the resource.
|
|
230
246
|
:param local_path: The local filesystem path to the file.
|
|
247
|
+
:param file_size: The expected size in bytes (from the FileInfo captured at collection
|
|
248
|
+
time, used to generate the presigned URLs). The pre-flight truncation check inside
|
|
249
|
+
upload_part compares against this declared size, not a fresh stat.
|
|
231
250
|
:param part_urls: Mapping of part_number -> presigned URL.
|
|
232
251
|
:param part_size: Size of each part in bytes.
|
|
233
252
|
:param progress: Optional progress tracker for UI updates.
|
|
@@ -235,7 +254,6 @@ async def _upload_file_parts(
|
|
|
235
254
|
|
|
236
255
|
"""
|
|
237
256
|
config = get_config()
|
|
238
|
-
file_size = local_path.stat().st_size
|
|
239
257
|
semaphore = asyncio.Semaphore(config.upload.parallel_parts)
|
|
240
258
|
|
|
241
259
|
# Filter out parts that have already been uploaded (resume support)
|
|
@@ -395,10 +413,15 @@ async def upload_resource(
|
|
|
395
413
|
file_sem = asyncio.Semaphore(config.upload.parallel_files)
|
|
396
414
|
failed_files: list[str] = []
|
|
397
415
|
|
|
416
|
+
# Cap concurrent connections to avoid overwhelming the system
|
|
417
|
+
max_conns = min(
|
|
418
|
+
config.upload.parallel_files * config.upload.parallel_parts, MAX_UPLOAD_CONNECTIONS
|
|
419
|
+
)
|
|
398
420
|
async with httpx.AsyncClient(
|
|
399
421
|
timeout=httpx.Timeout(300.0),
|
|
400
422
|
limits=httpx.Limits(
|
|
401
|
-
max_connections=
|
|
423
|
+
max_connections=max_conns,
|
|
424
|
+
max_keepalive_connections=max_conns,
|
|
402
425
|
),
|
|
403
426
|
) as upload_client:
|
|
404
427
|
|
|
@@ -437,6 +460,7 @@ async def upload_resource(
|
|
|
437
460
|
upload_client,
|
|
438
461
|
fp,
|
|
439
462
|
actual_path,
|
|
463
|
+
file_info.size_bytes,
|
|
440
464
|
part_urls_map,
|
|
441
465
|
part_size,
|
|
442
466
|
progress,
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Dataset validators for pre-upload structural checks."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ..types import DatasetType
|
|
6
|
+
from .lerobot import validate_lerobot
|
|
7
|
+
from .mcap import validate_mcap
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def validate_dataset(path: Path, dataset_type: DatasetType) -> list[str]:
|
|
11
|
+
"""
|
|
12
|
+
Validate a dataset directory against its type-specific spec.
|
|
13
|
+
|
|
14
|
+
Returns a list of warning messages. An empty list means no issues found.
|
|
15
|
+
Only runs for dataset types that have a validator (mcap, lerobot).
|
|
16
|
+
"""
|
|
17
|
+
validators = {
|
|
18
|
+
DatasetType.MCAP: validate_mcap,
|
|
19
|
+
DatasetType.LEROBOT: validate_lerobot,
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
validator = validators.get(dataset_type)
|
|
23
|
+
if validator is None:
|
|
24
|
+
return []
|
|
25
|
+
|
|
26
|
+
return validator(path)
|