trossen-cloud-cli 0.1.2__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/.github/workflows/publish.yml +17 -0
  2. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/PKG-INFO +1 -1
  3. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/pyproject.toml +1 -1
  4. trossen_cloud_cli-1.2.0/scripts/release.sh +70 -0
  5. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/__init__.py +3 -1
  6. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/datasets.py +35 -2
  7. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/config.py +2 -2
  8. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/download.py +4 -1
  9. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/types.py +0 -2
  10. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/upload.py +43 -19
  11. trossen_cloud_cli-1.2.0/src/trossen_cloud_cli/validators/__init__.py +26 -0
  12. trossen_cloud_cli-1.2.0/src/trossen_cloud_cli/validators/lerobot.py +308 -0
  13. trossen_cloud_cli-1.2.0/src/trossen_cloud_cli/validators/mcap.py +175 -0
  14. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_api_endpoints.py +9 -9
  15. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_config.py +3 -3
  16. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_types.py +0 -2
  17. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_upload_scale.py +3 -0
  18. trossen_cloud_cli-1.2.0/tests/test_upload_truncation.py +202 -0
  19. trossen_cloud_cli-1.2.0/tests/test_validators.py +617 -0
  20. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/.gitignore +0 -0
  21. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/LICENSE +0 -0
  22. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/README.md +0 -0
  23. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/docs/configuration.md +0 -0
  24. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/__main__.py +0 -0
  25. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/api_client.py +0 -0
  26. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/auth.py +0 -0
  27. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/cli.py +0 -0
  28. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/__init__.py +0 -0
  29. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/auth.py +0 -0
  30. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/config.py +0 -0
  31. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/models.py +0 -0
  32. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/commands/training_jobs.py +0 -0
  33. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/output.py +0 -0
  34. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/src/trossen_cloud_cli/progress.py +0 -0
  35. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/__init__.py +0 -0
  36. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_cli.py +0 -0
  37. {trossen_cloud_cli-0.1.2 → trossen_cloud_cli-1.2.0}/tests/test_progress.py +0 -0
@@ -17,7 +17,24 @@ on:
17
17
  default: testpypi
18
18
 
19
19
  jobs:
20
+ test:
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
24
+ with:
25
+ ref: ${{ inputs.release_tag }}
26
+
27
+ - name: Install uv
28
+ uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
29
+
30
+ - name: Install dependencies
31
+ run: uv sync --frozen --extra dev
32
+
33
+ - name: Run checks
34
+ run: make check
35
+
20
36
  build:
37
+ needs: test
21
38
  runs-on: ubuntu-latest
22
39
  steps:
23
40
  - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: trossen_cloud_cli
3
- Version: 0.1.2
3
+ Version: 1.2.0
4
4
  Summary: CLI for interacting with Trossen Cloud APIs
5
5
  Project-URL: Homepage, https://github.com/TrossenRobotics/trossen_cloud_cli
6
6
  Project-URL: Repository, https://github.com/TrossenRobotics/trossen_cloud_cli
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "trossen_cloud_cli"
7
- version = "0.1.2"
7
+ version = "1.2.0"
8
8
  description = "CLI for interacting with Trossen Cloud APIs"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Cut a release: bump pyproject.toml, commit, tag, push, create a GitHub Release.
4
+ # Prints the command for triggering publish.yml manually after the release is up.
5
+ #
6
+ # Usage: scripts/release.sh v<MAJOR.MINOR.PATCH> [--dry-run]
7
+
8
+ set -euo pipefail
9
+
10
+ INPUT="${1:?usage: scripts/release.sh v<MAJOR.MINOR.PATCH> [--dry-run]}"
11
+ DRY_RUN=false
12
+ [[ "${2:-}" == "--dry-run" ]] && DRY_RUN=true
13
+
14
+ # Strict vX.Y.Z only — no pre-release suffixes.
15
+ [[ "$INPUT" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] \
16
+ || { echo "error: version must be vX.Y.Z (got '$INPUT')"; exit 1; }
17
+ TAG="$INPUT"
18
+ VERSION="${INPUT#v}" # strip 'v' for pyproject.toml (PEP 440's normalized form omits the prefix).
19
+
20
+ # Pre-flight: clean tree, on main, in sync, tag doesn't exist, version changed.
21
+ [[ -z "$(git status --porcelain)" ]] || { echo "error: working tree dirty"; exit 1; }
22
+ [[ "$(git rev-parse --abbrev-ref HEAD)" == "main" ]] || { echo "error: not on main"; exit 1; }
23
+ git fetch origin main --tags --quiet
24
+ [[ "$(git rev-parse HEAD)" == "$(git rev-parse origin/main)" ]] \
25
+ || { echo "error: local main not in sync with origin/main"; exit 1; }
26
+ if git rev-parse "$TAG" >/dev/null 2>&1; then
27
+ echo "error: tag $TAG already exists"; exit 1
28
+ fi
29
+
30
+ CURRENT="$(grep -E '^version = ' pyproject.toml | head -1 | sed -E 's/version = "(.*)"/\1/')"
31
+ [[ "$CURRENT" != "$VERSION" ]] || { echo "error: pyproject.toml already at $VERSION"; exit 1; }
32
+
33
+ echo "Running make check..."
34
+ make check
35
+
36
+ # Preview release notes before committing anything irreversible.
37
+ PREV_TAG="$(git tag --list 'v*' --sort=version:refname | tail -1)"
38
+ NOTES_ARGS=(-f tag_name="$TAG" -f target_commitish="$(git rev-parse HEAD)")
39
+ [[ -n "$PREV_TAG" ]] && NOTES_ARGS+=(-f previous_tag_name="$PREV_TAG")
40
+
41
+ echo
42
+ echo "=== Auto-generated release notes preview ==="
43
+ gh api repos/{owner}/{repo}/releases/generate-notes "${NOTES_ARGS[@]}" --jq .body
44
+ echo "============================================"
45
+ echo
46
+
47
+ if $DRY_RUN; then
48
+ echo "Dry run: stopping before any changes."
49
+ exit 0
50
+ fi
51
+
52
+ read -rp "Proceed with release $TAG? [y/N] " ans
53
+ [[ "$ans" == "y" || "$ans" == "Y" ]] || { echo "aborted"; exit 1; }
54
+
55
+ # Bump, commit, tag, push.
56
+ sed -i.bak -E "s/^version = \".*\"/version = \"$VERSION\"/" pyproject.toml
57
+ rm pyproject.toml.bak
58
+ git add pyproject.toml
59
+ git commit -m "Release $TAG"
60
+ git tag -a "$TAG" -m "Release $TAG"
61
+ git push origin main "$TAG"
62
+
63
+ # Create the GitHub Release with auto-generated notes (the same ones previewed above).
64
+ gh release create "$TAG" --title "$TAG" --generate-notes
65
+
66
+ echo
67
+ echo "Release $TAG created. Smoke-test on testpypi first, then publish to pypi:"
68
+ echo " gh workflow run publish.yml -f release_tag=$TAG -f index=testpypi"
69
+ echo " gh workflow run publish.yml -f release_tag=$TAG -f index=pypi"
70
+ echo "Or run the workflow from the GitHub Actions UI."
@@ -1,6 +1,8 @@
1
1
  """Trossen CLI - A Python CLI for Trossen Cloud."""
2
2
 
3
- __version__ = "0.1.0"
3
+ from importlib.metadata import version
4
+
5
+ __version__ = version("trossen_cloud_cli")
4
6
 
5
7
  from .cli import app
6
8
 
@@ -14,9 +14,10 @@ from rich.table import Table
14
14
  from ..api_client import ApiClient, ApiError
15
15
  from ..auth import require_auth
16
16
  from ..download import download_dataset
17
- from ..output import console, print_error, print_info, print_success
17
+ from ..output import console, print_error, print_info, print_success, print_warning
18
18
  from ..types import DatasetType, PrivacyLevel
19
19
  from ..upload import UploadError, create_and_upload_dataset
20
+ from ..validators import validate_dataset
20
21
 
21
22
  app = typer.Typer(help="Manage datasets")
22
23
 
@@ -57,7 +58,7 @@ def upload(
57
58
  dataset_type: Annotated[
58
59
  DatasetType,
59
60
  typer.Option("--type", "-t", help="Dataset type"),
60
- ] = DatasetType.RAW,
61
+ ],
61
62
  privacy: Annotated[
62
63
  PrivacyLevel,
63
64
  typer.Option("--privacy", "-p", help="Privacy level"),
@@ -66,6 +67,10 @@ def upload(
66
67
  str | None,
67
68
  typer.Option("--metadata", "-m", help="JSON metadata string"),
68
69
  ] = None,
70
+ force: Annotated[
71
+ bool,
72
+ typer.Option("--force", "-f", help="Skip validation confirmation prompt"),
73
+ ] = False,
69
74
  ) -> None:
70
75
  """
71
76
  Upload a dataset to Trossen Cloud.
@@ -81,6 +86,18 @@ def upload(
81
86
  print_error("Invalid JSON metadata")
82
87
  raise typer.Exit(1)
83
88
 
89
+ # Validate dataset before upload
90
+ validation_warnings = validate_dataset(path, dataset_type)
91
+ if validation_warnings:
92
+ console.print(
93
+ f"\n[warning]Found {len(validation_warnings)} validation warning(s):[/warning]"
94
+ )
95
+ for w in validation_warnings:
96
+ print_warning(w)
97
+ console.print()
98
+ if not force and not typer.confirm("Continue with upload?"):
99
+ raise typer.Exit(0)
100
+
84
101
  try:
85
102
  dataset = asyncio.run(
86
103
  create_and_upload_dataset(
@@ -152,6 +169,10 @@ def import_hf(
152
169
  bool,
153
170
  typer.Option("--keep-local", help="Keep the downloaded files after upload"),
154
171
  ] = False,
172
+ force: Annotated[
173
+ bool,
174
+ typer.Option("--force", "-f", help="Skip validation confirmation prompt"),
175
+ ] = False,
155
176
  ) -> None:
156
177
  """
157
178
  Import a public HuggingFace dataset into Trossen Cloud.
@@ -201,6 +222,18 @@ def import_hf(
201
222
 
202
223
  print_success(f"Downloaded to {local_path}")
203
224
 
225
+ # Validate dataset before upload
226
+ validation_warnings = validate_dataset(local_path, dataset_type)
227
+ if validation_warnings:
228
+ console.print(
229
+ f"\n[warning]Found {len(validation_warnings)} validation warning(s):[/warning]"
230
+ )
231
+ for w in validation_warnings:
232
+ print_warning(w)
233
+ console.print()
234
+ if not force and not typer.confirm("Continue with upload?"):
235
+ raise typer.Exit(0)
236
+
204
237
  # Upload to Trossen Cloud
205
238
  dataset = asyncio.run(
206
239
  create_and_upload_dataset(
@@ -14,8 +14,8 @@ class UploadConfig(BaseModel):
14
14
  """
15
15
 
16
16
  chunk_size_mb: int = 50
17
- parallel_parts: int = 6
18
- parallel_files: int = 32
17
+ parallel_parts: int = 8
18
+ parallel_files: int = 64
19
19
 
20
20
 
21
21
  class DownloadConfig(BaseModel):
@@ -114,7 +114,10 @@ async def download_resource(
114
114
 
115
115
  async with httpx.AsyncClient(
116
116
  timeout=httpx.Timeout(300.0, connect=30.0),
117
- limits=httpx.Limits(max_connections=config.download.parallel_files),
117
+ limits=httpx.Limits(
118
+ max_connections=config.download.parallel_files,
119
+ max_keepalive_connections=config.download.parallel_files,
120
+ ),
118
121
  ) as download_client:
119
122
  if show_progress:
120
123
  total_size = sum(f["size_bytes"] for f in raw_files)
@@ -12,8 +12,6 @@ class DatasetType(StrEnum):
12
12
 
13
13
  MCAP = "mcap"
14
14
  LEROBOT = "lerobot"
15
- TROSSEN = "trossen"
16
- RAW = "raw"
17
15
 
18
16
 
19
17
  class PrivacyLevel(StrEnum):
@@ -135,6 +135,7 @@ UPLOAD_MAX_RETRIES = 5
135
135
  STREAM_CHUNK_SIZE = 256 * 1024 # 256 KB chunks for streaming progress
136
136
  BATCH_CHUNK_SIZE = 500 # Max files per batch API call
137
137
  STATE_SAVE_INTERVAL = 10 # Save state every N part completions
138
+ MAX_UPLOAD_CONNECTIONS = 128 # Safety cap on the upload connection pool
138
139
 
139
140
 
140
141
  async def upload_part(
@@ -167,39 +168,53 @@ async def upload_part(
167
168
  offset = (part_number - 1) * part_size
168
169
  chunk_size = min(part_size, file_size - offset)
169
170
 
170
- def _read_data() -> bytes:
171
- with open(file_path, "rb") as f:
172
- f.seek(offset)
173
- return f.read(chunk_size)
174
-
175
171
  last_error: Exception | None = None
176
172
  for attempt in range(UPLOAD_MAX_RETRIES):
177
- data = _read_data()
173
+ # Re-check on every attempt: the file could be truncated/replaced between attempts,
174
+ # invalidating chunk_size and Content-Length.
175
+ current_size = file_path.stat().st_size
176
+ if current_size < offset + chunk_size:
177
+ raise UploadError(
178
+ f"file {file_path} truncated mid-upload "
179
+ f"(expected at least {offset + chunk_size} bytes, found {current_size})"
180
+ )
181
+
178
182
  bytes_sent_this_attempt = 0
179
183
 
180
184
  async def _streaming_body():
181
185
  """
182
- Async generator that yields chunks and tracks bytes sent.
186
+ Stream the part body from disk in small chunks to bound memory.
183
187
  """
184
188
  nonlocal bytes_sent_this_attempt
185
- sent = 0
186
- while sent < len(data):
187
- end = min(sent + STREAM_CHUNK_SIZE, len(data))
188
- yield data[sent:end]
189
- chunk_len = end - sent
190
- if progress:
191
- progress.advance_file(filename, chunk_len)
192
- bytes_sent_this_attempt += chunk_len
193
- sent = end
189
+ with open(file_path, "rb") as f:
190
+ f.seek(offset)
191
+ remaining = chunk_size
192
+ while remaining > 0:
193
+ buf = f.read(min(STREAM_CHUNK_SIZE, remaining))
194
+ if not buf:
195
+ # Truncated mid-stream after the pre-flight check passed.
196
+ raise UploadError(
197
+ f"file {file_path} truncated during upload ({remaining} bytes short)"
198
+ )
199
+ yield buf
200
+ if progress:
201
+ progress.advance_file(filename, len(buf))
202
+ bytes_sent_this_attempt += len(buf)
203
+ remaining -= len(buf)
194
204
 
195
205
  try:
196
206
  response = await upload_client.put(
197
207
  upload_url,
198
208
  content=_streaming_body(),
199
- headers={"Content-Length": str(len(data))},
209
+ headers={"Content-Length": str(chunk_size)},
200
210
  )
201
211
  response.raise_for_status()
202
212
  return response.headers.get("ETag", "")
213
+ except UploadError:
214
+ # Truncation isn't recoverable; rewind progress and bail without retrying.
215
+ if progress and bytes_sent_this_attempt > 0:
216
+ progress.advance_file(filename, -bytes_sent_this_attempt)
217
+ raise
203
218
  except (httpx.ConnectError, httpx.ConnectTimeout, httpx.HTTPStatusError) as e:
204
219
  if isinstance(e, httpx.HTTPStatusError) and e.response.status_code < 500:
205
220
  raise
@@ -217,6 +232,7 @@ async def _upload_file_parts(
217
232
  upload_client: httpx.AsyncClient,
218
233
  file_path: str,
219
234
  local_path: Path,
235
+ file_size: int,
220
236
  part_urls: dict[int, str],
221
237
  part_size: int,
222
238
  progress: TransferProgress | None = None,
@@ -228,6 +244,9 @@ async def _upload_file_parts(
228
244
  :param upload_client: Shared async HTTP client for storage requests.
229
245
  :param file_path: The relative path of the file within the resource.
230
246
  :param local_path: The local filesystem path to the file.
247
+ :param file_size: The expected size in bytes (from the FileInfo captured at collection
248
+ time, used to generate the presigned URLs). The pre-flight truncation check inside
249
+ upload_part compares the file's current on-disk size (a fresh stat) against this declared size.
231
250
  :param part_urls: Mapping of part_number -> presigned URL.
232
251
  :param part_size: Size of each part in bytes.
233
252
  :param progress: Optional progress tracker for UI updates.
@@ -235,7 +254,6 @@ async def _upload_file_parts(
235
254
 
236
255
  """
237
256
  config = get_config()
238
- file_size = local_path.stat().st_size
239
257
  semaphore = asyncio.Semaphore(config.upload.parallel_parts)
240
258
 
241
259
  # Filter out parts that have already been uploaded (resume support)
@@ -395,10 +413,15 @@ async def upload_resource(
395
413
  file_sem = asyncio.Semaphore(config.upload.parallel_files)
396
414
  failed_files: list[str] = []
397
415
 
416
+ # Cap concurrent connections to avoid overwhelming the system
417
+ max_conns = min(
418
+ config.upload.parallel_files * config.upload.parallel_parts, MAX_UPLOAD_CONNECTIONS
419
+ )
398
420
  async with httpx.AsyncClient(
399
421
  timeout=httpx.Timeout(300.0),
400
422
  limits=httpx.Limits(
401
- max_connections=config.upload.parallel_files * config.upload.parallel_parts
423
+ max_connections=max_conns,
424
+ max_keepalive_connections=max_conns,
402
425
  ),
403
426
  ) as upload_client:
404
427
 
@@ -437,6 +460,7 @@ async def upload_resource(
437
460
  upload_client,
438
461
  fp,
439
462
  actual_path,
463
+ file_info.size_bytes,
440
464
  part_urls_map,
441
465
  part_size,
442
466
  progress,
@@ -0,0 +1,26 @@
1
+ """Dataset validators for pre-upload structural checks."""
2
+
3
+ from pathlib import Path
4
+
5
+ from ..types import DatasetType
6
+ from .lerobot import validate_lerobot
7
+ from .mcap import validate_mcap
8
+
9
+
10
+ def validate_dataset(path: Path, dataset_type: DatasetType) -> list[str]:
11
+ """
12
+ Validate a dataset directory against its type-specific spec.
13
+
14
+ Returns a list of warning messages. An empty list means no issues found.
15
+ Only runs for dataset types that have a validator (mcap, lerobot).
16
+ """
17
+ validators = {
18
+ DatasetType.MCAP: validate_mcap,
19
+ DatasetType.LEROBOT: validate_lerobot,
20
+ }
21
+
22
+ validator = validators.get(dataset_type)
23
+ if validator is None:
24
+ return []
25
+
26
+ return validator(path)