visionpack 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. visionpack/__init__.py +5 -0
  2. visionpack/__main__.py +4 -0
  3. visionpack/cli/__init__.py +1 -0
  4. visionpack/cli/commands/__init__.py +1 -0
  5. visionpack/cli/commands/annotate.py +25 -0
  6. visionpack/cli/commands/diff.py +36 -0
  7. visionpack/cli/commands/export.py +56 -0
  8. visionpack/cli/commands/fsck.py +38 -0
  9. visionpack/cli/commands/import_.py +144 -0
  10. visionpack/cli/commands/init.py +25 -0
  11. visionpack/cli/commands/pack.py +56 -0
  12. visionpack/cli/commands/snapshot.py +50 -0
  13. visionpack/cli/commands/split.py +90 -0
  14. visionpack/cli/commands/stats.py +46 -0
  15. visionpack/cli/commands/sync.py +71 -0
  16. visionpack/cli/commands/validate.py +33 -0
  17. visionpack/cli/main.py +44 -0
  18. visionpack/core/__init__.py +3 -0
  19. visionpack/core/errors.py +14 -0
  20. visionpack/core/lock.py +66 -0
  21. visionpack/core/manifest.py +360 -0
  22. visionpack/core/models.py +275 -0
  23. visionpack/core/project.py +100 -0
  24. visionpack/diff.py +40 -0
  25. visionpack/duplicates.py +145 -0
  26. visionpack/formats/__init__.py +12 -0
  27. visionpack/formats/base.py +25 -0
  28. visionpack/formats/classification.py +177 -0
  29. visionpack/formats/coco.py +303 -0
  30. visionpack/formats/yolo.py +327 -0
  31. visionpack/fsck.py +107 -0
  32. visionpack/index/__init__.py +7 -0
  33. visionpack/index/json_index.py +97 -0
  34. visionpack/index/sqlite_index.py +298 -0
  35. visionpack/media.py +64 -0
  36. visionpack/packing/__init__.py +4 -0
  37. visionpack/packing/archive.py +163 -0
  38. visionpack/packing/webdataset.py +192 -0
  39. visionpack/perceptual.py +67 -0
  40. visionpack/progress.py +50 -0
  41. visionpack/py.typed +0 -0
  42. visionpack/snapshot.py +181 -0
  43. visionpack/sources/__init__.py +18 -0
  44. visionpack/sources/importer.py +501 -0
  45. visionpack/sources/join.py +50 -0
  46. visionpack/sources/resolver.py +125 -0
  47. visionpack/sources/schema.py +76 -0
  48. visionpack/split.py +183 -0
  49. visionpack/stats.py +80 -0
  50. visionpack/storage/__init__.py +4 -0
  51. visionpack/storage/hash.py +23 -0
  52. visionpack/storage/object_store.py +49 -0
  53. visionpack/validation/__init__.py +3 -0
  54. visionpack/validation/engine.py +222 -0
  55. visionpack-0.0.1.dist-info/METADATA +248 -0
  56. visionpack-0.0.1.dist-info/RECORD +60 -0
  57. visionpack-0.0.1.dist-info/WHEEL +5 -0
  58. visionpack-0.0.1.dist-info/entry_points.txt +3 -0
  59. visionpack-0.0.1.dist-info/licenses/LICENSE +202 -0
  60. visionpack-0.0.1.dist-info/top_level.txt +1 -0
visionpack/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """VisionPack public Python API."""
2
+
3
+ from visionpack.core.project import Dataset, Project
4
+
5
+ __all__ = ["Dataset", "Project"]
visionpack/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from visionpack.cli.main import main
2
+
3
if __name__ == "__main__":
    # Forward whatever main() returns as the process exit status, so that
    # `python -m visionpack` reports failures the same way a console-script
    # wrapper would. NOTE(review): assumes main() returns an int exit code or
    # None (the command handlers return ints) -- confirm against cli/main.py.
    # If main() already calls sys.exit() itself, this line is a no-op wrapper.
    raise SystemExit(main())
@@ -0,0 +1 @@
1
+ """Command line interface package."""
@@ -0,0 +1 @@
1
+ """Individual CLI command handlers."""
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+
6
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``annotate`` command and its sub-commands to *subparsers*.

    Three sub-commands are declared (``prepare``, ``ingest``, ``review``);
    all of them dispatch to ``run_scaffold``.
    """
    annotate = subparsers.add_parser("annotate", help="Prepare or ingest annotation batches")
    actions = annotate.add_subparsers(dest="annotate_command", required=True)

    prepare_cmd = actions.add_parser("prepare", help="Prepare images for annotation")
    prepare_cmd.add_argument("--target", required=True)

    ingest_cmd = actions.add_parser("ingest", help="Ingest reviewed annotations")
    ingest_cmd.add_argument("source")
    ingest_cmd.add_argument("--format", required=True)

    review_cmd = actions.add_parser("review", help="Review annotations")

    # Every sub-command shares the same placeholder handler.
    for command in (prepare_cmd, ingest_cmd, review_cmd):
        command.set_defaults(func=run_scaffold)
21
+
22
+
23
def run_scaffold(args: argparse.Namespace) -> int:
    """Tell the user that ``vp annotate`` is not implemented yet.

    *args* is accepted to match the handler signature but is never read.
    Always returns 1.
    """
    notice = "vp annotate is scaffolded but not implemented in this MVP slice yet."
    print(notice)
    return 1
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+
6
+ from visionpack.core.project import Project
7
+ from visionpack.diff import diff_snapshots
8
+
9
+
10
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``diff`` command: positionals ``left`` and ``right`` plus ``--json``."""
    diff_parser = subparsers.add_parser("diff", help="Diff two snapshots")
    for side in ("left", "right"):
        diff_parser.add_argument(side)
    diff_parser.add_argument("--json", action="store_true", help="Print JSON")
    diff_parser.set_defaults(func=run)
16
+
17
+
18
def run(args: argparse.Namespace) -> int:
    """Diff ``args.left`` against ``args.right`` for the project opened at ``"."``.

    With ``--json`` the raw result of ``diff_snapshots`` is dumped as sorted,
    indented JSON. Otherwise one ``key: count`` line is printed per change
    category, followed by the ``splits_changed`` value. Always returns 0.
    """
    changes = diff_snapshots(Project.open("."), args.left, args.right)

    if not args.json:
        counted_fields = (
            "assets_added",
            "assets_removed",
            "annotations_added",
            "annotations_removed",
            "annotations_modified",
            "classes_added",
            "classes_removed",
        )
        print(f"Diff {args.left} -> {args.right}")
        for field in counted_fields:
            print(f"{field}: {len(changes[field])}")
        # splits_changed is printed as-is rather than counted.
        print(f"splits_changed: {changes['splits_changed']}")
    else:
        print(json.dumps(changes, indent=2, sort_keys=True))
    return 0
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ from visionpack.core.project import Project
7
+ from visionpack.formats.classification import export_imagefolder
8
+ from visionpack.formats.coco import export_coco
9
+ from visionpack.formats.yolo import export_yolo
10
+ from visionpack.progress import cli_progress
11
+ from visionpack.snapshot import open_snapshot
12
+
13
+
14
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``export`` command and its options to *subparsers*.

    ``--format`` and ``--output`` are required. ``--split`` takes an optional
    value and becomes ``"default"`` when given bare. ``--snapshot`` is optional.
    """
    formats = ["yolo", "coco", "imagefolder"]
    split_help = "Export into train/val/test using a split (defaults to 'default' when given without a value)"
    snapshot_help = "Export the dataset as it was at this snapshot version (e.g. v2) instead of the current state"

    export_parser = subparsers.add_parser("export", help="Export a dataset")
    export_parser.add_argument("--format", required=True, choices=formats, help="Output format")
    export_parser.add_argument("--output", required=True, help="Output directory")
    export_parser.add_argument("--split", nargs="?", const="default", default=None, help=split_help)
    export_parser.add_argument("--snapshot", help=snapshot_help)
    export_parser.set_defaults(func=run)
30
+
31
+
32
def run(args: argparse.Namespace) -> int:
    """Export the project opened at ``"."`` in the format named by ``args.format``.

    When ``args.snapshot`` is set, the project is first swapped for the one
    returned by ``open_snapshot``. The exporter runs inside a ``cli_progress``
    context; a one-line summary is printed afterwards, plus per-set counts
    (and a skipped-assets note) when ``args.split`` is set. Always returns 0.
    """
    project = Project.open(".")
    if args.snapshot:
        project = open_snapshot(project, args.snapshot)

    target = Path(args.output)
    fmt = args.format
    with cli_progress(f"Exporting {fmt}") as on_progress:
        if fmt == "imagefolder":
            summary = export_imagefolder(project, target, split_id=args.split, progress=on_progress)
            message = f"Exported ImageFolder dataset to {target.resolve()}: {summary['images']} images"
        elif fmt == "coco":
            summary = export_coco(project, target, split_id=args.split, progress=on_progress)
            counts = f"{summary['images']} images, {summary['annotations']} annotations, {summary['objects']} objects"
            message = f"Exported COCO dataset to {target.resolve()}: {counts}"
        else:
            # Anything else is treated as YOLO.
            summary = export_yolo(project, target, split_id=args.split, progress=on_progress)
            counts = f"{summary['images']} images, {summary['labels']} label files, {summary['objects']} objects"
            message = f"Exported YOLO dataset to {target.resolve()}: {counts}"
    print(message)

    if not args.split:
        return 0

    per_set = [f"{name}={count}" for name, count in summary.get("sets", {}).items()]
    print(f"Split {args.split!r}: {', '.join(per_set)}")
    skipped = summary.get("skipped")
    if skipped:
        print(f"Skipped {skipped} assets not assigned to any set in split {args.split!r}")
    return 0
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from visionpack.core.project import Project
6
+ from visionpack.fsck import run_fsck
7
+
8
+
9
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``fsck`` command with its ``--deep`` and ``--no-orphans`` switches."""
    fsck_parser = subparsers.add_parser("fsck", help="Check index <-> object-store integrity")
    switches = {
        "--deep": "Re-hash every stored object to detect silent corruption (reads all bytes)",
        "--no-orphans": "Skip the scan for unreferenced objects in the store",
    }
    # Both options are plain boolean flags.
    for flag, description in switches.items():
        fsck_parser.add_argument(flag, action="store_true", help=description)
    fsck_parser.set_defaults(func=run)
22
+
23
+
24
def run(args: argparse.Namespace) -> int:
    """Run ``run_fsck`` on the project opened at ``"."`` and print its report.

    Prints a headline with checked/err/warn counts, then at most 50 issues
    followed by a "... N more" line when there are more. Returns 0 when
    ``report.ok`` is truthy (after printing an OK line), otherwise 1.
    """
    report = run_fsck(Project.open("."), deep=args.deep, check_orphans=not args.no_orphans)

    if args.deep:
        mode = "deep"
    else:
        mode = "quick"
    headline = (
        f"fsck ({mode}): checked {report.checked_assets} assets, {report.checked_objects} objects "
        f"-> {len(report.errors)} errors, {len(report.warnings)} warnings"
    )
    print(headline)

    display_cap = 50
    for position, issue in enumerate(report.issues):
        if position == display_cap:
            break
        print(f"[{issue.severity}] {issue.code}: {issue.message}")
    overflow = len(report.issues) - display_cap
    if overflow > 0:
        print(f"... {overflow} more")

    if not report.ok:
        return 1
    print("OK (with warnings)." if report.warnings else "OK: dataset is consistent.")
    return 0
@@ -0,0 +1,144 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from visionpack.core.errors import VisionPackError
8
+ from visionpack.core.lock import project_lock
9
+ from visionpack.core.project import Project
10
+ from visionpack.formats.classification import ImageFolderImporter
11
+ from visionpack.formats.coco import CocoImporter
12
+ from visionpack.formats.yolo import YoloImporter
13
+ from visionpack.progress import cli_progress
14
+
15
+
16
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``import`` command and its options to *subparsers*.

    ``source`` is positional and ``--format`` is required; ``--copy`` defaults
    to ``"ingest"`` and ``--task`` to ``None``.
    """
    formats = ["yolo", "coco", "imagefolder"]
    tasks = ["detection", "classification", "segmentation", "keypoints"]
    copy_modes = ["copy", "move", "hardlink", "reference", "ingest"]
    no_record_help = "Do not add this import as a source in visionpack.yaml (use for one-off/throwaway imports)"

    import_parser = subparsers.add_parser("import", help="Import a dataset")
    import_parser.add_argument(
        "source",
        help="Input dataset path (YOLO/ImageFolder root, or COCO annotation JSON)",
    )
    import_parser.add_argument("--format", required=True, choices=formats, help="Input format")
    import_parser.add_argument("--images", help="Image directory (required for --format coco)")
    import_parser.add_argument("--task", default=None, choices=tasks, help="Override project task")
    import_parser.add_argument("--copy", default="ingest", choices=copy_modes, help="Asset copy mode")
    import_parser.add_argument("--name", help="Name to record this source under in visionpack.yaml")
    import_parser.add_argument("--no-record", action="store_true", help=no_record_help)
    import_parser.add_argument("--class-map", help="Reserved for explicit class mapping files")
    import_parser.set_defaults(func=run)
36
+
37
+
38
def run(args: argparse.Namespace) -> int:
    """Open the project at ``"."`` and run the import while holding ``project_lock``.

    Returns whatever ``_run_locked`` returns.
    """
    project = Project.open(".")
    with project_lock(project.root):
        exit_code = _run_locked(project, args)
    return exit_code
42
+
43
+
44
def _run_locked(project: Project, args: argparse.Namespace) -> int:
    """Perform the import described by *args* against *project*.

    Steps, in order: persist a ``--task`` override if it differs from the
    manifest, build the importer for ``args.format``, run it under a
    ``cli_progress`` context, print the summary and warnings, record the source
    unless ``--no-record`` was given, and finally report any failures.

    Returns 1 when the summary carries failures, otherwise 0.

    Raises:
        VisionPackError: ``--format coco`` was used without ``--images``.
    """
    requested_task = args.task
    if requested_task and requested_task != project.manifest.task:
        project.manifest.task = requested_task
        project.save_manifest()

    source = Path(args.source)
    if args.format == "coco":
        if not args.images:
            raise VisionPackError("--images is required when importing COCO (the directory holding the image files)")
        label = "COCO"
        importer = CocoImporter(project, source, Path(args.images), copy_mode=args.copy)
    elif args.format == "imagefolder":
        label = "ImageFolder"
        importer = ImageFolderImporter(project, source, copy_mode=args.copy)
    else:
        label = "YOLO"
        importer = YoloImporter(project, source, copy_mode=args.copy)

    with cli_progress(f"Importing {label}") as on_progress:
        summary = importer.run(progress=on_progress)

    print(
        f"Imported {label} dataset: "
        f"{summary.assets} assets, {summary.annotations} annotations, {summary.objects} objects"
    )
    if summary.orphan_labels:
        print(f"Warnings: {summary.orphan_labels} label files had no matching image")
    if summary.classes_added:
        print("Classes were discovered and written to visionpack.yaml")

    if not args.no_record:
        recorded = _record_source(project, args)
        if recorded:
            print(f"Recorded source {recorded!r} in visionpack.yaml (re-pull later with `vp sync --source {recorded}`)")

    if not summary.failures:
        return 0
    _report_failures(summary.failures)
    return 1
82
+
83
+
84
+ def _report_failures(failures: list) -> None:
85
+ print(f"Skipped {len(failures)} unreadable/corrupt image(s):")
86
+ for failure in failures[:20]:
87
+ print(f" - {failure.path}: {failure.error}")
88
+ if len(failures) > 20:
89
+ print(f" ... {len(failures) - 20} more")
90
+
91
+
92
def _record_source(project: Project, args: argparse.Namespace) -> str | None:
    """Append this import to ``project.manifest.sources`` and save the manifest.

    Returns the name the source was recorded under, or ``None`` when a source
    with the same identity is already declared (nothing is written then).
    """
    location = _source_entry(project, args)
    # Compare on location identity only: already-declared sources may carry
    # extra default keys (class_map/credentials={}) that a fresh entry lacks.
    wanted = _identity(location)
    for declared in project.manifest.sources:
        if _identity(declared) == wanted:
            return None

    # Put the name first so it reads well in the manifest.
    name = _unique_name(project, args)
    record = {"name": name}
    record.update(location)
    project.manifest.sources.append(record)
    project.save_manifest()
    return record["name"]
108
+
109
+
110
+ def _identity(source: dict) -> tuple:
111
+ return (source.get("format"), source.get("root"), source.get("images"), source.get("labels"))
112
+
113
+
114
def _source_entry(project: Project, args: argparse.Namespace) -> dict[str, str]:
    """Build the manifest entry (without a name) describing this import.

    COCO entries carry ``images`` and ``labels``; every other format carries a
    single ``root``. Paths go through ``_rel`` and ``copy`` mirrors ``args.copy``.
    """
    if args.format == "coco":
        entry = {"format": "coco"}
        entry["images"] = _rel(project, args.images)
        entry["labels"] = _rel(project, args.source)
    else:
        # Anything that is not imagefolder is recorded as yolo.
        fmt = "imagefolder" if args.format == "imagefolder" else "yolo"
        entry = {"format": fmt, "root": _rel(project, args.source)}
    entry["copy"] = args.copy
    return entry
124
+
125
+
126
+ def _rel(project: Project, path: str) -> str:
127
+ """A project-relative, posix-style location, so the manifest stays portable."""
128
+ resolved = Path(path).resolve()
129
+ try:
130
+ rel = Path(os.path.relpath(resolved, project.root)).as_posix()
131
+ except ValueError: # different drive on Windows
132
+ return resolved.as_posix()
133
+ return rel if rel.startswith((".", "/")) else f"./{rel}"
134
+
135
+
136
+ def _unique_name(project: Project, args: argparse.Namespace) -> str:
137
+ base = Path(args.name or Path(args.source).stem or args.format).name or args.format
138
+ taken = {source.get("name") for source in project.manifest.sources}
139
+ if base not in taken:
140
+ return base
141
+ suffix = 2
142
+ while f"{base}-{suffix}" in taken:
143
+ suffix += 1
144
+ return f"{base}-{suffix}"
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from visionpack.core.project import Project
6
+
7
+
8
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``init`` command: optional ``path`` (default ``"."``), ``--name`` and ``--task``."""
    tasks = ["detection", "classification", "segmentation", "keypoints"]
    init_parser = subparsers.add_parser("init", help="Initialize a VisionPack project")
    init_parser.add_argument("path", nargs="?", default=".", help="Project directory")
    init_parser.add_argument("--name", help="Dataset name")
    init_parser.add_argument("--task", default="detection", choices=tasks, help="Computer vision task")
    init_parser.set_defaults(func=run)
19
+
20
+
21
def run(args: argparse.Namespace) -> int:
    """Call ``Project.init`` with the parsed arguments and print where it landed.

    Prints the project root and the manifest path. Always returns 0.
    """
    created = Project.init(args.path, name=args.name, task=args.task)
    root_line = f"Initialized VisionPack dataset at {created.root}"
    print(root_line)
    manifest_line = f"Manifest: {created.manifest_path}"
    print(manifest_line)
    return 0
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ from visionpack.core.errors import VisionPackError
7
+ from visionpack.core.project import Project
8
+ from visionpack.packing import pack_archive, pack_training
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``pack`` command: required ``--profile``, optional ``--output`` and ``--split``.

    ``--split`` takes an optional value and becomes ``"default"`` when given bare.
    """
    split_help = (
        "For training packs: emit per-set shards from this split "
        "(default 'default' when given without a value)"
    )
    pack_parser = subparsers.add_parser("pack", help="Pack a dataset for archive, training, or review")
    pack_parser.add_argument("--profile", required=True, help="Pack profile name (from visionpack.yaml)")
    pack_parser.add_argument("--output", help="Output path (archive file, or directory for WebDataset shards)")
    pack_parser.add_argument("--split", nargs="?", const="default", default=None, help=split_help)
    pack_parser.set_defaults(func=run)
23
+
24
+
25
def run(args: argparse.Namespace) -> int:
    """Pack the project opened at ``"."`` according to the profile ``args.profile``.

    The profile's ``format`` (default ``"tar.zst"``) selects the packer:
    ``"webdataset"`` goes to ``pack_training``; ``"tar"`` / ``"tar.zst"`` go to
    ``pack_archive``. Returns 0 after printing a summary.

    Raises:
        VisionPackError: the profile is not in the manifest, or its format is
            not one of the supported values.
    """
    project = Project.open(".")
    profile = project.manifest.pack_profiles.get(args.profile)
    if profile is None:
        raise VisionPackError(f"Pack profile not found in visionpack.yaml: {args.profile}")

    fmt = str(profile.get("format", "tar.zst"))
    destination = None
    if args.output:
        destination = Path(args.output)

    if fmt not in ("webdataset", "tar", "tar.zst"):
        raise VisionPackError(
            f"Pack profile {args.profile!r} uses unsupported format {fmt!r}. "
            "Supported: 'webdataset' (training), 'tar'/'tar.zst' (archive)."
        )

    if fmt == "webdataset":
        summary = pack_training(project, output=destination, profile_name=args.profile, split_id=args.split)
        per_set = [f"{name}={count}" for name, count in summary.sets.items()]
        sets = ", ".join(per_set)
        print(
            f"Packed WebDataset to {summary.path} "
            f"({summary.shards} shards, {summary.samples} samples; {sets})"
        )
        if summary.skipped:
            print(f"Skipped {summary.skipped} assets not assigned to any set in split {args.split!r}")
    else:
        summary = pack_archive(project, output=destination, profile_name=args.profile)
        print(
            f"Packed archive: {summary.path} "
            f"({summary.format}, {summary.files} files, {summary.assets} assets, {summary.size_bytes} bytes)"
        )
    return 0
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+
6
+ from visionpack.core.lock import project_lock
7
+ from visionpack.core.project import Project
8
+ from visionpack.snapshot import create_snapshot, list_snapshots, load_snapshot
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``snapshot`` command with ``create``, ``list`` and ``show`` sub-commands."""
    snapshot_parser = subparsers.add_parser("snapshot", help="Manage dataset snapshots")
    actions = snapshot_parser.add_subparsers(dest="snapshot_command", required=True)

    create_cmd = actions.add_parser("create", help="Create a snapshot")
    create_cmd.add_argument("-m", "--message", required=True, help="Snapshot message")
    create_cmd.set_defaults(func=run_create)

    list_cmd = actions.add_parser("list", help="List snapshots")
    list_cmd.set_defaults(func=run_list)

    show_cmd = actions.add_parser("show", help="Show snapshot details")
    show_cmd.add_argument("version", help="Snapshot version, e.g. v1")
    show_cmd.set_defaults(func=run_show)
25
+
26
+
27
def run_create(args: argparse.Namespace) -> int:
    """Create a snapshot with ``args.message`` while holding ``project_lock``.

    Prints the new snapshot's version and message. Always returns 0.
    """
    project = Project.open(".")
    with project_lock(project.root):
        created = create_snapshot(project, args.message)
    version, message = created["version"], created["message"]
    print(f"Created snapshot {version}: {message}")
    return 0
33
+
34
+
35
def run_list(args: argparse.Namespace) -> int:
    """Print one line per snapshot returned by ``list_snapshots``, or a hint when there are none.

    Asset and object counts come from each snapshot's optional ``stats``
    mapping and fall back to ``?``. Always returns 0.
    """
    snapshots = list_snapshots(Project.open("."))
    if not snapshots:
        print("No snapshots. Create one with: vp snapshot create -m <message>")
        return 0

    for entry in snapshots:
        stats = entry.get("stats", {})
        images = stats.get("assets", "?")
        objects = stats.get("objects", "?")
        counts = f"{images} imgs, {objects} objs"
        print(f"{entry['version']:<5} {entry['created_at']} {counts:<20} {entry['message']}")
    return 0
45
+
46
+
47
def run_show(args: argparse.Namespace) -> int:
    """Print the snapshot ``args.version`` as sorted, indented JSON. Always returns 0."""
    details = load_snapshot(Project.open("."), args.version)
    rendered = json.dumps(details, indent=2, sort_keys=True)
    print(rendered)
    return 0
@@ -0,0 +1,90 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from visionpack.core.lock import project_lock
6
+ from visionpack.core.project import Project
7
+ from visionpack.split import create_split, lock_split
8
+
9
+
10
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``split`` command with ``create``, ``lock``, ``list`` and ``show`` sub-commands.

    ``create``, ``lock`` and ``show`` accept ``--id`` (stored as ``split_id``,
    default ``"default"``).
    """
    id_help = "Split id (default: 'default')"
    strategy_help = "stratified=balanced per class (default), random=exact global ratios, hash=stable as data grows"

    split_parser = subparsers.add_parser("split", help="Create and manage deterministic, versioned splits")
    actions = split_parser.add_subparsers(dest="split_command", required=True)

    create_cmd = actions.add_parser("create", help="Create a deterministic train/val/test split")
    create_cmd.add_argument("--train", type=float, default=0.8, help="Train fraction (default 0.8)")
    create_cmd.add_argument("--val", type=float, default=0.1, help="Validation fraction (default 0.1)")
    create_cmd.add_argument("--test", type=float, default=0.1, help="Test fraction (default 0.1)")
    create_cmd.add_argument(
        "--strategy",
        choices=["stratified", "random", "hash"],
        default="stratified",
        help=strategy_help,
    )
    create_cmd.add_argument("--by", choices=["class"], default="class", help="Stratification key (stratified strategy)")
    create_cmd.add_argument("--seed", type=int, default=0, help="Seed mixed into the content hash for reproducibility")
    create_cmd.add_argument("--id", dest="split_id", default="default", help=id_help)
    create_cmd.add_argument("--force", action="store_true", help="Overwrite even if the split is locked")
    create_cmd.set_defaults(func=run_create)

    lock_cmd = actions.add_parser("lock", help="Lock a split so it cannot be changed")
    lock_cmd.add_argument("--id", dest="split_id", default="default", help=id_help)
    lock_cmd.set_defaults(func=run_lock)

    list_cmd = actions.add_parser("list", help="List splits")
    list_cmd.set_defaults(func=run_list)

    show_cmd = actions.add_parser("show", help="Show set sizes for a split")
    show_cmd.add_argument("--id", dest="split_id", default="default", help=id_help)
    show_cmd.set_defaults(func=run_show)
40
+
41
+
42
def run_create(args: argparse.Namespace) -> int:
    """Call ``create_split`` with the parsed options while holding ``project_lock``.

    Prints the split id, strategy, seed and per-set sizes. Always returns 0.
    """
    project = Project.open(".")
    options = {
        "train": args.train,
        "val": args.val,
        "test": args.test,
        "strategy": args.strategy,
        "seed": args.seed,
        "split_id": args.split_id,
        "by": args.by,
        "force": args.force,
    }
    with project_lock(project.root):
        created = create_split(project, **options)

    per_set = [f"{name}={len(ids)}" for name, ids in created.sets.items()]
    sizes = ", ".join(per_set)
    print(f"Created split {created.id!r} (strategy={created.strategy}, seed={args.seed}): {sizes}")
    return 0
59
+
60
+
61
def run_lock(args: argparse.Namespace) -> int:
    """Call ``lock_split`` for ``args.split_id`` while holding ``project_lock``. Always returns 0."""
    project = Project.open(".")
    with project_lock(project.root):
        locked = lock_split(project, args.split_id)
    confirmation = f"Locked split {locked.id!r}. It will be captured as-is in snapshots."
    print(confirmation)
    return 0
67
+
68
+
69
def run_list(args: argparse.Namespace) -> int:
    """Print one line per split in ``project.index.splits()``, or a hint when there are none.

    Each line shows the split id, strategy, a ``[locked]`` marker when
    applicable, and per-set sizes. Always returns 0.
    """
    splits = Project.open(".").index.splits()
    if not splits:
        print("No splits. Create one with: vp split create")
        return 0

    for entry in splits:
        per_set = [f"{name}={len(ids)}" for name, ids in entry.sets.items()]
        sizes = ", ".join(per_set)
        marker = ""
        if entry.locked:
            marker = " [locked]"
        print(f"{entry.id} strategy={entry.strategy}{marker} {sizes}")
    return 0
79
+
80
+
81
def run_show(args: argparse.Namespace) -> int:
    """Print strategy, lock state and per-set image counts for the split ``args.split_id``.

    Returns 1 (after printing a hint) when no split has that id, otherwise 0.
    """
    project = Project.open(".")
    found = None
    for candidate in project.index.splits():
        if candidate.id == args.split_id:
            found = candidate
            break

    if found is None:
        print(f"No split named {args.split_id!r}. Create one with: vp split create")
        return 1

    print(f"Split {found.id!r} strategy={found.strategy} locked={found.locked}")
    for set_name, members in found.sets.items():
        print(f" {set_name}: {len(members)} images")
    return 0
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+
6
+ from visionpack.core.project import Project
7
+ from visionpack.stats import collect_stats, split_breakdown
8
+
9
+
10
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``stats`` command with its ``--by``, ``--json`` and ``--html`` options."""
    dimensions = ["class", "split"]
    stats_parser = subparsers.add_parser("stats", help="Show dataset statistics")
    stats_parser.add_argument("--by", choices=dimensions, help="Focus output on one dimension")
    stats_parser.add_argument("--json", action="store_true", help="Print JSON")
    stats_parser.add_argument("--html", help="Reserved for HTML reports")
    stats_parser.set_defaults(func=run)
16
+
17
+
18
def run(args: argparse.Namespace) -> int:
    """Print statistics for the project opened at ``"."``. Always returns 0.

    Output modes, checked in this order:
      * ``--json``: the full ``collect_stats`` result as sorted, indented JSON;
      * ``--by class``: one ``class_id: count`` line per class;
      * ``--by split``: the ``split_breakdown`` result per set, or a hint when
        it is ``None``;
      * otherwise: a fixed six-line overview.
    """
    project = Project.open(".")
    stats = collect_stats(project)

    if args.json:
        print(json.dumps(stats, indent=2, sort_keys=True))
        return 0

    if args.by == "class":
        for class_id, count in stats["class_distribution"].items():
            print(f"{class_id}: {count}")
        return 0

    if args.by == "split":
        breakdown = split_breakdown(project)
        if breakdown is None:
            print("No split named 'default'. Create one with: vp split create")
            return 0
        marker = ""
        if breakdown["locked"]:
            marker = " [locked]"
        print(f"Split 'default' (strategy={breakdown['strategy']}){marker}")
        for set_name, info in breakdown["sets"].items():
            print(f"{set_name}: {info['images']} images, {info['objects']} objects")
            for class_id, count in info["class_distribution"].items():
                print(f" {class_id}: {count}")
        return 0

    # Default overview: label -> key in the stats mapping, printed in order.
    overview = (
        ("Images", "assets"),
        ("Annotations", "annotations"),
        ("Objects", "objects"),
        ("Classes", "classes"),
        ("Images without annotations", "images_without_annotations"),
        ("Average labels per image", "avg_labels_per_image"),
    )
    for label, key in overview:
        print(f"{label}: {stats[key]}")
    return 0
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from visionpack.core.lock import project_lock
6
+ from visionpack.core.project import Project
7
+ from visionpack.progress import cli_progress
8
+ from visionpack.sources import plan_sources, sync_sources
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction[argparse.ArgumentParser]) -> None:
    """Attach the ``sync`` command with its ``--source`` and ``--dry-run`` options."""
    command_help = "Reconcile the dataset with the sources declared in visionpack.yaml"
    dry_run_help = "Report what each source would ingest (found/matched/unmatched) without writing"

    sync_parser = subparsers.add_parser("sync", help=command_help)
    sync_parser.add_argument("--source", help="Sync only this source (by name); default syncs all")
    sync_parser.add_argument("--dry-run", action="store_true", help=dry_run_help)
    sync_parser.set_defaults(func=run)
23
+
24
+
25
def run(args: argparse.Namespace) -> int:
    """Sync the project opened at ``"."`` with its sources, or preview with ``--dry-run``.

    Dry run: prints each plan from ``plan_sources`` and returns 0 without
    taking the project lock.

    Real run: calls ``sync_sources`` while holding ``project_lock``, prints a
    per-source summary (listing at most 10 failures each) and a final total.
    Returns 1 when any source reported failures, otherwise 0.
    """
    project = Project.open(".")

    if args.dry_run:
        for plan in plan_sources(project, args.source):
            print(f"[{plan.name}] {plan.format}")
            print(f" images: {plan.images_uri}")
            if plan.labels_uri:
                print(f" labels: {plan.labels_uri}")
            print(
                f" found {plan.images_found} images, {plan.labels_found} labels"
                f" -> {plan.matched} matched"
            )
            if plan.images_without_label:
                print(f" {plan.images_without_label} images without a label")
            if plan.labels_without_image:
                print(f" {plan.labels_without_image} labels without an image")
            print(f" classes: {', '.join(plan.class_names) if plan.class_names else '(none discovered)'}")
        return 0

    with project_lock(project.root):
        summaries = sync_sources(project, args.source, progress_factory=cli_progress)

    failure_cap = 10
    for summary in summaries:
        print(
            f"[{summary.name}] +{summary.assets_added} new assets "
            f"({summary.assets_existing} already present), "
            f"{summary.annotations} annotations, {summary.objects} objects"
        )
        if summary.classes_added:
            print(f" {summary.classes_added} new classes merged into visionpack.yaml")
        if summary.images_without_label:
            print(f" warning: {summary.images_without_label} images had no matching label")
        if summary.labels_without_image:
            print(f" warning: {summary.labels_without_image} labels had no matching image")
        if not summary.failures:
            continue
        print(f" skipped {len(summary.failures)} unreadable/corrupt image(s):")
        for failure in summary.failures[:failure_cap]:
            print(f" - {failure.path}: {failure.error}")
        if len(summary.failures) > failure_cap:
            print(f" ... {len(summary.failures) - failure_cap} more")

    total_added = sum(summary.assets_added for summary in summaries)
    total_failures = sum(len(summary.failures) for summary in summaries)
    print(f"Synced {len(summaries)} source(s): {total_added} new assets total.")
    return 1 if total_failures else 0