sunstone-py 0.5.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/PKG-INFO +4 -2
  2. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/README.md +1 -1
  3. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/pyproject.toml +4 -1
  4. sunstone_py-0.6.0/src/sunstone/cli.py +542 -0
  5. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/dataframe.py +16 -89
  6. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/datasets.py +78 -17
  7. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/lineage.py +58 -29
  8. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone_py.egg-info/PKG-INFO +4 -2
  9. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone_py.egg-info/SOURCES.txt +2 -0
  10. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone_py.egg-info/entry_points.txt +1 -0
  11. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone_py.egg-info/requires.txt +2 -0
  12. sunstone_py-0.6.0/tests/test_cli.py +396 -0
  13. sunstone_py-0.6.0/tests/test_dataframe.py +454 -0
  14. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/tests/test_lineage_persistence.py +4 -14
  15. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/tests/test_pandas_compatibility.py +3 -2
  16. sunstone_py-0.5.2/tests/test_dataframe.py +0 -225
  17. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/LICENSE +0 -0
  18. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/setup.cfg +0 -0
  19. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/__init__.py +0 -0
  20. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/_release.py +0 -0
  21. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/exceptions.py +0 -0
  22. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/pandas.py +0 -0
  23. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/py.typed +0 -0
  24. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone/validation.py +0 -0
  25. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone_py.egg-info/dependency_links.txt +0 -0
  26. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/src/sunstone_py.egg-info/top_level.txt +0 -0
  27. {sunstone_py-0.5.2 → sunstone_py-0.6.0}/tests/test_datasets.py +0 -0
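Release highlights: 0.6.0 adds a "sunstone" console script (entry point sunstone.cli:main, registered in pyproject.toml and implemented in the new src/sunstone/cli.py below) with "dataset" and "package" command groups, and picks up click and google-cloud-storage as new runtime dependencies. A minimal smoke test of the new entry point, assuming sunstone-py 0.6.0 is installed in the active environment (the commands are taken from the cli.py diff below; the rest is illustrative):

    import subprocess

    subprocess.run(["sunstone", "--help"], check=True)           # shows the dataset/package groups
    subprocess.run(["sunstone", "dataset", "list"], check=True)  # reads ./datasets.yaml by default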
--- sunstone_py-0.5.2/PKG-INFO
+++ sunstone_py-0.6.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sunstone-py
-Version: 0.5.2
+Version: 0.6.0
 Summary: Python library for managing datasets with lineage tracking in Sunstone projects
 Author-email: Sunstone Institute <stig@sunstone.institute>
 License: MIT
@@ -17,8 +17,10 @@ Classifier: Programming Language :: Python :: 3.14
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: click>=8.0
 Requires-Dist: frictionless>=5.18.1
 Requires-Dist: google-auth>=2.43.0
+Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: pandas>=2.0.0
 Requires-Dist: pyyaml>=6.0
 Requires-Dist: requests>=2.31.0
@@ -29,7 +31,7 @@ Dynamic: license-file
 
 A Python library for managing datasets with lineage tracking in data science projects.
 
-[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 ## Features
--- sunstone_py-0.5.2/README.md
+++ sunstone_py-0.6.0/README.md
@@ -2,7 +2,7 @@
 
 A Python library for managing datasets with lineage tracking in data science projects.
 
-[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
+[![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 ## Features
--- sunstone_py-0.5.2/pyproject.toml
+++ sunstone_py-0.6.0/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "sunstone-py"
-version = "0.5.2"
+version = "0.6.0"
 description = "Python library for managing datasets with lineage tracking in Sunstone projects"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -24,8 +24,10 @@ classifiers = [
     "Programming Language :: Python :: 3.14",
 ]
 dependencies = [
+    "click>=8.0",
     "frictionless>=5.18.1",
     "google-auth>=2.43.0",
+    "google-cloud-storage>=2.0.0",
     "pandas>=2.0.0",
     "pyyaml>=6.0",
     "requests>=2.31.0",
@@ -42,6 +44,7 @@ Repository = "https://github.com/sunstoneinstitute/sunstone-py"
 
 [project.scripts]
 release = "sunstone._release:main"
+sunstone = "sunstone.cli:main"
 
 [tool.setuptools.packages.find]
 where = [
--- /dev/null
+++ sunstone_py-0.6.0/src/sunstone/cli.py
@@ -0,0 +1,542 @@
+"""
+Sunstone command-line interface.
+"""
+
+import json
+import os
+import re
+import sys
+import tomllib
+from pathlib import Path
+from typing import Optional
+from urllib.parse import urlparse
+
+import click
+from click.shell_completion import CompletionItem
+from ruamel.yaml import YAML
+
+from .datasets import DatasetsManager
+from .exceptions import DatasetNotFoundError
+
+# Configure ruamel.yaml for round-trip parsing
+_yaml = YAML()
+_yaml.preserve_quotes = True
+_yaml.default_flow_style = False
+_yaml.indent(mapping=2, sequence=4, offset=2)
+
+# Valid field types
+VALID_FIELD_TYPES = {"string", "number", "integer", "boolean", "date", "datetime"}
+
+# Pattern for ${VAR} or ${VAR:-default} substitution
+ENV_VAR_PATTERN = re.compile(r"\$\{([^}:]+)(?::-([^}]*))?\}")
+
+
+def get_project_slug(project_path: Path) -> str:
+    """
+    Get the project slug from pyproject.toml or directory name.
+
+    Args:
+        project_path: Path to the project directory.
+
+    Returns:
+        The project slug (kebab-case identifier).
+    """
+    pyproject_path = project_path / "pyproject.toml"
+    if pyproject_path.exists():
+        try:
+            with open(pyproject_path, "rb") as f:
+                pyproject = tomllib.load(f)
+            name = pyproject.get("project", {}).get("name")
+            if isinstance(name, str):
+                return name
+        except Exception:
+            pass
+    return project_path.name
+
+
+def expand_env_vars(text: str) -> str:
+    """
+    Expand environment variables in text using ${VAR} or ${VAR:-default} syntax.
+
+    Args:
+        text: The text containing environment variable references.
+
+    Returns:
+        The text with environment variables expanded.
+    """
+
+    def replace_var(match: re.Match[str]) -> str:
+        var_name = match.group(1)
+        default_value = match.group(2)
+        value = os.environ.get(var_name)
+        if value is not None:
+            return value
+        if default_value is not None:
+            return default_value
+        return match.group(0)  # Return original if no value and no default
+
+    return ENV_VAR_PATTERN.sub(replace_var, text)
+
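The ENV_VAR_PATTERN above supports shell-style defaults. A small demonstration of the expansion semantics, assuming sunstone-py 0.6.0 is importable (the variable names are made up, and MISSING is assumed unset):

    import os

    from sunstone.cli import expand_env_vars

    os.environ["DATA_BUCKET"] = "my-bucket"
    print(expand_env_vars("gs://${DATA_BUCKET}/datasets/"))        # -> gs://my-bucket/datasets/
    print(expand_env_vars("gs://${MISSING:-fallback}/datasets/"))  # -> gs://fallback/datasets/ (default used)
    print(expand_env_vars("gs://${MISSING}/datasets/"))            # -> unchanged (unset, no default)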
+def get_manager(datasets_file: str) -> tuple[DatasetsManager, Path]:
+    """Get DatasetsManager and project path from datasets file."""
+    datasets_path = Path(datasets_file).resolve()
+    project_path = datasets_path.parent
+    manager = DatasetsManager(project_path)
+    return manager, project_path
+
+
+def complete_dataset_slugs(ctx: click.Context, param: click.Parameter, incomplete: str) -> list[CompletionItem]:
+    """Shell completion for dataset slugs."""
+    # Get the datasets file from context or use default
+    datasets_file = ctx.params.get("datasets_file", "datasets.yaml")
+
+    try:
+        manager, _ = get_manager(datasets_file)
+        all_datasets = manager.get_all_inputs() + manager.get_all_outputs()
+        slugs = [ds.slug for ds in all_datasets]
+
+        return [CompletionItem(slug) for slug in slugs if slug.startswith(incomplete)]
+    except Exception:
+        return []
+
+
+# =============================================================================
+# Main CLI group
+# =============================================================================
+
+
+@click.group()
+@click.version_option()
+def main() -> None:
+    """Sunstone dataset and package management CLI."""
+    pass
+
+
+# =============================================================================
+# Dataset commands
+# =============================================================================
+
+
+@main.group()
+def dataset() -> None:
+    """Manage datasets in datasets.yaml."""
+    pass
+
+
+@dataset.command("list")
+@click.option(
+    "-f", "--file", "datasets_file", type=click.Path(exists=True), default="datasets.yaml", help="Path to datasets.yaml"
+)
+def dataset_list(datasets_file: str) -> None:
+    """List all datasets."""
+    try:
+        manager, _ = get_manager(datasets_file)
+    except FileNotFoundError as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+
+    inputs = manager.get_all_inputs()
+    outputs = manager.get_all_outputs()
+
+    if inputs:
+        click.echo("Inputs:")
+        for ds in inputs:
+            flags = []
+            if ds.strict:
+                flags.append("strict")
+            flag_str = f" [{', '.join(flags)}]" if flags else ""
+            click.echo(f" - {ds.slug} ({ds.name}){flag_str}")
+
+    if outputs:
+        if inputs:
+            click.echo()
+        click.echo("Outputs:")
+        for ds in outputs:
+            flags = []
+            if ds.is_publishable:
+                flags.append("publish")
+            if ds.strict:
+                flags.append("strict")
+            flag_str = f" [{', '.join(flags)}]" if flags else ""
+            click.echo(f" - {ds.slug} ({ds.name}){flag_str}")
+
+    if not inputs and not outputs:
+        click.echo("No datasets found.")
+
+
+@dataset.command("validate")
+@click.option(
+    "-f", "--file", "datasets_file", type=click.Path(exists=True), default="datasets.yaml", help="Path to datasets.yaml"
+)
+@click.argument("datasets", nargs=-1, shell_complete=complete_dataset_slugs)
+def dataset_validate(datasets_file: str, datasets: tuple[str, ...]) -> None:
+    """Validate datasets.
+
+    If no datasets are specified, validates all datasets.
+    """
+    datasets_path = Path(datasets_file).resolve()
+
+    errors: list[str] = []
+
+    # Load and parse YAML
+    try:
+        with open(datasets_path, "r") as f:
+            data = _yaml.load(f)
+    except Exception as e:
+        click.echo(f"Error: Failed to parse YAML: {e}", err=True)
+        sys.exit(1)
+
+    if data is None:
+        data = {}
+
+    # Check structure
+    if "inputs" not in data and "outputs" not in data:
+        errors.append("datasets.yaml must contain 'inputs' and/or 'outputs' lists")
+
+    # Track slugs for duplicate detection
+    all_slugs: dict[str, str] = {}  # slug -> type
+    datasets_to_validate = set(datasets) if datasets else None
+
+    def validate_dataset_entry(ds: dict, ds_type: str, index: int) -> None:
+        prefix = f"{ds_type}[{index}]"
+        slug = ds.get("slug")
+
+        # Skip if specific datasets requested and this isn't one of them
+        if datasets_to_validate and slug not in datasets_to_validate:
+            # Still track slug for duplicate detection
+            if slug:
+                all_slugs[slug] = ds_type
+            return
+
+        # Required fields
+        for field in ["name", "slug", "location", "fields"]:
+            if field not in ds:
+                errors.append(f"{prefix}: missing required field '{field}'")
+
+        # Check slug
+        if slug:
+            if slug in all_slugs:
+                errors.append(f"{prefix}: duplicate slug '{slug}' (also in {all_slugs[slug]})")
+            else:
+                all_slugs[slug] = ds_type
+
+        # Check fields
+        fields = ds.get("fields", [])
+        if not isinstance(fields, list):
+            errors.append(f"{prefix}: 'fields' must be a list")
+        else:
+            for i, field in enumerate(fields):
+                if not isinstance(field, dict):
+                    errors.append(f"{prefix}.fields[{i}]: must be an object")
+                    continue
+                if "name" not in field:
+                    errors.append(f"{prefix}.fields[{i}]: missing 'name'")
+                if "type" not in field:
+                    errors.append(f"{prefix}.fields[{i}]: missing 'type'")
+                elif field["type"] not in VALID_FIELD_TYPES:
+                    errors.append(
+                        f"{prefix}.fields[{i}]: invalid type '{field['type']}' "
+                        f"(must be one of: {', '.join(sorted(VALID_FIELD_TYPES))})"
+                    )
+
+    # Validate inputs
+    inputs = data.get("inputs", [])
+    if not isinstance(inputs, list):
+        errors.append("'inputs' must be a list")
+    else:
+        for i, ds in enumerate(inputs):
+            if not isinstance(ds, dict):
+                errors.append(f"inputs[{i}]: must be an object")
+            else:
+                validate_dataset_entry(ds, "inputs", i)
+
+    # Validate outputs
+    outputs = data.get("outputs", [])
+    if not isinstance(outputs, list):
+        errors.append("'outputs' must be a list")
+    else:
+        for i, ds in enumerate(outputs):
+            if not isinstance(ds, dict):
+                errors.append(f"outputs[{i}]: must be an object")
+            else:
+                validate_dataset_entry(ds, "outputs", i)
+
+    # Check if requested datasets were found
+    if datasets_to_validate:
+        found_slugs = set(all_slugs.keys())
+        missing = datasets_to_validate - found_slugs
+        for slug in missing:
+            errors.append(f"Dataset '{slug}' not found")
+
+    if errors:
+        click.echo("Validation errors:", err=True)
+        for error in errors:
+            click.echo(f" - {error}", err=True)
+        sys.exit(1)
+    else:
+        if datasets:
+            click.echo(f"✓ {len(datasets)} dataset(s) valid")
+        else:
+            click.echo(f"✓ {datasets_file} is valid")
+
+
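Note that dataset_validate reads the YAML file directly rather than going through DatasetsManager, so it can be exercised in-process with click's test runner. A minimal sketch, assuming sunstone-py 0.6.0 is installed; the datasets.yaml content is illustrative:

    from click.testing import CliRunner

    from sunstone.cli import main

    runner = CliRunner()
    with runner.isolated_filesystem():
        with open("datasets.yaml", "w") as f:
            f.write(
                "outputs:\n"
                "  - name: Example Output\n"
                "    slug: example-output\n"
                "    location: data/example.csv\n"
                "    fields:\n"
                "      - {name: id, type: integer}\n"
                "      - {name: label, type: string}\n"
            )
        result = runner.invoke(main, ["dataset", "validate"])
        assert result.exit_code == 0
        print(result.output)  # ✓ datasets.yaml is valid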
+@dataset.command("lock")
+@click.option(
+    "-f", "--file", "datasets_file", type=click.Path(exists=True), default="datasets.yaml", help="Path to datasets.yaml"
+)
+@click.argument("datasets", nargs=-1, shell_complete=complete_dataset_slugs)
+def dataset_lock(datasets_file: str, datasets: tuple[str, ...]) -> None:
+    """Enable strict mode for datasets.
+
+    If no datasets are specified, locks all datasets.
+    """
+    try:
+        manager, _ = get_manager(datasets_file)
+    except FileNotFoundError as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+
+    # Get all datasets if none specified
+    if not datasets:
+        all_datasets = manager.get_all_inputs() + manager.get_all_outputs()
+        datasets = tuple(ds.slug for ds in all_datasets)
+
+    if not datasets:
+        click.echo("No datasets found.")
+        return
+
+    locked = []
+    for slug in datasets:
+        try:
+            manager.set_dataset_strict(slug, strict=True)
+            locked.append(slug)
+        except DatasetNotFoundError:
+            click.echo(f"Warning: Dataset '{slug}' not found", err=True)
+
+    if locked:
+        click.echo(f"✓ Locked {len(locked)} dataset(s): {', '.join(locked)}")
+
+
+@dataset.command("unlock")
+@click.option(
+    "-f", "--file", "datasets_file", type=click.Path(exists=True), default="datasets.yaml", help="Path to datasets.yaml"
+)
+@click.argument("datasets", nargs=-1, shell_complete=complete_dataset_slugs)
+def dataset_unlock(datasets_file: str, datasets: tuple[str, ...]) -> None:
+    """Disable strict mode for datasets.
+
+    If no datasets are specified, unlocks all datasets.
+    """
+    try:
+        manager, _ = get_manager(datasets_file)
+    except FileNotFoundError as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+
+    # Get all datasets if none specified
+    if not datasets:
+        all_datasets = manager.get_all_inputs() + manager.get_all_outputs()
+        datasets = tuple(ds.slug for ds in all_datasets)
+
+    if not datasets:
+        click.echo("No datasets found.")
+        return
+
+    unlocked = []
+    for slug in datasets:
+        try:
+            manager.set_dataset_strict(slug, strict=False)
+            unlocked.append(slug)
+        except DatasetNotFoundError:
+            click.echo(f"Warning: Dataset '{slug}' not found", err=True)
+
+    if unlocked:
+        click.echo(f"✓ Unlocked {len(unlocked)} dataset(s): {', '.join(unlocked)}")
+
+
+# =============================================================================
+# Package commands
+# =============================================================================
+
+
+@main.group()
+def package() -> None:
+    """Manage data packages."""
+    pass
+
+
+@package.command("build")
+@click.option(
+    "-f", "--file", "datasets_file", type=click.Path(exists=True), default="datasets.yaml", help="Path to datasets.yaml"
+)
+@click.option("-o", "--output", "output_file", type=click.Path(), default="datapackage.json", help="Output file path")
+def package_build(datasets_file: str, output_file: str) -> None:
+    """Build a datapackage.json from datasets.yaml.
+
+    Creates a Data Package (https://datapackage.org/) with all output datasets as resources.
+    """
+    try:
+        manager, project_path = get_manager(datasets_file)
+    except FileNotFoundError as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+
+    outputs = manager.get_all_outputs()
+    if not outputs:
+        click.echo("No output datasets found.", err=True)
+        sys.exit(1)
+
+    project_slug = get_project_slug(project_path)
+
+    try:
+        from frictionless import describe
+    except ImportError:
+        click.echo("Error: frictionless is required for package build", err=True)
+        sys.exit(1)
+
+    resources = []
+    for ds in outputs:
+        data_path = manager.get_absolute_path(ds.location)
+        if not data_path.exists():
+            click.echo(f"Warning: Data file not found for '{ds.slug}': {data_path}", err=True)
+            continue
+
+        try:
+            resource = describe(str(data_path))
+            resource.name = ds.slug
+            resource.title = ds.name
+            # Use relative path in the package
+            resource.path = ds.location
+            resources.append(resource.to_dict())
+            click.echo(f" + {ds.slug}")
+        except Exception as e:
+            click.echo(f"Warning: Failed to describe '{ds.slug}': {e}", err=True)
+
+    if not resources:
+        click.echo("Error: No resources could be added to the package", err=True)
+        sys.exit(1)
+
+    datapackage = {
+        "name": project_slug,
+        "resources": resources,
+    }
+
+    output_path = Path(output_file)
+    with open(output_path, "w") as f:
+        json.dump(datapackage, f, indent=2)
+
+    click.echo(f"\n✓ Created {output_file} with {len(resources)} resource(s)")
+
+
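For reference, the descriptor written by package build has this overall shape; each resource carries the dataset's slug, name, and location, while everything else is inferred by frictionless.describe() and varies with the data, so the values below are illustrative:

    datapackage = {
        "name": "sunstone-py",                # project slug from pyproject.toml
        "resources": [
            {
                "name": "example-output",     # dataset slug
                "title": "Example Output",    # dataset name
                "path": "data/example.csv",   # dataset location, relative to the project
                "format": "csv",              # inferred by frictionless
                "schema": {"fields": [{"name": "id", "type": "integer"}]},
            }
        ],
    }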
+@package.command("push")
+@click.option("--env", type=click.Choice(["dev", "prod"]), default="dev", help="Target environment")
+@click.option(
+    "-f", "--file", "datasets_file", type=click.Path(exists=True), default="datasets.yaml", help="Path to datasets.yaml"
+)
+@click.option("--destination", "-d", "destination", type=str, default=None, help="Override destination gs:// URL")
+def package_push(env: str, datasets_file: str, destination: Optional[str]) -> None:
+    """Push the data package to Google Cloud Storage.
+
+    Uploads datapackage.json and all publishable output datasets.
+    """
+    try:
+        manager, project_path = get_manager(datasets_file)
+    except FileNotFoundError as e:
+        click.echo(f"Error: {e}", err=True)
+        sys.exit(1)
+
+    outputs = manager.get_all_outputs()
+    publishable = [ds for ds in outputs if ds.is_publishable]
+
+    if not publishable:
+        click.echo("Error: No publishable datasets found (need publish.enabled: true)", err=True)
+        sys.exit(1)
+
+    project_slug = get_project_slug(project_path)
+
+    # Determine destination
+    if destination:
+        dest_url = expand_env_vars(destination)
+    elif publishable[0].publish and publishable[0].publish.to:
+        # Use first dataset's publish.to as package destination
+        dest_url = expand_env_vars(publishable[0].publish.to)
+    else:
+        dest_url = f"gs://payloadcms-{env}/datasets/projects/{project_slug}/"
+
+    parsed = urlparse(dest_url)
+    if parsed.scheme != "gs":
+        click.echo(f"Error: Destination must be a gs:// URL, got: {dest_url}", err=True)
+        sys.exit(1)
+
+    bucket_name = parsed.netloc
+    gcs_prefix = parsed.path.lstrip("/")
+    if gcs_prefix and not gcs_prefix.endswith("/"):
+        gcs_prefix += "/"
+
+    # Build the datapackage
+    try:
+        from frictionless import describe
+    except ImportError:
+        click.echo("Error: frictionless is required for package push", err=True)
+        sys.exit(1)
+
+    resources = []
+    data_files: list[tuple[Path, str]] = []  # (local_path, remote_name)
+
+    for ds in publishable:
+        data_path = manager.get_absolute_path(ds.location)
+        if not data_path.exists():
+            click.echo(f"Warning: Data file not found for '{ds.slug}': {data_path}", err=True)
+            continue
+
+        try:
+            resource = describe(str(data_path))
+            resource.name = ds.slug
+            resource.title = ds.name
+            resource.path = data_path.name  # Just the filename in the package
+            resources.append(resource.to_dict())
+            data_files.append((data_path, data_path.name))
+        except Exception as e:
+            click.echo(f"Warning: Failed to describe '{ds.slug}': {e}", err=True)
+
+    if not resources:
+        click.echo("Error: No resources could be added to the package", err=True)
+        sys.exit(1)
+
+    datapackage = {
+        "name": project_slug,
+        "resources": resources,
+    }
+
+    # Upload to GCS
+    try:
+        from google.cloud import storage  # type: ignore[import-untyped]
+
+        client = storage.Client()
+        bucket = client.bucket(bucket_name)
+
+        # Upload datapackage.json
+        datapackage_blob = bucket.blob(f"{gcs_prefix}datapackage.json")
+        datapackage_blob.upload_from_string(json.dumps(datapackage, indent=2), content_type="application/json")
+        click.echo("✓ Uploaded datapackage.json")
+
+        # Upload data files
+        for local_path, remote_name in data_files:
+            data_blob = bucket.blob(f"{gcs_prefix}{remote_name}")
+            data_blob.upload_from_filename(str(local_path))
+            click.echo(f"✓ Uploaded {remote_name}")
+
+        click.echo(f"\nPackage pushed to: gs://{bucket_name}/{gcs_prefix}")
+
+    except ImportError:
+        click.echo("Error: google-cloud-storage is required for push", err=True)
+        click.echo("Install with: pip install google-cloud-storage", err=True)
+        sys.exit(1)
+    except Exception as e:
+        click.echo(f"Error uploading to GCS: {e}", err=True)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
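The destination logic in package_push resolves in three steps: an explicit -d/--destination flag wins, then the first publishable dataset's publish.to, then an environment-derived default; the first two pass through expand_env_vars. A standalone sketch of that precedence, mirroring the code above rather than importing it (the function name and inputs are hypothetical):

    from sunstone.cli import expand_env_vars


    def resolve_destination(destination, publish_to, env, project_slug):
        """Mirror of the destination precedence in package_push (sketch only)."""
        if destination:  # 1) explicit -d/--destination override
            return expand_env_vars(destination)
        if publish_to:   # 2) first publishable dataset's publish.to
            return expand_env_vars(publish_to)
        # 3) environment-specific default bucket
        return f"gs://payloadcms-{env}/datasets/projects/{project_slug}/"


    print(resolve_destination(None, None, "prod", "sunstone-py"))
    # -> gs://payloadcms-prod/datasets/projects/sunstone-py/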