zagg 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zagg/__init__.py ADDED
@@ -0,0 +1,51 @@
1
+ """
2
+ zagg - Multi-resolution Aggregation
3
+
4
+ Multi-resolution aggregation using morton/healpix indexing.
5
+
6
+ This package provides cloud-agnostic processing functions that can be deployed
7
+ to various cloud platforms (AWS Lambda, GCP Cloud Functions, Azure Functions, etc.)
8
+ or used for local processing.
9
+ """
10
+
11
+ try:
12
+ from ._version import __version__
13
+ except ImportError:
14
+ __version__ = "0.0.0+unknown"
15
+
16
+ from .auth import get_edl_token, get_nsidc_s3_credentials
17
+ from .config import (
18
+ PipelineConfig,
19
+ default_config,
20
+ get_child_order,
21
+ get_driver,
22
+ get_store_path,
23
+ load_config,
24
+ )
25
+ from .processing import (
26
+ calculate_cell_statistics,
27
+ process_morton_cell,
28
+ write_dataframe_to_zarr,
29
+ )
30
+ from .runner import agg
31
+ from .schema import xdggs_spec, xdggs_zarr_template
32
+ from .store import open_store, parse_s3_path
33
+
34
# Public API re-exported at package level. Kept in sorted order (class names
# first, then functions alphabetically) so additions are easy to review.
__all__ = [
    "PipelineConfig",
    "agg",
    "calculate_cell_statistics",
    "default_config",
    "get_child_order",
    "get_driver",
    "get_edl_token",
    "get_nsidc_s3_credentials",
    "get_store_path",
    "load_config",
    "open_store",
    "parse_s3_path",
    "process_morton_cell",
    "write_dataframe_to_zarr",
    "xdggs_spec",
    "xdggs_zarr_template",
]
zagg/__main__.py ADDED
@@ -0,0 +1,86 @@
1
+ """CLI entry point for zagg processing.
2
+
3
+ Usage:
4
+ python -m zagg --config atl06.yaml --catalog catalog.json
5
+ python -m zagg --config atl06.yaml --catalog catalog.json --store ./test.zarr
6
+ python -m zagg --config atl06.yaml --catalog catalog.json --max-cells 5
7
+ python -m zagg --config atl06.yaml --catalog catalog.json --backend lambda
8
+ """
9
+
10
+ import argparse
11
+ import logging
12
+ import os
13
+
14
+ from zagg.config import load_config
15
+ from zagg.runner import agg
16
+
17
+
18
def _build_parser():
    """Build the argument parser for the ``python -m zagg`` command line."""
    parser = argparse.ArgumentParser(
        description="zagg processing runner",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
examples:
python -m zagg --config atl06.yaml --catalog catalog.json
python -m zagg --config atl06.yaml --catalog catalog.json --store ./test.zarr
python -m zagg --config atl06.yaml --catalog catalog.json --max-cells 5
python -m zagg --config atl06.yaml --catalog catalog.json --backend lambda
""",
    )
    parser.add_argument("--config", required=True, help="Path to pipeline config YAML")
    parser.add_argument("--catalog", default=None, help="Path to granule catalog JSON (overrides config)")
    parser.add_argument("--store", default=None, help="Output store path (overrides config)")
    parser.add_argument("--backend", default="local", choices=["local", "lambda"],
                        help="Execution backend (default: local)")
    parser.add_argument("--driver", default=None, choices=["s3", "https"],
                        help="Data access driver (default: from config, or s3)")
    parser.add_argument("--max-cells", type=int, default=None, help="Limit number of cells (for testing)")
    parser.add_argument("--morton-cell", type=str, default=None, help="Process a specific morton cell")
    parser.add_argument("--max-workers", type=int, default=None, help="Max concurrent workers")
    parser.add_argument("--overwrite", action="store_true", help="Overwrite existing Zarr template")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be processed")
    parser.add_argument("--region", default="us-west-2", help="AWS region (default: us-west-2)")
    parser.add_argument(
        "--function-name",
        # The environment is read when the parser is built, i.e. once per main() call.
        default=os.environ.get("ZAGG_LAMBDA_FUNCTION_NAME", "process-morton-cell"),
        help="Lambda function name (default: env ZAGG_LAMBDA_FUNCTION_NAME or 'process-morton-cell')",
    )
    return parser


def _print_summary(results, dry_run):
    """Print a human-readable summary of the mapping returned by ``agg``.

    Parameters
    ----------
    results
        Mapping returned by ``agg``. A dry run reads ``total_cells``,
        ``granules_per_cell_min``/``_max``/``_avg`` and ``store_path``; a real
        run reads ``cells_with_data``, ``total_obs``, ``cells_error``,
        ``wall_time_s`` and ``store_path``, plus ``lambda_time_s`` and
        ``gb_seconds`` when ``estimated_cost_usd`` is present.
    dry_run
        Whether the run was a dry run (selects which summary is printed).
    """
    if dry_run:
        print(f"\n[DRY RUN] Would process {results['total_cells']} cells")
        print(f" Granules per cell: min={results['granules_per_cell_min']}, "
              f"max={results['granules_per_cell_max']}, "
              f"avg={results['granules_per_cell_avg']:.1f}")
        print(f" Output: {results['store_path']}")
        return

    print(f"\nDone: {results['cells_with_data']} cells with data, "
          f"{results['total_obs']:,} obs, {results['cells_error']} errors, "
          f"{results['wall_time_s']:.1f}s")
    # Cost figures are only reported when the results include an estimate.
    if "estimated_cost_usd" in results:
        print(f"Lambda compute: {results['lambda_time_s']:.0f}s total, "
              f"{results['gb_seconds']:.0f} GB-s, ~${results['estimated_cost_usd']:.2f}")
    print(f"Output: {results['store_path']}")


def main(argv=None) -> None:
    """Run the zagg pipeline from the command line.

    Parses the arguments, configures logging, loads the pipeline config,
    calls ``agg`` with the parsed options and prints a summary.

    Parameters
    ----------
    argv
        Optional list of argument strings. When ``None`` (the default),
        ``argparse`` falls back to ``sys.argv[1:]``, so existing ``main()``
        callers behave exactly as before. Passing a list allows programmatic
        invocation and testing.

    Raises
    ------
    SystemExit
        Raised by ``argparse`` for ``--help`` (code 0) or invalid
        arguments (code 2).
    """
    args = _build_parser().parse_args(argv)

    logging.basicConfig(level=logging.INFO, format="%(message)s")

    config = load_config(args.config)

    results = agg(
        config,
        catalog=args.catalog,
        store=args.store,
        backend=args.backend,
        driver=args.driver,
        max_cells=args.max_cells,
        morton_cell=args.morton_cell,
        max_workers=args.max_workers,
        overwrite=args.overwrite,
        dry_run=args.dry_run,
        function_name=args.function_name,
        region=args.region,
    )

    _print_summary(results, args.dry_run)
83
+
84
+
85
+ if __name__ == "__main__":
86
+ main()
zagg/_version.py ADDED
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '0.1.0'
22
+ __version_tuple__ = version_tuple = (0, 1, 0)
23
+
24
+ __commit_id__ = commit_id = None
zagg/auth.py ADDED
@@ -0,0 +1,66 @@
1
+ """
2
+ Orchestrator authentication helpers for NASA Earthdata access.
3
+
4
+ Two credential types:
5
+
6
+ - **S3**: ``get_nsidc_s3_credentials()`` returns STS temporary credentials
7
+ for direct S3 access. Only works from within us-west-2.
8
+ - **HTTPS**: ``get_edl_token()`` returns a bearer token for HTTPS access.
9
+ Works from anywhere.
10
+
11
+ Call ONCE in the orchestrator before processing. Credentials are valid
12
+ for approximately 1 hour.
13
+ """
14
+
15
+ import earthaccess
16
+
17
+
18
def get_edl_token() -> str:
    """Return an Earthdata Login bearer token for HTTPS data access.

    Works from any network location (not region-restricted like S3).
    The token is used by h5coro's HTTPDriver.

    Returns
    -------
    str
        Bearer token string.

    Raises
    ------
    RuntimeError
        If Earthdata Login did not authenticate or no access token is
        available on the returned auth object.
    """
    auth = earthaccess.login()
    # Only read the token after a successful login: an unauthenticated auth
    # object has no usable token, and subscripting it would fail with an
    # opaque TypeError instead of a clear message.
    token = auth.token if auth.authenticated else None
    if not token or "access_token" not in token:
        raise RuntimeError(
            "Earthdata Login failed or returned no access token; "
            "verify your Earthdata credentials."
        )
    return token["access_token"]
31
+
32
+
33
def get_nsidc_s3_credentials() -> dict:
    """
    Log in to NASA Earthdata and fetch temporary S3 credentials for NSIDC.

    Intended to be called a single time by the orchestrator, before any
    Lambda functions are invoked. The credentials last roughly one hour,
    which exceeds the 15-minute Lambda execution limit.

    Returns
    -------
    dict
        S3 credentials with keys:
        - accessKeyId: str
        - secretAccessKey: str
        - sessionToken: str
        - expiration: str (ISO timestamp)

    Examples
    --------

    ```python
    creds = get_nsidc_s3_credentials()
    print(f"Credentials expire: {creds.get('expiration')}")

    # Pass to Lambda invocation
    event = {
        "parent_morton": -6134114,
        "s3_credentials": creds,
        # ... other params
    }
    ```
    """
    # Log in, then request the NSIDC credentials from the resulting auth object.
    return earthaccess.login().get_s3_credentials(daac="NSIDC")