zagg 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zagg-0.1.0/.gitignore +62 -0
- zagg-0.1.0/LICENSE +21 -0
- zagg-0.1.0/PKG-INFO +217 -0
- zagg-0.1.0/README.md +146 -0
- zagg-0.1.0/pyproject.toml +118 -0
- zagg-0.1.0/src/zagg/__init__.py +51 -0
- zagg-0.1.0/src/zagg/__main__.py +86 -0
- zagg-0.1.0/src/zagg/_version.py +24 -0
- zagg-0.1.0/src/zagg/auth.py +66 -0
- zagg-0.1.0/src/zagg/catalog.py +622 -0
- zagg-0.1.0/src/zagg/config.py +402 -0
- zagg-0.1.0/src/zagg/configs/__init__.py +0 -0
- zagg-0.1.0/src/zagg/configs/atl06.yaml +73 -0
- zagg-0.1.0/src/zagg/processing.py +442 -0
- zagg-0.1.0/src/zagg/runner.py +519 -0
- zagg-0.1.0/src/zagg/schema.py +165 -0
- zagg-0.1.0/src/zagg/store.py +75 -0
- zagg-0.1.0/tests/conftest.py +35 -0
- zagg-0.1.0/tests/test_catalog.py +404 -0
- zagg-0.1.0/tests/test_config.py +434 -0
- zagg-0.1.0/tests/test_integration.py +79 -0
- zagg-0.1.0/tests/test_lambda_build.py +198 -0
- zagg-0.1.0/tests/test_processing.py +135 -0
- zagg-0.1.0/tests/test_runner.py +118 -0
- zagg-0.1.0/tests/test_schema.py +172 -0
- zagg-0.1.0/tests/test_store.py +38 -0
zagg-0.1.0/.gitignore
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Claude-related files
|
|
2
|
+
CLAUDE.md
|
|
3
|
+
.claude/
|
|
4
|
+
claude.json
|
|
5
|
+
.claude-code/
|
|
6
|
+
.claude-chat/
|
|
7
|
+
.claude-cache/
|
|
8
|
+
*.claude
|
|
9
|
+
|
|
10
|
+
# Python
|
|
11
|
+
__pycache__/
|
|
12
|
+
*.py[cod]
|
|
13
|
+
*$py.class
|
|
14
|
+
*.so
|
|
15
|
+
.Python
|
|
16
|
+
env/
|
|
17
|
+
venv/
|
|
18
|
+
.venv/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
src/zagg.egg-info/
|
|
21
|
+
dist/
|
|
22
|
+
build/
|
|
23
|
+
uv.lock
|
|
24
|
+
|
|
25
|
+
# hatch-vcs generated version file
|
|
26
|
+
src/zagg/_version.py
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.vscode/
|
|
30
|
+
.idea/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
*~
|
|
34
|
+
|
|
35
|
+
# OS
|
|
36
|
+
.DS_Store
|
|
37
|
+
Thumbs.db
|
|
38
|
+
|
|
39
|
+
# Testing
|
|
40
|
+
.coverage
|
|
41
|
+
.pytest_cache/
|
|
42
|
+
htmlcov/
|
|
43
|
+
|
|
44
|
+
# Jupyter
|
|
45
|
+
.ipynb_checkpoints/
|
|
46
|
+
*.ipynb_checkpoints
|
|
47
|
+
|
|
48
|
+
# Logs
|
|
49
|
+
*.log
|
|
50
|
+
logs/
|
|
51
|
+
|
|
52
|
+
# Environment variables
|
|
53
|
+
.env
|
|
54
|
+
.env.local
|
|
55
|
+
|
|
56
|
+
# Deployment artifacts
|
|
57
|
+
deployment/data/results/*.json
|
|
58
|
+
deployment/layers/*.zip
|
|
59
|
+
deployment/builds/*.zip
|
|
60
|
+
deployment/aws/layer_build*
|
|
61
|
+
|
|
62
|
+
site/*
|
zagg-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 englacial
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
zagg-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zagg
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Multi-resolution aggregation for ICESat-2 ATL06 data using morton/healpix indexing
|
|
5
|
+
Project-URL: Homepage, https://github.com/englacial/zagg
|
|
6
|
+
Project-URL: Repository, https://github.com/englacial/zagg
|
|
7
|
+
Project-URL: Issues, https://github.com/englacial/zagg/issues
|
|
8
|
+
Author-email: Shane Grigsby <refuge@rocktalus.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2025 englacial
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Classifier: Development Status :: 3 - Alpha
|
|
32
|
+
Classifier: Intended Audience :: Science/Research
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
36
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
37
|
+
Requires-Python: >=3.12
|
|
38
|
+
Requires-Dist: boto3
|
|
39
|
+
Requires-Dist: earthaccess
|
|
40
|
+
Requires-Dist: fastparquet
|
|
41
|
+
Requires-Dist: h5coro>=0.0.8
|
|
42
|
+
Requires-Dist: healpy
|
|
43
|
+
Requires-Dist: mortie>=0.6.3
|
|
44
|
+
Requires-Dist: numpy>=2.0
|
|
45
|
+
Requires-Dist: obstore>=0.8.2
|
|
46
|
+
Requires-Dist: pandas>=2.2
|
|
47
|
+
Requires-Dist: pyarrow
|
|
48
|
+
Requires-Dist: pydantic-zarr>=0.9.1
|
|
49
|
+
Requires-Dist: pyproj
|
|
50
|
+
Requires-Dist: pyyaml
|
|
51
|
+
Requires-Dist: shapely
|
|
52
|
+
Requires-Dist: zarr>=3.1.5
|
|
53
|
+
Provides-Extra: analysis
|
|
54
|
+
Requires-Dist: cartopy>=0.25.0; extra == 'analysis'
|
|
55
|
+
Requires-Dist: cubed-xarray>=0.0.9; extra == 'analysis'
|
|
56
|
+
Requires-Dist: cubed>=0.24.0; extra == 'analysis'
|
|
57
|
+
Requires-Dist: geopandas; extra == 'analysis'
|
|
58
|
+
Requires-Dist: matplotlib>=3.10.8; extra == 'analysis'
|
|
59
|
+
Requires-Dist: notebook; extra == 'analysis'
|
|
60
|
+
Requires-Dist: xarray[io]; extra == 'analysis'
|
|
61
|
+
Requires-Dist: xdggs; extra == 'analysis'
|
|
62
|
+
Provides-Extra: lambda
|
|
63
|
+
Requires-Dist: astropy; extra == 'lambda'
|
|
64
|
+
Requires-Dist: cramjam; extra == 'lambda'
|
|
65
|
+
Requires-Dist: h5coro==0.0.8; extra == 'lambda'
|
|
66
|
+
Requires-Dist: numpy==2.2.6; extra == 'lambda'
|
|
67
|
+
Requires-Dist: pandas==2.2.3; extra == 'lambda'
|
|
68
|
+
Provides-Extra: test
|
|
69
|
+
Requires-Dist: pytest>=8.0; extra == 'test'
|
|
70
|
+
Description-Content-Type: text/markdown
|
|
71
|
+
|
|
72
|
+
# zagg - Multi-resolution Aggregation
|
|
73
|
+
|
|
74
|
+
Aggregate point observations to multi-resolution grids using HEALPix spatial indexing and serverless compute.
|
|
75
|
+
|
|
76
|
+
## Overview
|
|
77
|
+
|
|
78
|
+
zagg aggregates sparse point data (e.g., ICESat-2 ATL06 elevation measurements) to gridded products using HEALPix/morton spatial indexing. Processing runs in parallel on AWS Lambda — each worker handles one spatial cell independently, writing to a shared [Zarr v3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html) store following the [DGGS convention](https://github.com/zarr-conventions/dggs).
|
|
79
|
+
|
|
80
|
+
## Features
|
|
81
|
+
|
|
82
|
+
- **Pre-computed granule catalogs** — query CMR once, process many times
|
|
83
|
+
- **Morton-based spatial indexing** — HEALPix nested scheme for hierarchical grids
|
|
84
|
+
- **Massive parallelism** — tested with up to 1,700 concurrent Lambda workers
|
|
85
|
+
- **Direct S3 access** — h5coro reads HDF5 via byte-range requests, no downloads
|
|
86
|
+
- **Cost-effective** — ~$0.006/cell (~$2 per full Antarctica run on ARM64)
|
|
87
|
+
|
|
88
|
+
## End-to-End Workflow
|
|
89
|
+
|
|
90
|
+
### Step 1: Build a Granule Catalog
|
|
91
|
+
|
|
92
|
+
Query NASA's CMR to build a mapping of spatial cells to granule S3 URLs.
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# ICESat-2 convenience — dates are computed automatically from the cycle number:
|
|
96
|
+
uv run python -m zagg.catalog --cycle 22 --parent-order 6
|
|
97
|
+
|
|
98
|
+
# General — explicit date range and spatial polygon:
|
|
99
|
+
uv run python -m zagg.catalog \
|
|
100
|
+
--start-date 2024-01-06 --end-date 2024-04-07 \
|
|
101
|
+
--short-name ATL06 \
|
|
102
|
+
--polygon my_region.geojson \
|
|
103
|
+
--parent-order 6
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
When `--polygon` is provided, the bounding box for the CMR query is computed automatically from the polygon's extent, and `morton_coverage` uses the polygon for cell discovery. When no polygon is given, Antarctic drainage basins are used as the default.
|
|
107
|
+
|
|
108
|
+
Output: `catalog_ATL06_2024-01-06_2024-04-07_order6.json`
|
|
109
|
+
|
|
110
|
+
See [Catalog API](docs/api/catalog.md) for full options.
|
|
111
|
+
|
|
112
|
+
### Step 2: Deploy the Lambda Function
|
|
113
|
+
|
|
114
|
+
Build and deploy the Lambda function and its dependency layer.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Build the function package
|
|
118
|
+
bash deployment/aws/build_function.sh
|
|
119
|
+
|
|
120
|
+
# Build the dependency layer (ARM64)
|
|
121
|
+
bash deployment/aws/build_arm64_layer.sh
|
|
122
|
+
|
|
123
|
+
# Deploy
|
|
124
|
+
bash deployment/aws/deploy.sh
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
See [Lambda Deployment](docs/deployment/lambda.md) and [ARM64 Build Guide](docs/deployment/arm64.md).
|
|
128
|
+
|
|
129
|
+
### Step 3: Run Processing
|
|
130
|
+
|
|
131
|
+
Processing reads a pipeline config YAML (data source, aggregation, output store) and a granule catalog. Run locally or dispatch to Lambda.
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Local processing (write to local Zarr):
|
|
135
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --store ./output.zarr
|
|
136
|
+
|
|
137
|
+
# Local processing (write to S3):
|
|
138
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --store s3://bucket/output.zarr
|
|
139
|
+
|
|
140
|
+
# Lambda dispatch (requires deployed Lambda function):
|
|
141
|
+
uv run python deployment/aws/invoke_lambda.py \
|
|
142
|
+
--config atl06.yaml --catalog catalog.json
|
|
143
|
+
|
|
144
|
+
# Test with a few cells:
|
|
145
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --max-cells 5
|
|
146
|
+
|
|
147
|
+
# Dry run:
|
|
148
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --dry-run
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
The store path and output grid parameters are defined in the YAML config (`output.store`, `output.grid.child_order`); the store path can be overridden via `--store` on the command line.
|
|
152
|
+
|
|
153
|
+
### Step 4: Visualize Results
|
|
154
|
+
|
|
155
|
+
The output Zarr is a public DGGS dataset. The included notebook rasterizes HEALPix cells to a polar stereographic grid for fast rendering with `imshow`.
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
uv run jupyter notebook notebooks/rasterized_zarr.ipynb
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Adjust `GRID_SPACING` in the notebook to control output resolution (default 2 km).
|
|
162
|
+
|
|
163
|
+
## Project Structure
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
zagg/
|
|
167
|
+
├── src/zagg/ # Main package (cloud-agnostic)
|
|
168
|
+
│ ├── __main__.py # Local processing runner (python -m zagg)
|
|
169
|
+
│ ├── config.py # YAML pipeline configuration
|
|
170
|
+
│ ├── processing.py # Core aggregation pipeline
|
|
171
|
+
│ ├── catalog.py # CMR query + catalog building
|
|
172
|
+
│ ├── schema.py # Output schema + Zarr template
|
|
173
|
+
│ ├── store.py # Store factory (local or S3)
|
|
174
|
+
│ ├── auth.py # NASA Earthdata authentication
|
|
175
|
+
│ └── configs/ # Built-in pipeline configs (atl06.yaml)
|
|
176
|
+
├── deployment/ # Cloud-specific deployment
|
|
177
|
+
│ └── aws/ # Lambda handler, orchestrator, build scripts
|
|
178
|
+
├── notebooks/ # Visualization
|
|
179
|
+
├── docs/ # Documentation
|
|
180
|
+
└── tests/ # Test suite
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Documentation
|
|
184
|
+
|
|
185
|
+
- **[Architecture](docs/design/architecture.md)** — design philosophy, end-to-end flow diagram, key decisions
|
|
186
|
+
- **[Schema](docs/design/schema.md)** — aggregation dispatch, extending with new statistics
|
|
187
|
+
- **[API Reference](docs/api/catalog.md)** — catalog, processing, schema, auth modules
|
|
188
|
+
- **[Lambda Deployment](docs/deployment/lambda.md)** — AWS setup and production use
|
|
189
|
+
- **[ARM64 Build Guide](docs/deployment/arm64.md)** — building Lambda layers for ARM64
|
|
190
|
+
|
|
191
|
+
## Development
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
# Install
|
|
195
|
+
uv sync --all-groups
|
|
196
|
+
|
|
197
|
+
# Run tests
|
|
198
|
+
uv run pytest
|
|
199
|
+
|
|
200
|
+
# Lint
|
|
201
|
+
uv run ruff check src/
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Requires Python >= 3.12, [uv](https://docs.astral.sh/uv/), AWS credentials (for Lambda), and a [NASA Earthdata](https://urs.earthdata.nasa.gov/) account (for data access).
|
|
205
|
+
|
|
206
|
+
## Performance
|
|
207
|
+
|
|
208
|
+
| Metric | Value |
|
|
209
|
+
|--------|-------|
|
|
210
|
+
| Execution time | 2–3 min average per cell |
|
|
211
|
+
| Memory | 2 GB configured, 1–1.5 GB typical |
|
|
212
|
+
| Throughput | Tested with up to 1,700 concurrent workers |
|
|
213
|
+
| Cost | ~$0.006/cell (~$2 per full Antarctica run on ARM64) |
|
|
214
|
+
|
|
215
|
+
## License
|
|
216
|
+
|
|
217
|
+
MIT — see LICENSE file.
|
zagg-0.1.0/README.md
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# zagg - Multi-resolution Aggregation
|
|
2
|
+
|
|
3
|
+
Aggregate point observations to multi-resolution grids using HEALPix spatial indexing and serverless compute.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
zagg aggregates sparse point data (e.g., ICESat-2 ATL06 elevation measurements) to gridded products using HEALPix/morton spatial indexing. Processing runs in parallel on AWS Lambda — each worker handles one spatial cell independently, writing to a shared [Zarr v3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html) store following the [DGGS convention](https://github.com/zarr-conventions/dggs).
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Pre-computed granule catalogs** — query CMR once, process many times
|
|
12
|
+
- **Morton-based spatial indexing** — HEALPix nested scheme for hierarchical grids
|
|
13
|
+
- **Massive parallelism** — tested with up to 1,700 concurrent Lambda workers
|
|
14
|
+
- **Direct S3 access** — h5coro reads HDF5 via byte-range requests, no downloads
|
|
15
|
+
- **Cost-effective** — ~$0.006/cell (~$2 per full Antarctica run on ARM64)
|
|
16
|
+
|
|
17
|
+
## End-to-End Workflow
|
|
18
|
+
|
|
19
|
+
### Step 1: Build a Granule Catalog
|
|
20
|
+
|
|
21
|
+
Query NASA's CMR to build a mapping of spatial cells to granule S3 URLs.
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# ICESat-2 convenience — dates are computed automatically from the cycle number:
|
|
25
|
+
uv run python -m zagg.catalog --cycle 22 --parent-order 6
|
|
26
|
+
|
|
27
|
+
# General — explicit date range and spatial polygon:
|
|
28
|
+
uv run python -m zagg.catalog \
|
|
29
|
+
--start-date 2024-01-06 --end-date 2024-04-07 \
|
|
30
|
+
--short-name ATL06 \
|
|
31
|
+
--polygon my_region.geojson \
|
|
32
|
+
--parent-order 6
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
When `--polygon` is provided, the bounding box for the CMR query is computed automatically from the polygon's extent, and `morton_coverage` uses the polygon for cell discovery. When no polygon is given, Antarctic drainage basins are used as the default.
|
|
36
|
+
|
|
37
|
+
Output: `catalog_ATL06_2024-01-06_2024-04-07_order6.json`
|
|
38
|
+
|
|
39
|
+
See [Catalog API](docs/api/catalog.md) for full options.
|
|
40
|
+
|
|
41
|
+
### Step 2: Deploy the Lambda Function
|
|
42
|
+
|
|
43
|
+
Build and deploy the Lambda function and its dependency layer.
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Build the function package
|
|
47
|
+
bash deployment/aws/build_function.sh
|
|
48
|
+
|
|
49
|
+
# Build the dependency layer (ARM64)
|
|
50
|
+
bash deployment/aws/build_arm64_layer.sh
|
|
51
|
+
|
|
52
|
+
# Deploy
|
|
53
|
+
bash deployment/aws/deploy.sh
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
See [Lambda Deployment](docs/deployment/lambda.md) and [ARM64 Build Guide](docs/deployment/arm64.md).
|
|
57
|
+
|
|
58
|
+
### Step 3: Run Processing
|
|
59
|
+
|
|
60
|
+
Processing reads a pipeline config YAML (data source, aggregation, output store) and a granule catalog. Run locally or dispatch to Lambda.
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Local processing (write to local Zarr):
|
|
64
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --store ./output.zarr
|
|
65
|
+
|
|
66
|
+
# Local processing (write to S3):
|
|
67
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --store s3://bucket/output.zarr
|
|
68
|
+
|
|
69
|
+
# Lambda dispatch (requires deployed Lambda function):
|
|
70
|
+
uv run python deployment/aws/invoke_lambda.py \
|
|
71
|
+
--config atl06.yaml --catalog catalog.json
|
|
72
|
+
|
|
73
|
+
# Test with a few cells:
|
|
74
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --max-cells 5
|
|
75
|
+
|
|
76
|
+
# Dry run:
|
|
77
|
+
uv run python -m zagg --config atl06.yaml --catalog catalog.json --dry-run
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
The store path and output grid parameters are defined in the YAML config (`output.store`, `output.grid.child_order`); the store path can be overridden via `--store` on the command line.
|
|
81
|
+
|
|
82
|
+
### Step 4: Visualize Results
|
|
83
|
+
|
|
84
|
+
The output Zarr is a public DGGS dataset. The included notebook rasterizes HEALPix cells to a polar stereographic grid for fast rendering with `imshow`.
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
uv run jupyter notebook notebooks/rasterized_zarr.ipynb
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Adjust `GRID_SPACING` in the notebook to control output resolution (default 2 km).
|
|
91
|
+
|
|
92
|
+
## Project Structure
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
zagg/
|
|
96
|
+
├── src/zagg/ # Main package (cloud-agnostic)
|
|
97
|
+
│ ├── __main__.py # Local processing runner (python -m zagg)
|
|
98
|
+
│ ├── config.py # YAML pipeline configuration
|
|
99
|
+
│ ├── processing.py # Core aggregation pipeline
|
|
100
|
+
│ ├── catalog.py # CMR query + catalog building
|
|
101
|
+
│ ├── schema.py # Output schema + Zarr template
|
|
102
|
+
│ ├── store.py # Store factory (local or S3)
|
|
103
|
+
│ ├── auth.py # NASA Earthdata authentication
|
|
104
|
+
│ └── configs/ # Built-in pipeline configs (atl06.yaml)
|
|
105
|
+
├── deployment/ # Cloud-specific deployment
|
|
106
|
+
│ └── aws/ # Lambda handler, orchestrator, build scripts
|
|
107
|
+
├── notebooks/ # Visualization
|
|
108
|
+
├── docs/ # Documentation
|
|
109
|
+
└── tests/ # Test suite
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Documentation
|
|
113
|
+
|
|
114
|
+
- **[Architecture](docs/design/architecture.md)** — design philosophy, end-to-end flow diagram, key decisions
|
|
115
|
+
- **[Schema](docs/design/schema.md)** — aggregation dispatch, extending with new statistics
|
|
116
|
+
- **[API Reference](docs/api/catalog.md)** — catalog, processing, schema, auth modules
|
|
117
|
+
- **[Lambda Deployment](docs/deployment/lambda.md)** — AWS setup and production use
|
|
118
|
+
- **[ARM64 Build Guide](docs/deployment/arm64.md)** — building Lambda layers for ARM64
|
|
119
|
+
|
|
120
|
+
## Development
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
# Install
|
|
124
|
+
uv sync --all-groups
|
|
125
|
+
|
|
126
|
+
# Run tests
|
|
127
|
+
uv run pytest
|
|
128
|
+
|
|
129
|
+
# Lint
|
|
130
|
+
uv run ruff check src/
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Requires Python >= 3.12, [uv](https://docs.astral.sh/uv/), AWS credentials (for Lambda), and a [NASA Earthdata](https://urs.earthdata.nasa.gov/) account (for data access).
|
|
134
|
+
|
|
135
|
+
## Performance
|
|
136
|
+
|
|
137
|
+
| Metric | Value |
|
|
138
|
+
|--------|-------|
|
|
139
|
+
| Execution time | 2–3 min average per cell |
|
|
140
|
+
| Memory | 2 GB configured, 1–1.5 GB typical |
|
|
141
|
+
| Throughput | Tested with up to 1,700 concurrent workers |
|
|
142
|
+
| Cost | ~$0.006/cell (~$2 per full Antarctica run on ARM64) |
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
MIT — see LICENSE file.
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling", "hatch-vcs"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "zagg"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Multi-resolution aggregation for ICESat-2 ATL06 data using morton/healpix indexing"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = {file = "LICENSE"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Shane Grigsby", email = "refuge@rocktalus.com"},
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Science/Research",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Scientific/Engineering :: GIS",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
dependencies = [
|
|
25
|
+
"numpy>=2.0",
|
|
26
|
+
"pandas>=2.2",
|
|
27
|
+
"h5coro>=0.0.8",
|
|
28
|
+
"mortie>=0.6.3",
|
|
29
|
+
"healpy",
|
|
30
|
+
"earthaccess",
|
|
31
|
+
"boto3",
|
|
32
|
+
"fastparquet",
|
|
33
|
+
"pyarrow",
|
|
34
|
+
"pydantic-zarr>=0.9.1",
|
|
35
|
+
"zarr>=3.1.5",
|
|
36
|
+
"obstore>=0.8.2",
|
|
37
|
+
"pyyaml",
|
|
38
|
+
"pyproj",
|
|
39
|
+
"shapely",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
lambda = [
|
|
44
|
+
"numpy==2.2.6",
|
|
45
|
+
"pandas==2.2.3",
|
|
46
|
+
"h5coro==0.0.8",
|
|
47
|
+
"cramjam",
|
|
48
|
+
"astropy",
|
|
49
|
+
]
|
|
50
|
+
analysis = [
|
|
51
|
+
"cubed>=0.24.0",
|
|
52
|
+
"cubed-xarray>=0.0.9",
|
|
53
|
+
"xarray[io]",
|
|
54
|
+
"xdggs",
|
|
55
|
+
"geopandas",
|
|
56
|
+
"cartopy>=0.25.0",
|
|
57
|
+
"matplotlib>=3.10.8",
|
|
58
|
+
"notebook",
|
|
59
|
+
]
|
|
60
|
+
test = [
|
|
61
|
+
"pytest>=8.0",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
[project.urls]
|
|
65
|
+
Homepage = "https://github.com/englacial/zagg"
|
|
66
|
+
Repository = "https://github.com/englacial/zagg"
|
|
67
|
+
Issues = "https://github.com/englacial/zagg/issues"
|
|
68
|
+
|
|
69
|
+
[project.scripts]
|
|
70
|
+
zagg = "zagg.__main__:main"
|
|
71
|
+
|
|
72
|
+
[tool.hatch.version]
|
|
73
|
+
source = "vcs"
|
|
74
|
+
|
|
75
|
+
[tool.hatch.build.hooks.vcs]
|
|
76
|
+
version-file = "src/zagg/_version.py"
|
|
77
|
+
|
|
78
|
+
[tool.hatch.build.targets.wheel]
|
|
79
|
+
packages = ["src/zagg"]
|
|
80
|
+
|
|
81
|
+
[tool.hatch.build.targets.sdist]
|
|
82
|
+
only-include = [
|
|
83
|
+
"src/zagg",
|
|
84
|
+
"tests",
|
|
85
|
+
"README.md",
|
|
86
|
+
"LICENSE",
|
|
87
|
+
"pyproject.toml",
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
[tool.pytest.ini_options]
|
|
91
|
+
testpaths = ["tests"]
|
|
92
|
+
python_files = ["test_*.py"]
|
|
93
|
+
markers = [
|
|
94
|
+
"slow: marks tests as slow (run with pytest -m slow)",
|
|
95
|
+
]
|
|
96
|
+
|
|
97
|
+
[tool.ruff]
|
|
98
|
+
line-length = 100
|
|
99
|
+
target-version = "py312"
|
|
100
|
+
lint.select = ["E", "F", "W", "I", "N"]
|
|
101
|
+
lint.ignore = ["E501"]
|
|
102
|
+
|
|
103
|
+
[tool.ruff.lint.isort]
|
|
104
|
+
known-first-party = ["zagg"]
|
|
105
|
+
|
|
106
|
+
[dependency-groups]
|
|
107
|
+
dev = [
|
|
108
|
+
"pre-commit>=4.5.1",
|
|
109
|
+
"pytest",
|
|
110
|
+
"pytest-cov",
|
|
111
|
+
"ruff",
|
|
112
|
+
]
|
|
113
|
+
docs = [
|
|
114
|
+
"mkdocs>=1.6.1",
|
|
115
|
+
"mkdocs-material[imaging]>=9.6.14",
|
|
116
|
+
"mkdocstrings>=0.29.1",
|
|
117
|
+
"mkdocstrings-python>=1.16.10",
|
|
118
|
+
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
zagg - Multi-resolution Aggregation
|
|
3
|
+
|
|
4
|
+
Multi-resolution aggregation using morton/healpix indexing.
|
|
5
|
+
|
|
6
|
+
This package provides cloud-agnostic processing functions that can be deployed
|
|
7
|
+
to various cloud platforms (AWS Lambda, GCP Cloud Functions, Azure Functions, etc.)
|
|
8
|
+
or used for local processing.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from ._version import __version__
|
|
13
|
+
except ImportError:
|
|
14
|
+
__version__ = "0.0.0+unknown"
|
|
15
|
+
|
|
16
|
+
from .auth import get_edl_token, get_nsidc_s3_credentials
|
|
17
|
+
from .config import (
|
|
18
|
+
PipelineConfig,
|
|
19
|
+
default_config,
|
|
20
|
+
get_child_order,
|
|
21
|
+
get_driver,
|
|
22
|
+
get_store_path,
|
|
23
|
+
load_config,
|
|
24
|
+
)
|
|
25
|
+
from .processing import (
|
|
26
|
+
calculate_cell_statistics,
|
|
27
|
+
process_morton_cell,
|
|
28
|
+
write_dataframe_to_zarr,
|
|
29
|
+
)
|
|
30
|
+
from .runner import agg
|
|
31
|
+
from .schema import xdggs_spec, xdggs_zarr_template
|
|
32
|
+
from .store import open_store, parse_s3_path
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
"PipelineConfig",
|
|
36
|
+
"calculate_cell_statistics",
|
|
37
|
+
"default_config",
|
|
38
|
+
"get_child_order",
|
|
39
|
+
"get_driver",
|
|
40
|
+
"get_edl_token",
|
|
41
|
+
"get_nsidc_s3_credentials",
|
|
42
|
+
"get_store_path",
|
|
43
|
+
"load_config",
|
|
44
|
+
"open_store",
|
|
45
|
+
"parse_s3_path",
|
|
46
|
+
"process_morton_cell",
|
|
47
|
+
"agg",
|
|
48
|
+
"write_dataframe_to_zarr",
|
|
49
|
+
"xdggs_spec",
|
|
50
|
+
"xdggs_zarr_template",
|
|
51
|
+
]
|