xarray-prism 2602.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xarray_prism-2602.0.0/LICENSE +28 -0
- xarray_prism-2602.0.0/PKG-INFO +349 -0
- xarray_prism-2602.0.0/README.md +278 -0
- xarray_prism-2602.0.0/pyproject.toml +214 -0
- xarray_prism-2602.0.0/setup.cfg +4 -0
- xarray_prism-2602.0.0/src/xarray_prism/__init__.py +40 -0
- xarray_prism-2602.0.0/src/xarray_prism/_detection.py +276 -0
- xarray_prism-2602.0.0/src/xarray_prism/_registry.py +45 -0
- xarray_prism-2602.0.0/src/xarray_prism/_version.py +3 -0
- xarray_prism-2602.0.0/src/xarray_prism/backends/__init__.py +4 -0
- xarray_prism-2602.0.0/src/xarray_prism/backends/cloud.py +186 -0
- xarray_prism-2602.0.0/src/xarray_prism/backends/posix.py +25 -0
- xarray_prism-2602.0.0/src/xarray_prism/entrypoint.py +261 -0
- xarray_prism-2602.0.0/src/xarray_prism/utils.py +159 -0
- xarray_prism-2602.0.0/src/xarray_prism.egg-info/PKG-INFO +349 -0
- xarray_prism-2602.0.0/src/xarray_prism.egg-info/SOURCES.txt +21 -0
- xarray_prism-2602.0.0/src/xarray_prism.egg-info/dependency_links.txt +1 -0
- xarray_prism-2602.0.0/src/xarray_prism.egg-info/entry_points.txt +2 -0
- xarray_prism-2602.0.0/src/xarray_prism.egg-info/requires.txt +24 -0
- xarray_prism-2602.0.0/src/xarray_prism.egg-info/top_level.txt +1 -0
- xarray_prism-2602.0.0/tests/test_backends.py +203 -0
- xarray_prism-2602.0.0/tests/test_detection.py +287 -0
- xarray_prism-2602.0.0/tests/test_entrypoint.py +298 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023, Climate Informatics and Technologies (CLINT)
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xarray-prism
|
|
3
|
+
Version: 2602.0.0
|
|
4
|
+
Summary: A multi-format and multi-storage xarray engine with automatic engine detection, and ability to register new data format and uri type for climate data.
|
|
5
|
+
Author-email: "DKRZ, Clint" <freva@dkrz.de>
|
|
6
|
+
License: BSD 3-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2023, Climate Informatics and Technologies (CLINT)
|
|
9
|
+
|
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
|
11
|
+
modification, are permitted provided that the following conditions are met:
|
|
12
|
+
|
|
13
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
14
|
+
list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
|
18
|
+
and/or other materials provided with the distribution.
|
|
19
|
+
|
|
20
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
21
|
+
contributors may be used to endorse or promote products derived from
|
|
22
|
+
this software without specific prior written permission.
|
|
23
|
+
|
|
24
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
25
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
26
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
27
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
28
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
29
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
30
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
31
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
32
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
Project-URL: Issues, https://github.com/freva-org/xarray-prism/issues
|
|
35
|
+
Project-URL: Source, https://github.com/freva-org/xarray-prism/
|
|
36
|
+
Keywords: xarray,climate,netcdf,zarr,grib,geotiff
|
|
37
|
+
Classifier: Development Status :: 4 - Beta
|
|
38
|
+
Classifier: Environment :: Console
|
|
39
|
+
Classifier: Intended Audience :: Developers
|
|
40
|
+
Classifier: Intended Audience :: Science/Research
|
|
41
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
42
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
43
|
+
Classifier: Programming Language :: Python :: 3
|
|
44
|
+
Requires-Python: >=3.9
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
License-File: LICENSE
|
|
47
|
+
Requires-Dist: xarray
|
|
48
|
+
Requires-Dist: fsspec
|
|
49
|
+
Requires-Dist: h5py
|
|
50
|
+
Requires-Dist: h5netcdf
|
|
51
|
+
Requires-Dist: scipy
|
|
52
|
+
Requires-Dist: zarr
|
|
53
|
+
Requires-Dist: cfgrib
|
|
54
|
+
Requires-Dist: eccodes
|
|
55
|
+
Requires-Dist: rioxarray
|
|
56
|
+
Requires-Dist: rasterio
|
|
57
|
+
Requires-Dist: netCDF4
|
|
58
|
+
Requires-Dist: s3fs
|
|
59
|
+
Requires-Dist: gcsfs
|
|
60
|
+
Requires-Dist: adlfs
|
|
61
|
+
Provides-Extra: dev
|
|
62
|
+
Requires-Dist: pytest; extra == "dev"
|
|
63
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
64
|
+
Requires-Dist: mypy; extra == "dev"
|
|
65
|
+
Requires-Dist: black; extra == "dev"
|
|
66
|
+
Requires-Dist: isort; extra == "dev"
|
|
67
|
+
Requires-Dist: flake8; extra == "dev"
|
|
68
|
+
Requires-Dist: codespell; extra == "dev"
|
|
69
|
+
Requires-Dist: tox; extra == "dev"
|
|
70
|
+
Dynamic: license-file
|
|
71
|
+
|
|
72
|
+
# Xarray Prism Engine
|
|
73
|
+
|
|
74
|
+
A multi-format and multi-storage xarray engine with automatic engine detection,
|
|
75
|
+
and the ability to register new data formats and URI types for climate data.
|
|
76
|
+
|
|
77
|
+
> [!Important]
|
|
78
|
+
> If you encounter a data format that the `freva` engine is not able to open, please
|
|
79
|
+
> file an issue report [here](https://github.com/freva-org/freva-xarray/issues/new).
|
|
80
|
+
> This helps us improve the engine, enabling users to work with different kinds of climate data.
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
## Installation
|
|
84
|
+
|
|
85
|
+
### Install via PyPI
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install xarray-prism
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Install via Conda
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
conda install xarray-prism
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Quick Start
|
|
98
|
+
|
|
99
|
+
### Using with xarray
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
import xarray as xr
|
|
103
|
+
|
|
104
|
+
# Auto-detect format
|
|
105
|
+
ds = xr.open_dataset("my_data.unknown_fmt", engine="prism")
|
|
106
|
+
|
|
107
|
+
# Remote Zarr on S3
|
|
108
|
+
ds = xr.open_dataset(
|
|
109
|
+
"s3://freva/workshop/tas.zarr",
|
|
110
|
+
engine="prism",
|
|
111
|
+
storage_options={
|
|
112
|
+
"anon": True,
|
|
113
|
+
"client_kwargs": {
|
|
114
|
+
"endpoint_url": "https://s3.eu-dkrz-1.dkrz.cloud"
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
# Remote NetCDF3 on S3
|
|
120
|
+
ds = xr.open_dataset(
|
|
121
|
+
"s3://freva/workshop/tas.nc",
|
|
122
|
+
engine="prism",
|
|
123
|
+
storage_options={
|
|
124
|
+
"anon": True,
|
|
125
|
+
"client_kwargs": {
|
|
126
|
+
"endpoint_url": "https://s3.eu-dkrz-1.dkrz.cloud"
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# Remote NetCDF4 on S3
|
|
132
|
+
ds = xr.open_dataset(
|
|
133
|
+
"s3://freva/workshop/tas.nc4",
|
|
134
|
+
engine="prism",
|
|
135
|
+
storage_options={
|
|
136
|
+
"anon": True,
|
|
137
|
+
"client_kwargs": {
|
|
138
|
+
"endpoint_url": "https://s3.eu-dkrz-1.dkrz.cloud"
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
# Remote Zarr on S3 - non-anon
|
|
144
|
+
ds = xr.open_dataset(
|
|
145
|
+
"s3://bucket/data.zarr",
|
|
146
|
+
engine="prism",
|
|
147
|
+
storage_options={
|
|
148
|
+
"key": "YOUR_KEY",
|
|
149
|
+
"secret": "YOUR_SECRET",
|
|
150
|
+
"client_kwargs": {
|
|
151
|
+
"endpoint_url": "S3_ENDPOINT"
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
# OPeNDAP from THREDDS
|
|
157
|
+
ds = xr.open_dataset(
|
|
158
|
+
"https://icdc.cen.uni-hamburg.de/thredds/dodsC/ftpthredds/ar5_sea_level_rise/gia_mean.nc",
|
|
159
|
+
engine="prism"
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
# Local GRIB file
|
|
163
|
+
ds = xr.open_dataset("forecast.grib2", engine="prism")
|
|
164
|
+
|
|
165
|
+
# GeoTIFF
|
|
166
|
+
ds = xr.open_dataset("satellite.tif", engine="prism")
|
|
167
|
+
|
|
168
|
+
# tip: Handle the cache manually by yourself
|
|
169
|
+
xr.open_dataset(
|
|
170
|
+
"simplecache::s3://bucket/file.nc3",
|
|
171
|
+
engine="prism",
|
|
172
|
+
storage_options={
|
|
173
|
+
"s3": {"anon": True, "client_kwargs": {"endpoint_url": "..."}},
|
|
174
|
+
"simplecache": {"cache_storage": "/path/to/cache"}
|
|
175
|
+
}
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# Even for the tif format on the S3 you can pass the credential through
|
|
179
|
+
# storage_options which is not supported by rasterio:
|
|
180
|
+
xr.open_dataset(
|
|
181
|
+
"s3://bucket/file.tif",
|
|
182
|
+
engine="prism",
|
|
183
|
+
storage_options={
|
|
184
|
+
"key": "YOUR_KEY",
|
|
185
|
+
"secret": "YOUR_SECRET",
|
|
186
|
+
"client_kwargs": {
|
|
187
|
+
"endpoint_url": "S3_ENDPOINT"
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Supported Formats
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
|Data format | Remote backend | Local FS | Cache|
|
|
197
|
+
|--------------|------------------------|-----------|-----------|
|
|
198
|
+
|GRIB | cfgrib + fsspec | cfgrib | fsspec simplecache (full-file)|
|
|
199
|
+
|Zarr | zarr + fsspec | zarr | chunked key/value store|
|
|
200
|
+
|NetCDF3 | scipy + fsspec | scipy | fsspec byte cache (5 MB blocks but full download)|
|
|
201
|
+
|NetCDF4/HDF5 | h5netcdf + fsspec | h5netcdf | fsspec byte cache (5 MB block)|
|
|
202
|
+
|GeoTIFF | rasterio + fsspec | rasterio | GDAL/rasterio block cache (5 MB block)|
|
|
203
|
+
|OPeNDAP/DODS | netCDF4 | n/a | n/a|
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
> [!WARNING]
|
|
207
|
+
> **Remote GRIB & NetCDF3 require full file download**
|
|
208
|
+
>
|
|
209
|
+
> Unlike Zarr or HDF5, these formats don't support partial/chunk reads over the network.
|
|
210
|
+
>
|
|
211
|
+
> By default, xarray-prism caches files in the system temp directory.
|
|
212
|
+
> This works well for most cases.
|
|
213
|
+
> If temp storage is a concern (e.g., limited space or cleared on reboot),
|
|
214
|
+
> you can specify a persistent cache:
|
|
215
|
+
>
|
|
216
|
+
> | Option | How |
|
|
217
|
+
> |--------|-----|
|
|
218
|
+
> | Environment variable | `export XARRAY_PRISM_CACHE=/path/to/cache` |
|
|
219
|
+
> | Per-call | `storage_options={"simplecache": {"cache_storage": "/path"}}` |
|
|
220
|
+
> | Default | System temp directory |
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
## Customization
|
|
224
|
+
|
|
225
|
+
### Custom Format Detectors and URI Types
|
|
226
|
+
|
|
227
|
+
You can extend **xarray-prism** with custom *format detectors*, *URI types*, and *open handlers* by providing a small plugin package.
|
|
228
|
+
Registration happens **at import time**, so importing the plugin activates it.
|
|
229
|
+
|
|
230
|
+
### Plugin structure
|
|
231
|
+
|
|
232
|
+
```text
|
|
233
|
+
xarray_prism_myplugin/
|
|
234
|
+
__init__.py # imports the plugin module (triggers registration)
|
|
235
|
+
plugin.py # detectors, URI types, and open handlers
|
|
236
|
+
pyproject.toml
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
### Plugin implementation
|
|
240
|
+
|
|
241
|
+
`xarray_prism_myplugin/__init__.py`
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
from .plugin import * # noqa: F401,F403
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
`xarray_prism_myplugin/plugin.py`
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
import xarray as xr
|
|
251
|
+
from xarray_prism import register_detector, register_uri_type, registry
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
@register_uri_type(priority=100)
|
|
255
|
+
def detect_myfs_uri(uri: str):
|
|
256
|
+
"""Detect a custom filesystem URI."""
|
|
257
|
+
if uri.lower().startswith("myfs://"):
|
|
258
|
+
return "myfs"
|
|
259
|
+
return None
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
@register_detector(priority=100)
|
|
263
|
+
def detect_foo_format(uri: str):
|
|
264
|
+
"""Detect a custom file format."""
|
|
265
|
+
if uri.lower().endswith(".foo"):
|
|
266
|
+
return "foo"
|
|
267
|
+
return None
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@registry.register("foo", uri_type="myfs")
|
|
271
|
+
def open_foo_from_myfs(uri: str, **kwargs):
|
|
272
|
+
"""Open .foo files from myfs:// URIs."""
|
|
273
|
+
translated = uri.replace("myfs://", "https://my-gateway.example/")
|
|
274
|
+
return xr.open_dataset(translated, engine="h5netcdf", **kwargs)
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
### Plugin installation
|
|
278
|
+
|
|
279
|
+
`pyproject.toml`
|
|
280
|
+
|
|
281
|
+
```toml
|
|
282
|
+
[project]
|
|
283
|
+
name = "xarray-prism-myplugin"
|
|
284
|
+
version = "0.1.0"
|
|
285
|
+
dependencies = ["xarray-prism"]
|
|
286
|
+
|
|
287
|
+
[project.entry-points."xarray_prism.plugins"]
|
|
288
|
+
myplugin = "xarray_prism_myplugin"
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Using the plugin
|
|
292
|
+
|
|
293
|
+
After installing the plugin package, **import it once** to activate the registrations:
|
|
294
|
+
|
|
295
|
+
```python
|
|
296
|
+
import xarray_prism_myplugin # activates detectors and handlers
|
|
297
|
+
|
|
298
|
+
import xarray as xr
|
|
299
|
+
ds = xr.open_dataset("myfs://bucket/path/data.foo", engine="prism")
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
## Development
|
|
304
|
+
|
|
305
|
+
### Setup Development Environment
|
|
306
|
+
|
|
307
|
+
```bash
|
|
308
|
+
# Start test services (MinIO, THREDDS)
|
|
309
|
+
docker-compose -f dev-env/docker-compose.yaml up -d --remove-orphans
|
|
310
|
+
|
|
311
|
+
# Create conda environment
|
|
312
|
+
conda create -n xarray-prism python=3.12 -y
|
|
313
|
+
conda activate xarray-prism
|
|
314
|
+
|
|
315
|
+
# Install package in editable mode with dev dependencies
|
|
316
|
+
pip install -e ".[dev]"
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### Running Tests
|
|
320
|
+
|
|
321
|
+
```bash
|
|
322
|
+
# Run tests
|
|
323
|
+
tox -e test
|
|
324
|
+
|
|
325
|
+
# Run with coverage
|
|
326
|
+
tox -e test-cov
|
|
327
|
+
|
|
328
|
+
# Lint
|
|
329
|
+
tox -e lint
|
|
330
|
+
|
|
331
|
+
# Type checking
|
|
332
|
+
tox -e types
|
|
333
|
+
|
|
334
|
+
# Auto-format code
|
|
335
|
+
tox -e format
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
### Creating a Release
|
|
339
|
+
|
|
340
|
+
Releases are managed via GitHub Actions and tox:
|
|
341
|
+
|
|
342
|
+
```bash
|
|
343
|
+
# Tag a new release (creates git tag)
|
|
344
|
+
tox -e release
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
The release workflow is triggered automatically when:
|
|
348
|
+
- A version tag (`v*.*.*`) is pushed -> Full release to PyPI
|
|
349
|
+
- Manual workflow dispatch with RC number -> Pre-release to PyPI
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
# Xarray Prism Engine
|
|
2
|
+
|
|
3
|
+
A multi-format and multi-storage xarray engine with automatic engine detection,
|
|
4
|
+
and the ability to register new data formats and URI types for climate data.
|
|
5
|
+
|
|
6
|
+
> [!Important]
|
|
7
|
+
> If you encounter a data format that the `freva` engine is not able to open, please
|
|
8
|
+
> file an issue report [here](https://github.com/freva-org/freva-xarray/issues/new).
|
|
9
|
+
> This helps us improve the engine, enabling users to work with different kinds of climate data.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
### Install via PyPI
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install xarray-prism
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Install via Conda
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
conda install xarray-prism
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
### Using with xarray
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import xarray as xr
|
|
32
|
+
|
|
33
|
+
# Auto-detect format
|
|
34
|
+
ds = xr.open_dataset("my_data.unknown_fmt", engine="prism")
|
|
35
|
+
|
|
36
|
+
# Remote Zarr on S3
|
|
37
|
+
ds = xr.open_dataset(
|
|
38
|
+
"s3://freva/workshop/tas.zarr",
|
|
39
|
+
engine="prism",
|
|
40
|
+
storage_options={
|
|
41
|
+
"anon": True,
|
|
42
|
+
"client_kwargs": {
|
|
43
|
+
"endpoint_url": "https://s3.eu-dkrz-1.dkrz.cloud"
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# Remote NetCDF3 on S3
|
|
49
|
+
ds = xr.open_dataset(
|
|
50
|
+
"s3://freva/workshop/tas.nc",
|
|
51
|
+
engine="prism",
|
|
52
|
+
storage_options={
|
|
53
|
+
"anon": True,
|
|
54
|
+
"client_kwargs": {
|
|
55
|
+
"endpoint_url": "https://s3.eu-dkrz-1.dkrz.cloud"
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Remote NetCDF4 on S3
|
|
61
|
+
ds = xr.open_dataset(
|
|
62
|
+
"s3://freva/workshop/tas.nc4",
|
|
63
|
+
engine="prism",
|
|
64
|
+
storage_options={
|
|
65
|
+
"anon": True,
|
|
66
|
+
"client_kwargs": {
|
|
67
|
+
"endpoint_url": "https://s3.eu-dkrz-1.dkrz.cloud"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Remote Zarr on S3 - non-anon
|
|
73
|
+
ds = xr.open_dataset(
|
|
74
|
+
"s3://bucket/data.zarr",
|
|
75
|
+
engine="prism",
|
|
76
|
+
storage_options={
|
|
77
|
+
"key": "YOUR_KEY",
|
|
78
|
+
"secret": "YOUR_SECRET",
|
|
79
|
+
"client_kwargs": {
|
|
80
|
+
"endpoint_url": "S3_ENDPOINT"
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# OPeNDAP from THREDDS
|
|
86
|
+
ds = xr.open_dataset(
|
|
87
|
+
"https://icdc.cen.uni-hamburg.de/thredds/dodsC/ftpthredds/ar5_sea_level_rise/gia_mean.nc",
|
|
88
|
+
engine="prism"
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Local GRIB file
|
|
92
|
+
ds = xr.open_dataset("forecast.grib2", engine="prism")
|
|
93
|
+
|
|
94
|
+
# GeoTIFF
|
|
95
|
+
ds = xr.open_dataset("satellite.tif", engine="prism")
|
|
96
|
+
|
|
97
|
+
# tip: Handle the cache manually by yourself
|
|
98
|
+
xr.open_dataset(
|
|
99
|
+
"simplecache::s3://bucket/file.nc3",
|
|
100
|
+
engine="prism",
|
|
101
|
+
storage_options={
|
|
102
|
+
"s3": {"anon": True, "client_kwargs": {"endpoint_url": "..."}},
|
|
103
|
+
"simplecache": {"cache_storage": "/path/to/cache"}
|
|
104
|
+
}
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# Even for the tif format on the S3 you can pass the credential through
|
|
108
|
+
# storage_options which is not supported by rasterio:
|
|
109
|
+
xr.open_dataset(
|
|
110
|
+
"s3://bucket/file.tif",
|
|
111
|
+
engine="prism",
|
|
112
|
+
storage_options={
|
|
113
|
+
"key": "YOUR_KEY",
|
|
114
|
+
"secret": "YOUR_SECRET",
|
|
115
|
+
"client_kwargs": {
|
|
116
|
+
"endpoint_url": "S3_ENDPOINT"
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Supported Formats
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|Data format | Remote backend | Local FS | Cache|
|
|
126
|
+
|--------------|------------------------|-----------|-----------|
|
|
127
|
+
|GRIB | cfgrib + fsspec | cfgrib | fsspec simplecache (full-file)|
|
|
128
|
+
|Zarr | zarr + fsspec | zarr | chunked key/value store|
|
|
129
|
+
|NetCDF3 | scipy + fsspec | scipy | fsspec byte cache (5 MB blocks but full download)|
|
|
130
|
+
|NetCDF4/HDF5 | h5netcdf + fsspec | h5netcdf | fsspec byte cache (5 MB block)|
|
|
131
|
+
|GeoTIFF | rasterio + fsspec | rasterio | GDAL/rasterio block cache (5 MB block)|
|
|
132
|
+
|OPeNDAP/DODS | netCDF4 | n/a | n/a|
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
> [!WARNING]
|
|
136
|
+
> **Remote GRIB & NetCDF3 require full file download**
|
|
137
|
+
>
|
|
138
|
+
> Unlike Zarr or HDF5, these formats don't support partial/chunk reads over the network.
|
|
139
|
+
>
|
|
140
|
+
> By default, xarray-prism caches files in the system temp directory.
|
|
141
|
+
> This works well for most cases.
|
|
142
|
+
> If temp storage is a concern (e.g., limited space or cleared on reboot),
|
|
143
|
+
> you can specify a persistent cache:
|
|
144
|
+
>
|
|
145
|
+
> | Option | How |
|
|
146
|
+
> |--------|-----|
|
|
147
|
+
> | Environment variable | `export XARRAY_PRISM_CACHE=/path/to/cache` |
|
|
148
|
+
> | Per-call | `storage_options={"simplecache": {"cache_storage": "/path"}}` |
|
|
149
|
+
> | Default | System temp directory |
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
## Customization
|
|
153
|
+
|
|
154
|
+
### Custom Format Detectors and URI Types
|
|
155
|
+
|
|
156
|
+
You can extend **xarray-prism** with custom *format detectors*, *URI types*, and *open handlers* by providing a small plugin package.
|
|
157
|
+
Registration happens **at import time**, so importing the plugin activates it.
|
|
158
|
+
|
|
159
|
+
### Plugin structure
|
|
160
|
+
|
|
161
|
+
```text
|
|
162
|
+
xarray_prism_myplugin/
|
|
163
|
+
__init__.py # imports the plugin module (triggers registration)
|
|
164
|
+
plugin.py # detectors, URI types, and open handlers
|
|
165
|
+
pyproject.toml
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Plugin implementation
|
|
169
|
+
|
|
170
|
+
`xarray_prism_myplugin/__init__.py`
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from .plugin import * # noqa: F401,F403
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
`xarray_prism_myplugin/plugin.py`
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
import xarray as xr
|
|
180
|
+
from xarray_prism import register_detector, register_uri_type, registry
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@register_uri_type(priority=100)
|
|
184
|
+
def detect_myfs_uri(uri: str):
|
|
185
|
+
"""Detect a custom filesystem URI."""
|
|
186
|
+
if uri.lower().startswith("myfs://"):
|
|
187
|
+
return "myfs"
|
|
188
|
+
return None
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@register_detector(priority=100)
|
|
192
|
+
def detect_foo_format(uri: str):
|
|
193
|
+
"""Detect a custom file format."""
|
|
194
|
+
if uri.lower().endswith(".foo"):
|
|
195
|
+
return "foo"
|
|
196
|
+
return None
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
@registry.register("foo", uri_type="myfs")
|
|
200
|
+
def open_foo_from_myfs(uri: str, **kwargs):
|
|
201
|
+
"""Open .foo files from myfs:// URIs."""
|
|
202
|
+
translated = uri.replace("myfs://", "https://my-gateway.example/")
|
|
203
|
+
return xr.open_dataset(translated, engine="h5netcdf", **kwargs)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### Plugin installation
|
|
207
|
+
|
|
208
|
+
`pyproject.toml`
|
|
209
|
+
|
|
210
|
+
```toml
|
|
211
|
+
[project]
|
|
212
|
+
name = "xarray-prism-myplugin"
|
|
213
|
+
version = "0.1.0"
|
|
214
|
+
dependencies = ["xarray-prism"]
|
|
215
|
+
|
|
216
|
+
[project.entry-points."xarray_prism.plugins"]
|
|
217
|
+
myplugin = "xarray_prism_myplugin"
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Using the plugin
|
|
221
|
+
|
|
222
|
+
After installing the plugin package, **import it once** to activate the registrations:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
import xarray_prism_myplugin # activates detectors and handlers
|
|
226
|
+
|
|
227
|
+
import xarray as xr
|
|
228
|
+
ds = xr.open_dataset("myfs://bucket/path/data.foo", engine="prism")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
## Development
|
|
233
|
+
|
|
234
|
+
### Setup Development Environment
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
# Start test services (MinIO, THREDDS)
|
|
238
|
+
docker-compose -f dev-env/docker-compose.yaml up -d --remove-orphans
|
|
239
|
+
|
|
240
|
+
# Create conda environment
|
|
241
|
+
conda create -n xarray-prism python=3.12 -y
|
|
242
|
+
conda activate xarray-prism
|
|
243
|
+
|
|
244
|
+
# Install package in editable mode with dev dependencies
|
|
245
|
+
pip install -e ".[dev]"
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Running Tests
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
# Run tests
|
|
252
|
+
tox -e test
|
|
253
|
+
|
|
254
|
+
# Run with coverage
|
|
255
|
+
tox -e test-cov
|
|
256
|
+
|
|
257
|
+
# Lint
|
|
258
|
+
tox -e lint
|
|
259
|
+
|
|
260
|
+
# Type checking
|
|
261
|
+
tox -e types
|
|
262
|
+
|
|
263
|
+
# Auto-format code
|
|
264
|
+
tox -e format
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Creating a Release
|
|
268
|
+
|
|
269
|
+
Releases are managed via GitHub Actions and tox:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
# Tag a new release (creates git tag)
|
|
273
|
+
tox -e release
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
The release workflow is triggered automatically when:
|
|
277
|
+
- A version tag (`v*.*.*`) is pushed -> Full release to PyPI
|
|
278
|
+
- Manual workflow dispatch with RC number -> Pre-release to PyPI
|