xarray_sql 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/.gitignore +2 -0
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/Cargo.lock +1 -1
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/Cargo.toml +1 -1
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/PKG-INFO +25 -16
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/README.md +15 -15
- xarray_sql-0.2.2/docs/assets/logo.svg +104 -0
- xarray_sql-0.2.2/docs/contributing.md +1 -0
- xarray_sql-0.2.2/docs/examples.md +23 -0
- xarray_sql-0.2.2/docs/index.md +1 -0
- xarray_sql-0.2.2/docs/reference/xarray_sql.md +8 -0
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/pyproject.toml +16 -1
- xarray_sql-0.2.2/tests/__init__.py +0 -0
- xarray_sql-0.2.2/tests/conftest.py +144 -0
- xarray_sql-0.2.2/tests/test_cft.py +176 -0
- xarray_sql-0.2.1/xarray_sql/df_test.py → xarray_sql-0.2.2/tests/test_df.py +78 -139
- xarray_sql-0.2.1/xarray_sql/reader_test.py → xarray_sql-0.2.2/tests/test_reader.py +3 -3
- xarray_sql-0.2.2/tests/test_sql.py +318 -0
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/__init__.py +2 -0
- xarray_sql-0.2.2/xarray_sql/cftime.py +248 -0
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/df.py +108 -41
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/reader.py +9 -8
- xarray_sql-0.2.2/xarray_sql/sql.py +63 -0
- xarray_sql-0.2.2/zensical.toml +122 -0
- xarray_sql-0.2.1/xarray_sql/sql.py +0 -18
- xarray_sql-0.2.1/xarray_sql/sql_test.py +0 -194
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/LICENSE +0 -0
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/src/lib.rs +0 -0
- {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/core.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xarray_sql
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -19,9 +19,18 @@ Classifier: Topic :: Database :: Front-Ends
|
|
|
19
19
|
Requires-Dist: dask>=2024.8.0
|
|
20
20
|
Requires-Dist: datafusion==52.0.0
|
|
21
21
|
Requires-Dist: xarray>=2024.7.0
|
|
22
|
+
Requires-Dist: xarray-sql[docs] ; extra == 'dev'
|
|
23
|
+
Requires-Dist: pre-commit ; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest ; extra == 'dev'
|
|
25
|
+
Requires-Dist: watchfiles ; extra == 'dev'
|
|
26
|
+
Requires-Dist: zensical ; extra == 'docs'
|
|
27
|
+
Requires-Dist: mkdocstrings[python] ; extra == 'docs'
|
|
28
|
+
Requires-Dist: cftime ; extra == 'test'
|
|
22
29
|
Requires-Dist: pytest ; extra == 'test'
|
|
23
30
|
Requires-Dist: xarray[io] ; extra == 'test'
|
|
24
31
|
Requires-Dist: gcsfs ; extra == 'test'
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Provides-Extra: docs
|
|
25
34
|
Provides-Extra: test
|
|
26
35
|
License-File: LICENSE
|
|
27
36
|
Summary: Query Xarray with SQL.
|
|
@@ -128,11 +137,11 @@ That's it!
|
|
|
128
137
|
_2025 update_: This library now implements a Dask-like `from_map` interface in
|
|
129
138
|
pure DataFusion and PyArrow, but works with the same principle!
|
|
130
139
|
|
|
131
|
-
_2026 update_: Instead of `from_map()`, we
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
140
|
+
_2026 update_: Instead of `from_map()`, we create a way to translate Xarray chunks
|
|
141
|
+
into Arrow RecordBatches. We pass a Python callback into a DataFusion `TableProvider`
|
|
142
|
+
that lets the DB engine translate the underlying Dataset arrays into DataFusion partitions.
|
|
143
|
+
Ultimately, the initial insight of the `pivot()` function -- that any ndarray can be
|
|
144
|
+
translated into a 2D table -- underlies this performant query mechanism.
|
|
136
145
|
|
|
137
146
|
## Why does this work?
|
|
138
147
|
|
|
@@ -150,11 +159,6 @@ early users – "tire kickers", if you will. We'd love your input to shape the d
|
|
|
150
159
|
project! Please, give this a try and [file issues](https://github.com/alxmrs/xarray-sql/issues) as
|
|
151
160
|
you see fit. Check out our [contributing guide](CONTRIBUTING.md), too 😉.
|
|
152
161
|
|
|
153
|
-
I can say that for now, the library is oriented towards making whole scans of
|
|
154
|
-
Xarray Datasets. Common filter optimizations (even basic ones like an `.sel()` on
|
|
155
|
-
core dimensions, let alone predicate push downs) are not fully implemented yet.
|
|
156
|
-
However, these operations and more are on our roadmap.
|
|
157
|
-
|
|
158
162
|
## What would a deeper integration look like?
|
|
159
163
|
|
|
160
164
|
I have a few ideas so far. One approach involves applying operations directly on
|
|
@@ -169,18 +173,21 @@ and BigQuery. More thoughts on this
|
|
|
169
173
|
in [#4](https://github.com/alxmrs/xarray-sql/issues/4).
|
|
170
174
|
|
|
171
175
|
_2025 update_: Something like this is being built across a few projects! The ones I know about are:
|
|
176
|
+
|
|
172
177
|
- [CartoDB's Raquet](https://github.com/CartoDB/raquet)
|
|
173
178
|
- The DataFusion community's [arrow-zarr](https://github.com/datafusion-contrib/arrow-zarr)
|
|
174
179
|
|
|
175
|
-
|
|
176
|
-
|
|
180
|
+
_2026 update_: A colleague and I are experimenting with native Zarr RDBMS engines. Check out:
|
|
181
|
+
|
|
182
|
+
- [Zarr-Datafusion](https://lib.rs/crates/zarr-datafusion)
|
|
183
|
+
- [DuckDB-Zarr](https://github.com/hobbes-bot/duckdb-zarr)
|
|
177
184
|
|
|
178
185
|
## Roadmap
|
|
179
186
|
|
|
180
187
|
- [x] ~Lazy evaluation via the pyarrow Dataset interface [#93](https://github.com/alxmrs/xarray-sql/issues/93).~ _Implemented in [#100](https://github.com/alxmrs/xarray-sql/pull/100)_
|
|
181
|
-
- [
|
|
182
|
-
- [
|
|
183
|
-
- [ ] Translate a single Zarr to a collection of tables
|
|
188
|
+
- [x] Support proper parallelism via proper partition handling on the rust/datafusion side. [#106](https://github.com/alxmrs/xarray-sql/issues/106)
|
|
189
|
+
- [x] Support core datafusion optimizations to scan less data, like [#104](https://github.com/alxmrs/xarray-sql/issues/104), ...
|
|
190
|
+
- [ ] Translate a single Zarr to a collection of tables [#85](https://github.com/alxmrs/xarray-sql/issues/85).
|
|
184
191
|
- [ ] Distributed beyond a single node through the DataFusion integration with Ray Datasets [#68](https://github.com/alxmrs/xarray-sql/issues/68) or Apache Ballista [#98](https://github.com/alxmrs/xarray-sql/issues/98).
|
|
185
192
|
- [ ] Demo: calculate Sea Surface Temperature from 1940 - Present in SQL [#36](https://github.com/alxmrs/xarray-sql/issues/36).
|
|
186
193
|
- [ ] Provide an option to integrate DataFusion directly to Zarr via Rust [#4](https://github.com/alxmrs/xarray-sql/issues/4).
|
|
@@ -200,6 +207,8 @@ I want to give a special thanks to the following folks and institutions:
|
|
|
200
207
|
and DataFusion-specific collaboration.
|
|
201
208
|
- The gracious volunteer data science students at [UCSD's DS3](https://www.ds3atucsd.com/) org,
|
|
202
209
|
who are working to make this library better.
|
|
210
|
+
- Andrew Huang for the sense of taste he brings to the project and consummate code
|
|
211
|
+
changes.
|
|
203
212
|
|
|
204
213
|
|
|
205
214
|
## License
|
|
@@ -94,11 +94,11 @@ That's it!
|
|
|
94
94
|
_2025 update_: This library now implements a Dask-like `from_map` interface in
|
|
95
95
|
pure DataFusion and PyArrow, but works with the same principle!
|
|
96
96
|
|
|
97
|
-
_2026 update_: Instead of `from_map()`, we
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
97
|
+
_2026 update_: Instead of `from_map()`, we create a way to translate Xarray chunks
|
|
98
|
+
into Arrow RecordBatches. We pass a Python callback into a DataFusion `TableProvider`
|
|
99
|
+
that lets the DB engine translate the underlying Dataset arrays into DataFusion partitions.
|
|
100
|
+
Ultimately, the initial insight of the `pivot()` function -- that any ndarray can be
|
|
101
|
+
translated into a 2D table -- underlies this performant query mechanism.
|
|
102
102
|
|
|
103
103
|
## Why does this work?
|
|
104
104
|
|
|
@@ -116,11 +116,6 @@ early users – "tire kickers", if you will. We'd love your input to shape the d
|
|
|
116
116
|
project! Please, give this a try and [file issues](https://github.com/alxmrs/xarray-sql/issues) as
|
|
117
117
|
you see fit. Check out our [contributing guide](CONTRIBUTING.md), too 😉.
|
|
118
118
|
|
|
119
|
-
I can say that for now, the library is oriented towards making whole scans of
|
|
120
|
-
Xarray Datasets. Common filter optimizations (even basic ones like an `.sel()` on
|
|
121
|
-
core dimensions, let alone predicate push downs) are not fully implemented yet.
|
|
122
|
-
However, these operations and more are on our roadmap.
|
|
123
|
-
|
|
124
119
|
## What would a deeper integration look like?
|
|
125
120
|
|
|
126
121
|
I have a few ideas so far. One approach involves applying operations directly on
|
|
@@ -135,18 +130,21 @@ and BigQuery. More thoughts on this
|
|
|
135
130
|
in [#4](https://github.com/alxmrs/xarray-sql/issues/4).
|
|
136
131
|
|
|
137
132
|
_2025 update_: Something like this is being built across a few projects! The ones I know about are:
|
|
133
|
+
|
|
138
134
|
- [CartoDB's Raquet](https://github.com/CartoDB/raquet)
|
|
139
135
|
- The DataFusion community's [arrow-zarr](https://github.com/datafusion-contrib/arrow-zarr)
|
|
140
136
|
|
|
141
|
-
|
|
142
|
-
|
|
137
|
+
_2026 update_: A colleague and I are experimenting with native Zarr RDBMS engines. Check out:
|
|
138
|
+
|
|
139
|
+
- [Zarr-Datafusion](https://lib.rs/crates/zarr-datafusion)
|
|
140
|
+
- [DuckDB-Zarr](https://github.com/hobbes-bot/duckdb-zarr)
|
|
143
141
|
|
|
144
142
|
## Roadmap
|
|
145
143
|
|
|
146
144
|
- [x] ~Lazy evaluation via the pyarrow Dataset interface [#93](https://github.com/alxmrs/xarray-sql/issues/93).~ _Implemented in [#100](https://github.com/alxmrs/xarray-sql/pull/100)_
|
|
147
|
-
- [
|
|
148
|
-
- [
|
|
149
|
-
- [ ] Translate a single Zarr to a collection of tables
|
|
145
|
+
- [x] Support proper parallelism via proper partition handling on the rust/datafusion side. [#106](https://github.com/alxmrs/xarray-sql/issues/106)
|
|
146
|
+
- [x] Support core datafusion optimizations to scan less data, like [#104](https://github.com/alxmrs/xarray-sql/issues/104), ...
|
|
147
|
+
- [ ] Translate a single Zarr to a collection of tables [#85](https://github.com/alxmrs/xarray-sql/issues/85).
|
|
150
148
|
- [ ] Distributed beyond a single node through the DataFusion integration with Ray Datasets [#68](https://github.com/alxmrs/xarray-sql/issues/68) or Apache Ballista [#98](https://github.com/alxmrs/xarray-sql/issues/98).
|
|
151
149
|
- [ ] Demo: calculate Sea Surface Temperature from 1940 - Present in SQL [#36](https://github.com/alxmrs/xarray-sql/issues/36).
|
|
152
150
|
- [ ] Provide an option to integrate DataFusion directly to Zarr via Rust [#4](https://github.com/alxmrs/xarray-sql/issues/4).
|
|
@@ -166,6 +164,8 @@ I want to give a special thanks to the following folks and institutions:
|
|
|
166
164
|
and DataFusion-specific collaboration.
|
|
167
165
|
- The gracious volunteer data science students at [UCSD's DS3](https://www.ds3atucsd.com/) org,
|
|
168
166
|
who are working to make this library better.
|
|
167
|
+
- Andrew Huang for the sense of taste he brings to the project and consummate code
|
|
168
|
+
changes.
|
|
169
169
|
|
|
170
170
|
|
|
171
171
|
## License
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
2
|
+
<svg
|
|
3
|
+
version="1.1"
|
|
4
|
+
x="0px"
|
|
5
|
+
y="0px"
|
|
6
|
+
viewBox="50 115 420 395"
|
|
7
|
+
xml:space="preserve"
|
|
8
|
+
id="svg6"
|
|
9
|
+
sodipodi:docname="logo.svg"
|
|
10
|
+
inkscape:version="1.3.2 (091e20e, 2023-11-25)"
|
|
11
|
+
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
|
12
|
+
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
|
13
|
+
xmlns="http://www.w3.org/2000/svg"
|
|
14
|
+
xmlns:svg="http://www.w3.org/2000/svg"><defs
|
|
15
|
+
id="defs6" /><sodipodi:namedview
|
|
16
|
+
id="namedview6"
|
|
17
|
+
pagecolor="#ffffff"
|
|
18
|
+
bordercolor="#000000"
|
|
19
|
+
borderopacity="0.25"
|
|
20
|
+
inkscape:showpageshadow="2"
|
|
21
|
+
inkscape:pageopacity="0.0"
|
|
22
|
+
inkscape:pagecheckerboard="0"
|
|
23
|
+
inkscape:deskcolor="#d1d1d1"
|
|
24
|
+
showguides="false"
|
|
25
|
+
inkscape:zoom="0.59746835"
|
|
26
|
+
inkscape:cx="76.154661"
|
|
27
|
+
inkscape:cy="63.601695"
|
|
28
|
+
inkscape:window-width="1320"
|
|
29
|
+
inkscape:window-height="905"
|
|
30
|
+
inkscape:window-x="181"
|
|
31
|
+
inkscape:window-y="34"
|
|
32
|
+
inkscape:window-maximized="0"
|
|
33
|
+
inkscape:current-layer="g6" />
|
|
34
|
+
<style
|
|
35
|
+
type="text/css"
|
|
36
|
+
id="style1">
|
|
37
|
+
.st0{fill:#216C89;}
|
|
38
|
+
.st1{fill:#4993AA;}
|
|
39
|
+
.st2{fill:#0F4565;}
|
|
40
|
+
.st3{fill:#6BE8E8;}
|
|
41
|
+
.st4{fill:#9DEEF4;}
|
|
42
|
+
.st5{fill:#4ACFDD;}
|
|
43
|
+
.st6{fill:#E38017;}
|
|
44
|
+
.st7{fill:#16AFB5;}
|
|
45
|
+
</style>
|
|
46
|
+
<g
|
|
47
|
+
id="g6">
|
|
48
|
+
<!-- BOTTOM DATABASE (dark) -->
|
|
49
|
+
<path
|
|
50
|
+
class="st2"
|
|
51
|
+
d="m 65,362.92813 c 0,0 0,96.42499 0,96.42499 0,17.9075 70,27.55 112,27.55 42,0 112,-9.6425 112,-27.55 v -96.42499 c 0,17.90749 -70,27.54999 -112,27.54999 -42,0 -112,-9.6425 -112,-27.54999 z"
|
|
52
|
+
id="path1"
|
|
53
|
+
style="stroke-width:0.82991" />
|
|
54
|
+
<path
|
|
55
|
+
class="st0"
|
|
56
|
+
d="m 177,390.47812 c 42,0 112,-9.6425 112,-27.54999 v 96.42499 c 0,17.9075 -70,27.55 -112,27.55 z"
|
|
57
|
+
opacity="0.4"
|
|
58
|
+
id="path2"
|
|
59
|
+
style="stroke-width:0.82991" />
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
<!-- TOP DATABASE (teal) -->
|
|
64
|
+
<path
|
|
65
|
+
class="st5"
|
|
66
|
+
d="m 65,276.83438 c 0,0 0,75.76249 0,75.76249 0,17.90751 70,27.55001 112,27.55001 42,0 112,-9.6425 112,-27.55001 v -75.76249 c 0,17.90749 -70,27.54999 -112,27.54999 -42,0 -112,-9.6425 -112,-27.54999 z"
|
|
67
|
+
id="path3"
|
|
68
|
+
style="stroke-width:0.82991" />
|
|
69
|
+
<path
|
|
70
|
+
class="st3"
|
|
71
|
+
d="m 177,304.38437 c 42,0 112,-9.6425 112,-27.54999 v 75.76249 c 0,17.90751 -70,27.55001 -112,27.55001 z"
|
|
72
|
+
opacity="0.4"
|
|
73
|
+
id="path4"
|
|
74
|
+
style="stroke-width:0.82991" />
|
|
75
|
+
<path
|
|
76
|
+
class="st5"
|
|
77
|
+
d="m 66.576,190.40937 c 0,0 0,75.7625 0,75.7625 0,17.90751 70,27.55 112,27.55 42,0 112,-9.64249 112,-27.55 v -75.7625 c 0,17.9075 -70,27.55 -112,27.55 -42,0 -112,-9.6425 -112,-27.55 z"
|
|
78
|
+
id="path3-5"
|
|
79
|
+
style="fill:#4acfdd;stroke-width:0.82991" /><path
|
|
80
|
+
class="st3"
|
|
81
|
+
d="m 178.576,217.95937 c 42,0 112,-9.6425 112,-27.55 v 75.7625 c 0,17.90751 -70,27.55 -112,27.55 z"
|
|
82
|
+
opacity="0.4"
|
|
83
|
+
id="path4-7"
|
|
84
|
+
style="fill:#6be8e8;stroke-width:0.82991" /><ellipse
|
|
85
|
+
class="st4"
|
|
86
|
+
cx="178.576"
|
|
87
|
+
cy="183.52188"
|
|
88
|
+
rx="112"
|
|
89
|
+
ry="27.549999"
|
|
90
|
+
id="ellipse4-5"
|
|
91
|
+
style="fill:#9deef4;stroke-width:0.82991" />
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
<!-- DIAGONAL BARS -->
|
|
95
|
+
<polygon
|
|
96
|
+
class="st6"
|
|
97
|
+
points="377.48,412.74 308.66,482.2 308.66,346.56 377.48,277.09"
|
|
98
|
+
id="polygon5" />
|
|
99
|
+
<polygon
|
|
100
|
+
class="st7"
|
|
101
|
+
points="457.07,412.74 388.25,482.2 388.25,346.56 457.07,277.09"
|
|
102
|
+
id="polygon6" />
|
|
103
|
+
</g>
|
|
104
|
+
</svg>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--8<-- "CONTRIBUTING.md"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Examples
|
|
2
|
+
|
|
3
|
+
```python
|
|
4
|
+
import xarray as xr
|
|
5
|
+
import xarray_sql as xql
|
|
6
|
+
|
|
7
|
+
ds = xr.tutorial.open_dataset('air_temperature')
|
|
8
|
+
|
|
9
|
+
ctx = xql.XarrayContext()
|
|
10
|
+
ctx.from_dataset('air', ds, chunks=dict(time=24))
|
|
11
|
+
|
|
12
|
+
result = ctx.sql('''
|
|
13
|
+
SELECT
|
|
14
|
+
"lat", "lon", AVG("air") as air_avg
|
|
15
|
+
FROM
|
|
16
|
+
"air"
|
|
17
|
+
GROUP BY
|
|
18
|
+
"lat", "lon"
|
|
19
|
+
''')
|
|
20
|
+
|
|
21
|
+
df = result.to_pandas()
|
|
22
|
+
df.head()
|
|
23
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--8<-- "README.md"
|
|
@@ -37,10 +37,21 @@ dependencies = [
|
|
|
37
37
|
|
|
38
38
|
[project.optional-dependencies]
|
|
39
39
|
test = [
|
|
40
|
+
"cftime",
|
|
40
41
|
"pytest",
|
|
41
42
|
"xarray[io]",
|
|
42
43
|
"gcsfs",
|
|
43
44
|
]
|
|
45
|
+
docs = [
|
|
46
|
+
"zensical",
|
|
47
|
+
"mkdocstrings[python]",
|
|
48
|
+
]
|
|
49
|
+
dev = [
|
|
50
|
+
"xarray_sql[docs]",
|
|
51
|
+
"pre-commit",
|
|
52
|
+
"pytest",
|
|
53
|
+
"watchfiles",
|
|
54
|
+
]
|
|
44
55
|
|
|
45
56
|
[project.urls]
|
|
46
57
|
Homepage = "https://github.com/alxmrs/xarray-sql"
|
|
@@ -51,7 +62,7 @@ features = ["pyo3/extension-module"]
|
|
|
51
62
|
module-name = "xarray_sql._native"
|
|
52
63
|
|
|
53
64
|
[tool.setuptools.packages.find]
|
|
54
|
-
exclude = ["demo", "perf_tests"]
|
|
65
|
+
exclude = ["demo", "perf_tests", "tests", "tests.*"]
|
|
55
66
|
|
|
56
67
|
[tool.pyink]
|
|
57
68
|
line-length = 80
|
|
@@ -85,6 +96,7 @@ ignore_missing_imports = true
|
|
|
85
96
|
[dependency-groups]
|
|
86
97
|
dev = [
|
|
87
98
|
"xarray_sql[test]",
|
|
99
|
+
"xarray_sql[docs]",
|
|
88
100
|
"py-spy>=0.4.0",
|
|
89
101
|
"pyink>=24.10.1",
|
|
90
102
|
"maturin>=1.9.1",
|
|
@@ -93,3 +105,6 @@ dev = [
|
|
|
93
105
|
[tool.uv]
|
|
94
106
|
# Rebuild package when any rust files change
|
|
95
107
|
cache-keys = [{file = "pyproject.toml"}, {file = "rust/Cargo.toml"}, {file = "**/*.rs"}]
|
|
108
|
+
|
|
109
|
+
[tool.pytest.ini_options]
|
|
110
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import xarray as xr
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def rand_wx(start: str, end: str) -> xr.Dataset:
|
|
9
|
+
np.random.seed(42)
|
|
10
|
+
lat = np.linspace(-90, 90, num=720)
|
|
11
|
+
lon = np.linspace(-180, 180, num=1440)
|
|
12
|
+
time = pd.date_range(start, end, freq="h")
|
|
13
|
+
level = np.array([1000, 500], dtype=np.int32)
|
|
14
|
+
reference_time = pd.Timestamp(start)
|
|
15
|
+
temperature = 15 + 8 * np.random.randn(720, 1440, len(time), len(level))
|
|
16
|
+
precipitation = 10 * np.random.rand(720, 1440, len(time), len(level))
|
|
17
|
+
return xr.Dataset(
|
|
18
|
+
data_vars=dict(
|
|
19
|
+
temperature=(["lat", "lon", "time", "level"], temperature),
|
|
20
|
+
precipitation=(["lat", "lon", "time", "level"], precipitation),
|
|
21
|
+
),
|
|
22
|
+
coords=dict(
|
|
23
|
+
lat=lat,
|
|
24
|
+
lon=lon,
|
|
25
|
+
time=time,
|
|
26
|
+
level=level,
|
|
27
|
+
reference_time=reference_time,
|
|
28
|
+
),
|
|
29
|
+
attrs=dict(description="Random weather."),
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def create_large_dataset(time_steps=1000, lat_points=100, lon_points=100):
|
|
34
|
+
"""Create a large xarray dataset for memory testing."""
|
|
35
|
+
np.random.seed(42)
|
|
36
|
+
|
|
37
|
+
time = pd.date_range("2020-01-01", periods=time_steps, freq="h")
|
|
38
|
+
lat = np.linspace(-90, 90, lat_points)
|
|
39
|
+
lon = np.linspace(-180, 180, lon_points)
|
|
40
|
+
|
|
41
|
+
temp_data = np.random.rand(time_steps, lat_points, lon_points) * 40 - 10
|
|
42
|
+
precip_data = np.random.rand(time_steps, lat_points, lon_points) * 100
|
|
43
|
+
|
|
44
|
+
return xr.Dataset(
|
|
45
|
+
{
|
|
46
|
+
"temperature": (["time", "lat", "lon"], temp_data),
|
|
47
|
+
"precipitation": (["time", "lat", "lon"], precip_data),
|
|
48
|
+
},
|
|
49
|
+
coords={"time": time, "lat": lat, "lon": lon},
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@pytest.fixture
|
|
54
|
+
def air():
|
|
55
|
+
ds = xr.tutorial.open_dataset("air_temperature")
|
|
56
|
+
chunks = {"time": 240}
|
|
57
|
+
return ds.chunk(chunks)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@pytest.fixture
|
|
61
|
+
def air_small(air):
|
|
62
|
+
return air.isel(time=slice(0, 12), lat=slice(0, 11), lon=slice(0, 10)).chunk(
|
|
63
|
+
{"time": 240}
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@pytest.fixture
|
|
68
|
+
def randwx():
|
|
69
|
+
return rand_wx("1995-01-13T00", "1995-01-13T01")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@pytest.fixture
|
|
73
|
+
def large_ds():
|
|
74
|
+
return create_large_dataset().chunk({"time": 25})
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@pytest.fixture
|
|
78
|
+
def air_dataset_small():
|
|
79
|
+
ds = xr.tutorial.open_dataset("air_temperature").chunk({"time": 240})
|
|
80
|
+
return ds.isel(time=slice(0, 12), lat=slice(0, 11), lon=slice(0, 10))
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@pytest.fixture
|
|
84
|
+
def air_dataset_large():
|
|
85
|
+
return xr.tutorial.open_dataset("air_temperature").chunk({"time": 240})
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@pytest.fixture
|
|
89
|
+
def rasm_ds():
|
|
90
|
+
"""rasm uses cftime.DatetimeNoLeap (noleap / 365_day) for time."""
|
|
91
|
+
return xr.tutorial.open_dataset("rasm")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@pytest.fixture
|
|
95
|
+
def weather_dataset():
|
|
96
|
+
ds = rand_wx("2023-01-01T00", "2023-01-01T12")
|
|
97
|
+
return ds.isel(time=slice(0, 6), lat=slice(0, 10), lon=slice(0, 10)).chunk(
|
|
98
|
+
{"time": 3}
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@pytest.fixture
|
|
103
|
+
def synthetic_dataset():
|
|
104
|
+
return create_large_dataset(
|
|
105
|
+
time_steps=50, lat_points=20, lon_points=20
|
|
106
|
+
).chunk({"time": 25})
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@pytest.fixture
|
|
110
|
+
def station_dataset():
|
|
111
|
+
return xr.Dataset(
|
|
112
|
+
{
|
|
113
|
+
"station_id": (["station"], [1, 2, 3, 4, 5]),
|
|
114
|
+
"elevation": (["station"], [100, 250, 500, 750, 1000]),
|
|
115
|
+
"name": (
|
|
116
|
+
["station"],
|
|
117
|
+
["Station_A", "Station_B", "Station_C", "Station_D", "Station_E"],
|
|
118
|
+
),
|
|
119
|
+
}
|
|
120
|
+
).chunk({"station": 5})
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@pytest.fixture
|
|
124
|
+
def air_and_stations():
|
|
125
|
+
air = (
|
|
126
|
+
xr.tutorial.open_dataset("air_temperature")
|
|
127
|
+
.isel(time=slice(0, 12), lat=slice(0, 5), lon=slice(0, 8))
|
|
128
|
+
.chunk({"time": 6})
|
|
129
|
+
)
|
|
130
|
+
stations = xr.Dataset(
|
|
131
|
+
{
|
|
132
|
+
"station_id": (["station"], [101, 102, 103]),
|
|
133
|
+
"lat": (
|
|
134
|
+
["station"],
|
|
135
|
+
[air.lat.values[0], air.lat.values[2], air.lat.values[4]],
|
|
136
|
+
),
|
|
137
|
+
"lon": (
|
|
138
|
+
["station"],
|
|
139
|
+
[air.lon.values[1], air.lon.values[3], air.lon.values[5]],
|
|
140
|
+
),
|
|
141
|
+
"elevation": (["station"], [100, 250, 500]),
|
|
142
|
+
}
|
|
143
|
+
).chunk({"station": 3})
|
|
144
|
+
return air, stations
|