xarray_sql 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/.gitignore +2 -0
  2. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/Cargo.lock +1 -1
  3. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/Cargo.toml +1 -1
  4. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/PKG-INFO +25 -16
  5. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/README.md +15 -15
  6. xarray_sql-0.2.2/docs/assets/logo.svg +104 -0
  7. xarray_sql-0.2.2/docs/contributing.md +1 -0
  8. xarray_sql-0.2.2/docs/examples.md +23 -0
  9. xarray_sql-0.2.2/docs/index.md +1 -0
  10. xarray_sql-0.2.2/docs/reference/xarray_sql.md +8 -0
  11. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/pyproject.toml +16 -1
  12. xarray_sql-0.2.2/tests/__init__.py +0 -0
  13. xarray_sql-0.2.2/tests/conftest.py +144 -0
  14. xarray_sql-0.2.2/tests/test_cft.py +176 -0
  15. xarray_sql-0.2.1/xarray_sql/df_test.py → xarray_sql-0.2.2/tests/test_df.py +78 -139
  16. xarray_sql-0.2.1/xarray_sql/reader_test.py → xarray_sql-0.2.2/tests/test_reader.py +3 -3
  17. xarray_sql-0.2.2/tests/test_sql.py +318 -0
  18. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/__init__.py +2 -0
  19. xarray_sql-0.2.2/xarray_sql/cftime.py +248 -0
  20. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/df.py +108 -41
  21. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/reader.py +9 -8
  22. xarray_sql-0.2.2/xarray_sql/sql.py +63 -0
  23. xarray_sql-0.2.2/zensical.toml +122 -0
  24. xarray_sql-0.2.1/xarray_sql/sql.py +0 -18
  25. xarray_sql-0.2.1/xarray_sql/sql_test.py +0 -194
  26. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/LICENSE +0 -0
  27. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/src/lib.rs +0 -0
  28. {xarray_sql-0.2.1 → xarray_sql-0.2.2}/xarray_sql/core.py +0 -0
@@ -12,3 +12,5 @@ test_data
12
12
  .chainlink
13
13
  .claude
14
14
  CHANGELOG.md
15
+ *.ipynb
16
+ /site
@@ -3375,7 +3375,7 @@ checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
3375
3375
 
3376
3376
  [[package]]
3377
3377
  name = "xarray_sql"
3378
- version = "0.2.1"
3378
+ version = "0.2.2"
3379
3379
  dependencies = [
3380
3380
  "arrow",
3381
3381
  "async-stream",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "xarray_sql"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  authors = ["Alex Merose"]
5
5
  edition = "2021"
6
6
  exclude = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xarray_sql
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Intended Audience :: Science/Research
6
6
  Classifier: Intended Audience :: Developers
@@ -19,9 +19,18 @@ Classifier: Topic :: Database :: Front-Ends
19
19
  Requires-Dist: dask>=2024.8.0
20
20
  Requires-Dist: datafusion==52.0.0
21
21
  Requires-Dist: xarray>=2024.7.0
22
+ Requires-Dist: xarray-sql[docs] ; extra == 'dev'
23
+ Requires-Dist: pre-commit ; extra == 'dev'
24
+ Requires-Dist: pytest ; extra == 'dev'
25
+ Requires-Dist: watchfiles ; extra == 'dev'
26
+ Requires-Dist: zensical ; extra == 'docs'
27
+ Requires-Dist: mkdocstrings[python] ; extra == 'docs'
28
+ Requires-Dist: cftime ; extra == 'test'
22
29
  Requires-Dist: pytest ; extra == 'test'
23
30
  Requires-Dist: xarray[io] ; extra == 'test'
24
31
  Requires-Dist: gcsfs ; extra == 'test'
32
+ Provides-Extra: dev
33
+ Provides-Extra: docs
25
34
  Provides-Extra: test
26
35
  License-File: LICENSE
27
36
  Summary: Query Xarray with SQL.
@@ -128,11 +137,11 @@ That's it!
128
137
  _2025 update_: This library now implements a Dask-like `from_map` interface in
129
138
  pure DataFusion and PyArrow, but works with the same principle!
130
139
 
131
- _2026 update_: Instead of `from_map()`, we make factory functions from blocks of
132
- Xarray datasets that return RecordBatchReaders. These feed into a Rust-based
133
- DataFusion `TableProvider`. Every chunk is uses the Arrow in memory format to
134
- translate between Python and Rust. Even still, the core of what makes this idea
135
- work is the core `pivot()` operation from where this project began!
140
+ _2026 update_: Instead of `from_map()`, we create a way to translate Xarray chunks
141
+ into Arrow RecordBatches. We pass a Python callback into a DataFusion `TableProvider`
142
+ that lets the DB engine translate the underlying Dataset arrays into DataFusion partitions.
143
+ Ultimately, the initial insight of the `pivot()` function -- that any ndarray can be
144
+ translated into a 2D table -- underlies this performant query mechanism.
136
145
 
137
146
  ## Why does this work?
138
147
 
@@ -150,11 +159,6 @@ early users – "tire kickers", if you will. We'd love your input to shape the d
150
159
  project! Please, give this a try and [file issues](https://github.com/alxmrs/xarray-sql/issues) as
151
160
  you see fit. Check out our [contributing guide](CONTRIBUTING.md), too 😉.
152
161
 
153
- I can say that for now, the library is oriented towards making whole scans of
154
- Xarray Datasets. Common filter optimizations (even basic ones like an `.sel()` on
155
- core dimensions, let alone predicate push downs) are not fully implemented yet.
156
- However, these operations and more are on our roadmap.
157
-
158
162
  ## What would a deeper integration look like?
159
163
 
160
164
  I have a few ideas so far. One approach involves applying operations directly on
@@ -169,18 +173,21 @@ and BigQuery. More thoughts on this
169
173
  in [#4](https://github.com/alxmrs/xarray-sql/issues/4).
170
174
 
171
175
  _2025 update_: Something like this is being built across a few projects! The ones I know about are:
176
+
172
177
  - [CartoDB's Raquet](https://github.com/CartoDB/raquet)
173
178
  - The DataFusion community's [arrow-zarr](https://github.com/datafusion-contrib/arrow-zarr)
174
179
 
175
- As of writing, this project is [amid integrating](https://github.com/alxmrs/xarray-sql/pull/69) a
176
- rust-based DataFusion backend provided by arrow-zarr.
180
+ _2026 update_: A colleague and I are experimenting with native Zarr RDBMS engines. Check out:
181
+
182
+ - [Zarr-Datafusion](https://lib.rs/crates/zarr-datafusion)
183
+ - [DuckDB-Zarr](https://github.com/hobbes-bot/duckdb-zarr)
177
184
 
178
185
  ## Roadmap
179
186
 
180
187
  - [x] ~Lazy evaluation via the pyarrow Dataset interface [#93](https://github.com/alxmrs/xarray-sql/issues/93).~ _Implemented in [#100](https://github.com/alxmrs/xarray-sql/pull/100)_
181
- - [ ] Support proper parallelism via proper partition handling on the rust/datafusion side. [#106](https://github.com/alxmrs/xarray-sql/issues/106)
182
- - [ ] Support core datafusion optimizations to scan less data, like [104](https://github.com/alxmrs/xarray-sql/issues/104), ...
183
- - [ ] Translate a single Zarr to a collection of tables via DataFusion's catalog interface [#85](https://github.com/alxmrs/xarray-sql/issues/85).
188
+ - [x] Support proper parallelism via proper partition handling on the rust/datafusion side. [#106](https://github.com/alxmrs/xarray-sql/issues/106)
189
+ - [x] Support core datafusion optimizations to scan less data, like [#104](https://github.com/alxmrs/xarray-sql/issues/104), ...
190
+ - [ ] Translate a single Zarr to a collection of tables [#85](https://github.com/alxmrs/xarray-sql/issues/85).
184
191
  - [ ] Distributed beyond a single node through the DataFusion integration with Ray Datasets [#68](https://github.com/alxmrs/xarray-sql/issues/68) or Apache Ballista [#98](https://github.com/alxmrs/xarray-sql/issues/98).
185
192
  - [ ] Demo: calculate Sea Surface Temperature from 1940 - Present in SQL [#36](https://github.com/alxmrs/xarray-sql/issues/36).
186
193
  - [ ] Provide an option to integrate DataFusion directly to Zarr via Rust [#4](https://github.com/alxmrs/xarray-sql/issues/4).
@@ -200,6 +207,8 @@ I want to give a special thanks to the following folks and institutions:
200
207
  and DataFusion-specific collaboration.
201
208
  - The gracious volunteer data science students at [UCSD's DS3](https://www.ds3atucsd.com/) org,
202
209
  who are working to make this library better.
210
+ - Andrew Huang for the sense of taste he brings to the project and consummate code
211
+ changes.
203
212
 
204
213
 
205
214
  ## License
@@ -94,11 +94,11 @@ That's it!
94
94
  _2025 update_: This library now implements a Dask-like `from_map` interface in
95
95
  pure DataFusion and PyArrow, but works with the same principle!
96
96
 
97
- _2026 update_: Instead of `from_map()`, we make factory functions from blocks of
98
- Xarray datasets that return RecordBatchReaders. These feed into a Rust-based
99
- DataFusion `TableProvider`. Every chunk is uses the Arrow in memory format to
100
- translate between Python and Rust. Even still, the core of what makes this idea
101
- work is the core `pivot()` operation from where this project began!
97
+ _2026 update_: Instead of `from_map()`, we create a way to translate Xarray chunks
98
+ into Arrow RecordBatches. We pass a Python callback into a DataFusion `TableProvider`
99
+ that lets the DB engine translate the underlying Dataset arrays into DataFusion partitions.
100
+ Ultimately, the initial insight of the `pivot()` function -- that any ndarray can be
101
+ translated into a 2D table -- underlies this performant query mechanism.
102
102
 
103
103
  ## Why does this work?
104
104
 
@@ -116,11 +116,6 @@ early users – "tire kickers", if you will. We'd love your input to shape the d
116
116
  project! Please, give this a try and [file issues](https://github.com/alxmrs/xarray-sql/issues) as
117
117
  you see fit. Check out our [contributing guide](CONTRIBUTING.md), too 😉.
118
118
 
119
- I can say that for now, the library is oriented towards making whole scans of
120
- Xarray Datasets. Common filter optimizations (even basic ones like an `.sel()` on
121
- core dimensions, let alone predicate push downs) are not fully implemented yet.
122
- However, these operations and more are on our roadmap.
123
-
124
119
  ## What would a deeper integration look like?
125
120
 
126
121
  I have a few ideas so far. One approach involves applying operations directly on
@@ -135,18 +130,21 @@ and BigQuery. More thoughts on this
135
130
  in [#4](https://github.com/alxmrs/xarray-sql/issues/4).
136
131
 
137
132
  _2025 update_: Something like this is being built across a few projects! The ones I know about are:
133
+
138
134
  - [CartoDB's Raquet](https://github.com/CartoDB/raquet)
139
135
  - The DataFusion community's [arrow-zarr](https://github.com/datafusion-contrib/arrow-zarr)
140
136
 
141
- As of writing, this project is [amid integrating](https://github.com/alxmrs/xarray-sql/pull/69) a
142
- rust-based DataFusion backend provided by arrow-zarr.
137
+ _2026 update_: A colleague and I are experimenting with native Zarr RDBMS engines. Check out:
138
+
139
+ - [Zarr-Datafusion](https://lib.rs/crates/zarr-datafusion)
140
+ - [DuckDB-Zarr](https://github.com/hobbes-bot/duckdb-zarr)
143
141
 
144
142
  ## Roadmap
145
143
 
146
144
  - [x] ~Lazy evaluation via the pyarrow Dataset interface [#93](https://github.com/alxmrs/xarray-sql/issues/93).~ _Implemented in [#100](https://github.com/alxmrs/xarray-sql/pull/100)_
147
- - [ ] Support proper parallelism via proper partition handling on the rust/datafusion side. [#106](https://github.com/alxmrs/xarray-sql/issues/106)
148
- - [ ] Support core datafusion optimizations to scan less data, like [104](https://github.com/alxmrs/xarray-sql/issues/104), ...
149
- - [ ] Translate a single Zarr to a collection of tables via DataFusion's catalog interface [#85](https://github.com/alxmrs/xarray-sql/issues/85).
145
+ - [x] Support proper parallelism via proper partition handling on the rust/datafusion side. [#106](https://github.com/alxmrs/xarray-sql/issues/106)
146
+ - [x] Support core datafusion optimizations to scan less data, like [#104](https://github.com/alxmrs/xarray-sql/issues/104), ...
147
+ - [ ] Translate a single Zarr to a collection of tables [#85](https://github.com/alxmrs/xarray-sql/issues/85).
150
148
  - [ ] Distributed beyond a single node through the DataFusion integration with Ray Datasets [#68](https://github.com/alxmrs/xarray-sql/issues/68) or Apache Ballista [#98](https://github.com/alxmrs/xarray-sql/issues/98).
151
149
  - [ ] Demo: calculate Sea Surface Temperature from 1940 - Present in SQL [#36](https://github.com/alxmrs/xarray-sql/issues/36).
152
150
  - [ ] Provide an option to integrate DataFusion directly to Zarr via Rust [#4](https://github.com/alxmrs/xarray-sql/issues/4).
@@ -166,6 +164,8 @@ I want to give a special thanks to the following folks and institutions:
166
164
  and DataFusion-specific collaboration.
167
165
  - The gracious volunteer data science students at [UCSD's DS3](https://www.ds3atucsd.com/) org,
168
166
  who are working to make this library better.
167
+ - Andrew Huang for the sense of taste he brings to the project and consummate code
168
+ changes.
169
169
 
170
170
 
171
171
  ## License
@@ -0,0 +1,104 @@
1
+ <?xml version="1.0" encoding="UTF-8" standalone="no"?>
2
+ <svg
3
+ version="1.1"
4
+ x="0px"
5
+ y="0px"
6
+ viewBox="50 115 420 395"
7
+ xml:space="preserve"
8
+ id="svg6"
9
+ sodipodi:docname="logo.svg"
10
+ inkscape:version="1.3.2 (091e20e, 2023-11-25)"
11
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
12
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
13
+ xmlns="http://www.w3.org/2000/svg"
14
+ xmlns:svg="http://www.w3.org/2000/svg"><defs
15
+ id="defs6" /><sodipodi:namedview
16
+ id="namedview6"
17
+ pagecolor="#ffffff"
18
+ bordercolor="#000000"
19
+ borderopacity="0.25"
20
+ inkscape:showpageshadow="2"
21
+ inkscape:pageopacity="0.0"
22
+ inkscape:pagecheckerboard="0"
23
+ inkscape:deskcolor="#d1d1d1"
24
+ showguides="false"
25
+ inkscape:zoom="0.59746835"
26
+ inkscape:cx="76.154661"
27
+ inkscape:cy="63.601695"
28
+ inkscape:window-width="1320"
29
+ inkscape:window-height="905"
30
+ inkscape:window-x="181"
31
+ inkscape:window-y="34"
32
+ inkscape:window-maximized="0"
33
+ inkscape:current-layer="g6" />
34
+ <style
35
+ type="text/css"
36
+ id="style1">
37
+ .st0{fill:#216C89;}
38
+ .st1{fill:#4993AA;}
39
+ .st2{fill:#0F4565;}
40
+ .st3{fill:#6BE8E8;}
41
+ .st4{fill:#9DEEF4;}
42
+ .st5{fill:#4ACFDD;}
43
+ .st6{fill:#E38017;}
44
+ .st7{fill:#16AFB5;}
45
+ </style>
46
+ <g
47
+ id="g6">
48
+ <!-- BOTTOM DATABASE (dark) -->
49
+ <path
50
+ class="st2"
51
+ d="m 65,362.92813 c 0,0 0,96.42499 0,96.42499 0,17.9075 70,27.55 112,27.55 42,0 112,-9.6425 112,-27.55 v -96.42499 c 0,17.90749 -70,27.54999 -112,27.54999 -42,0 -112,-9.6425 -112,-27.54999 z"
52
+ id="path1"
53
+ style="stroke-width:0.82991" />
54
+ <path
55
+ class="st0"
56
+ d="m 177,390.47812 c 42,0 112,-9.6425 112,-27.54999 v 96.42499 c 0,17.9075 -70,27.55 -112,27.55 z"
57
+ opacity="0.4"
58
+ id="path2"
59
+ style="stroke-width:0.82991" />
60
+
61
+
62
+
63
+ <!-- TOP DATABASE (teal) -->
64
+ <path
65
+ class="st5"
66
+ d="m 65,276.83438 c 0,0 0,75.76249 0,75.76249 0,17.90751 70,27.55001 112,27.55001 42,0 112,-9.6425 112,-27.55001 v -75.76249 c 0,17.90749 -70,27.54999 -112,27.54999 -42,0 -112,-9.6425 -112,-27.54999 z"
67
+ id="path3"
68
+ style="stroke-width:0.82991" />
69
+ <path
70
+ class="st3"
71
+ d="m 177,304.38437 c 42,0 112,-9.6425 112,-27.54999 v 75.76249 c 0,17.90751 -70,27.55001 -112,27.55001 z"
72
+ opacity="0.4"
73
+ id="path4"
74
+ style="stroke-width:0.82991" />
75
+ <path
76
+ class="st5"
77
+ d="m 66.576,190.40937 c 0,0 0,75.7625 0,75.7625 0,17.90751 70,27.55 112,27.55 42,0 112,-9.64249 112,-27.55 v -75.7625 c 0,17.9075 -70,27.55 -112,27.55 -42,0 -112,-9.6425 -112,-27.55 z"
78
+ id="path3-5"
79
+ style="fill:#4acfdd;stroke-width:0.82991" /><path
80
+ class="st3"
81
+ d="m 178.576,217.95937 c 42,0 112,-9.6425 112,-27.55 v 75.7625 c 0,17.90751 -70,27.55 -112,27.55 z"
82
+ opacity="0.4"
83
+ id="path4-7"
84
+ style="fill:#6be8e8;stroke-width:0.82991" /><ellipse
85
+ class="st4"
86
+ cx="178.576"
87
+ cy="183.52188"
88
+ rx="112"
89
+ ry="27.549999"
90
+ id="ellipse4-5"
91
+ style="fill:#9deef4;stroke-width:0.82991" />
92
+
93
+
94
+ <!-- DIAGONAL BARS -->
95
+ <polygon
96
+ class="st6"
97
+ points="377.48,412.74 308.66,482.2 308.66,346.56 377.48,277.09"
98
+ id="polygon5" />
99
+ <polygon
100
+ class="st7"
101
+ points="457.07,412.74 388.25,482.2 388.25,346.56 457.07,277.09"
102
+ id="polygon6" />
103
+ </g>
104
+ </svg>
@@ -0,0 +1 @@
1
+ --8<-- "CONTRIBUTING.md"
@@ -0,0 +1,23 @@
1
+ # Examples
2
+
3
+ ```python
4
+ import xarray as xr
5
+ import xarray_sql as xql
6
+
7
+ ds = xr.tutorial.open_dataset('air_temperature')
8
+
9
+ ctx = xql.XarrayContext()
10
+ ctx.from_dataset('air', ds, chunks=dict(time=24))
11
+
12
+ result = ctx.sql('''
13
+ SELECT
14
+ "lat", "lon", AVG("air") as air_avg
15
+ FROM
16
+ "air"
17
+ GROUP BY
18
+ "lat", "lon"
19
+ ''')
20
+
21
+ df = result.to_pandas()
22
+ df.head()
23
+ ```
@@ -0,0 +1 @@
1
+ --8<-- "README.md"
@@ -0,0 +1,8 @@
1
+ # xarray-sql
2
+
3
+ ::: xarray_sql
4
+ options:
5
+ show_root_heading: true
6
+ show_source: false
7
+ members: true
8
+ show_submodules: true
@@ -37,10 +37,21 @@ dependencies = [
37
37
 
38
38
  [project.optional-dependencies]
39
39
  test = [
40
+ "cftime",
40
41
  "pytest",
41
42
  "xarray[io]",
42
43
  "gcsfs",
43
44
  ]
45
+ docs = [
46
+ "zensical",
47
+ "mkdocstrings[python]",
48
+ ]
49
+ dev = [
50
+ "xarray_sql[docs]",
51
+ "pre-commit",
52
+ "pytest",
53
+ "watchfiles",
54
+ ]
44
55
 
45
56
  [project.urls]
46
57
  Homepage = "https://github.com/alxmrs/xarray-sql"
@@ -51,7 +62,7 @@ features = ["pyo3/extension-module"]
51
62
  module-name = "xarray_sql._native"
52
63
 
53
64
  [tool.setuptools.packages.find]
54
- exclude = ["demo", "perf_tests"]
65
+ exclude = ["demo", "perf_tests", "tests", "tests.*"]
55
66
 
56
67
  [tool.pyink]
57
68
  line-length = 80
@@ -85,6 +96,7 @@ ignore_missing_imports = true
85
96
  [dependency-groups]
86
97
  dev = [
87
98
  "xarray_sql[test]",
99
+ "xarray_sql[docs]",
88
100
  "py-spy>=0.4.0",
89
101
  "pyink>=24.10.1",
90
102
  "maturin>=1.9.1",
@@ -93,3 +105,6 @@ dev = [
93
105
  [tool.uv]
94
106
  # Rebuild package when any rust files change
95
107
  cache-keys = [{file = "pyproject.toml"}, {file = "rust/Cargo.toml"}, {file = "**/*.rs"}]
108
+
109
+ [tool.pytest.ini_options]
110
+ testpaths = ["tests"]
File without changes
@@ -0,0 +1,144 @@
1
+ import pytest
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ import xarray as xr
6
+
7
+
8
+ def rand_wx(start: str, end: str) -> xr.Dataset:
9
+ np.random.seed(42)
10
+ lat = np.linspace(-90, 90, num=720)
11
+ lon = np.linspace(-180, 180, num=1440)
12
+ time = pd.date_range(start, end, freq="h")
13
+ level = np.array([1000, 500], dtype=np.int32)
14
+ reference_time = pd.Timestamp(start)
15
+ temperature = 15 + 8 * np.random.randn(720, 1440, len(time), len(level))
16
+ precipitation = 10 * np.random.rand(720, 1440, len(time), len(level))
17
+ return xr.Dataset(
18
+ data_vars=dict(
19
+ temperature=(["lat", "lon", "time", "level"], temperature),
20
+ precipitation=(["lat", "lon", "time", "level"], precipitation),
21
+ ),
22
+ coords=dict(
23
+ lat=lat,
24
+ lon=lon,
25
+ time=time,
26
+ level=level,
27
+ reference_time=reference_time,
28
+ ),
29
+ attrs=dict(description="Random weather."),
30
+ )
31
+
32
+
33
+ def create_large_dataset(time_steps=1000, lat_points=100, lon_points=100):
34
+ """Create a large xarray dataset for memory testing."""
35
+ np.random.seed(42)
36
+
37
+ time = pd.date_range("2020-01-01", periods=time_steps, freq="h")
38
+ lat = np.linspace(-90, 90, lat_points)
39
+ lon = np.linspace(-180, 180, lon_points)
40
+
41
+ temp_data = np.random.rand(time_steps, lat_points, lon_points) * 40 - 10
42
+ precip_data = np.random.rand(time_steps, lat_points, lon_points) * 100
43
+
44
+ return xr.Dataset(
45
+ {
46
+ "temperature": (["time", "lat", "lon"], temp_data),
47
+ "precipitation": (["time", "lat", "lon"], precip_data),
48
+ },
49
+ coords={"time": time, "lat": lat, "lon": lon},
50
+ )
51
+
52
+
53
+ @pytest.fixture
54
+ def air():
55
+ ds = xr.tutorial.open_dataset("air_temperature")
56
+ chunks = {"time": 240}
57
+ return ds.chunk(chunks)
58
+
59
+
60
+ @pytest.fixture
61
+ def air_small(air):
62
+ return air.isel(time=slice(0, 12), lat=slice(0, 11), lon=slice(0, 10)).chunk(
63
+ {"time": 240}
64
+ )
65
+
66
+
67
+ @pytest.fixture
68
+ def randwx():
69
+ return rand_wx("1995-01-13T00", "1995-01-13T01")
70
+
71
+
72
+ @pytest.fixture
73
+ def large_ds():
74
+ return create_large_dataset().chunk({"time": 25})
75
+
76
+
77
+ @pytest.fixture
78
+ def air_dataset_small():
79
+ ds = xr.tutorial.open_dataset("air_temperature").chunk({"time": 240})
80
+ return ds.isel(time=slice(0, 12), lat=slice(0, 11), lon=slice(0, 10))
81
+
82
+
83
+ @pytest.fixture
84
+ def air_dataset_large():
85
+ return xr.tutorial.open_dataset("air_temperature").chunk({"time": 240})
86
+
87
+
88
+ @pytest.fixture
89
+ def rasm_ds():
90
+ """rasm uses cftime.DatetimeNoLeap (noleap / 365_day) for time."""
91
+ return xr.tutorial.open_dataset("rasm")
92
+
93
+
94
+ @pytest.fixture
95
+ def weather_dataset():
96
+ ds = rand_wx("2023-01-01T00", "2023-01-01T12")
97
+ return ds.isel(time=slice(0, 6), lat=slice(0, 10), lon=slice(0, 10)).chunk(
98
+ {"time": 3}
99
+ )
100
+
101
+
102
+ @pytest.fixture
103
+ def synthetic_dataset():
104
+ return create_large_dataset(
105
+ time_steps=50, lat_points=20, lon_points=20
106
+ ).chunk({"time": 25})
107
+
108
+
109
+ @pytest.fixture
110
+ def station_dataset():
111
+ return xr.Dataset(
112
+ {
113
+ "station_id": (["station"], [1, 2, 3, 4, 5]),
114
+ "elevation": (["station"], [100, 250, 500, 750, 1000]),
115
+ "name": (
116
+ ["station"],
117
+ ["Station_A", "Station_B", "Station_C", "Station_D", "Station_E"],
118
+ ),
119
+ }
120
+ ).chunk({"station": 5})
121
+
122
+
123
+ @pytest.fixture
124
+ def air_and_stations():
125
+ air = (
126
+ xr.tutorial.open_dataset("air_temperature")
127
+ .isel(time=slice(0, 12), lat=slice(0, 5), lon=slice(0, 8))
128
+ .chunk({"time": 6})
129
+ )
130
+ stations = xr.Dataset(
131
+ {
132
+ "station_id": (["station"], [101, 102, 103]),
133
+ "lat": (
134
+ ["station"],
135
+ [air.lat.values[0], air.lat.values[2], air.lat.values[4]],
136
+ ),
137
+ "lon": (
138
+ ["station"],
139
+ [air.lon.values[1], air.lon.values[3], air.lon.values[5]],
140
+ ),
141
+ "elevation": (["station"], [100, 250, 500]),
142
+ }
143
+ ).chunk({"station": 3})
144
+ return air, stations