tonik 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tonik
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: Store time series data as HDF5 files and access them through an API.
5
5
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
6
6
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tonik"
7
- version = "0.0.10"
7
+ version = "0.0.12"
8
8
  authors = [
9
9
  { name="Yannik Behr", email="y.behr@gns.cri.nz" },
10
10
  { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -155,11 +155,7 @@ class Path(object):
155
155
 
156
156
  xd_index = dict(datetime=slice(self.starttime, self.endtime))
157
157
  with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
158
- try:
159
- rq = ds.loc[xd_index].load()
160
- except KeyError:
161
- ds = ds.sortby("datetime")
162
- rq = ds.loc[xd_index].load()
158
+ rq = ds.loc[xd_index].load()
163
159
 
164
160
  # Stack features
165
161
  if stack_length is not None:
@@ -0,0 +1,78 @@
1
+ import logging
2
+ import os
3
+
4
+ import xarray as xr
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
+ def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray) -> xr.DataArray:
10
+ """
11
+ Merge two xarray data arrays on their datetime index, preferring values from the new array.
12
+
13
+ Parameters
14
+ ----------
15
+ xds_old : xr.DataArray
16
+ Old array.
17
+ xds_new : xr.DataArray
18
+ New array.
19
+
20
+ Returns
21
+ -------
22
+ xr.DataArray
23
+ Merged array.
24
+ """
25
+ xda_old = xds_old.drop_duplicates(
26
+ 'datetime', keep='last')
27
+ xda_new = xds_new.drop_duplicates(
28
+ 'datetime', keep='last')
29
+ xda_new = xda_new.combine_first(xda_old)
30
+ return xda_new
31
+
32
+
33
+ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
34
+ """
35
+ Write xarray dataset to zarr files.
36
+
37
+ Parameters
38
+ ----------
39
+ xds : xr.Dataset
40
+ Dataset to write.
41
+ path : str
42
+ Path to write the dataset.
43
+ mode : str, optional
44
+ Write mode, by default 'a'.
45
+
46
+ Returns
47
+ -------
48
+ None
49
+ """
50
+ for feature in xds.data_vars.keys():
51
+ fout = os.path.join(path, feature + '.zarr')
52
+ if not os.path.exists(fout) or mode == 'w':
53
+ xds[feature].to_zarr(
54
+ fout, group='original', mode='w')
55
+ else:
56
+ xds_existing = xr.open_zarr(fout, group='original')
57
+ if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
58
+ xda_new = merge_arrays(xds_existing[feature], xds[feature])
59
+ xda_new.to_zarr(fout, group='original', mode='w')
60
+ else:
61
+ try:
62
+ overlap = xds_existing.datetime.where(
63
+ xds_existing.datetime == xds.datetime)
64
+ if overlap.size > 0:
65
+ xds[feature].loc[dict(datetime=overlap)].to_zarr(
66
+ fout, group='original', mode='r+', region='auto')
67
+ xds[feature].drop_sel(datetime=overlap).to_zarr(
68
+ fout, group='original', mode='a', append_dim="datetime")
69
+ else:
70
+ xds[feature].to_zarr(
71
+ fout, group='original', append_dim='datetime')
72
+ except Exception as e:
73
+ msg = f"Appending {feature} to {fout} failed: {e}\n"
74
+ msg += "Attempting to merge the two datasets."
75
+ logger.error(msg)
76
+ # remove duplicate datetime entries
77
+ xda_new = merge_arrays(xds_existing[feature], xds[feature])
78
+ xda_new.to_zarr(fout, group='original', mode='w')
@@ -161,18 +161,21 @@ def test_xarray2zarr_outofsequence(tmp_path_factory):
161
161
  """
162
162
  temp_dir = tmp_path_factory.mktemp('test_xarray2zarr')
163
163
  start = datetime(2022, 7, 18, 8, 0, 0)
164
+ middle = datetime(2022, 7, 18, 12, 0, 0)
164
165
  end = datetime(2022, 7, 19, 12, 0, 0)
165
- xdf1 = generate_test_data(dim=1, ndays=1, tstart=start)
166
- xdf2 = generate_test_data(dim=1, ndays=1, tstart=end)
166
+ xdf1 = generate_test_data(dim=1, intervals=3, tstart=start)
167
+ xdf2 = generate_test_data(dim=1, intervals=3, tstart=middle)
168
+ xdf3 = generate_test_data(dim=1, intervals=3, tstart=end)
167
169
  g = Storage('test_experiment', rootdir=temp_dir,
168
170
  starttime=start, endtime=end + timedelta(days=1),
169
171
  backend='zarr')
170
172
  c = g.get_substore('MDR', '00', 'HHZ')
171
- c.save(xdf2)
173
+ c.save(xdf3)
172
174
  c.save(xdf1)
175
+ c.save(xdf2)
173
176
  xdf_test = c('rsam')
174
177
  np.testing.assert_array_equal(
175
- xdf_test.datetime.values, xdf1.merge(xdf2).datetime.values)
178
+ xdf_test.datetime.values, xr.merge([xdf1, xdf2, xdf3]).datetime.values)
176
179
 
177
180
 
178
181
  def test_xarray2zarr_duplicates(tmp_path_factory):
@@ -1,37 +0,0 @@
1
- import logging
2
- import os
3
-
4
- import xarray as xr
5
-
6
- logger = logging.getLogger(__name__)
7
-
8
-
9
- def xarray2zarr(xds, path, mode='a'):
10
- for feature in xds.data_vars.keys():
11
- fout = os.path.join(path, feature + '.zarr')
12
- if not os.path.exists(fout) or mode == 'w':
13
- xds[feature].to_zarr(
14
- fout, group='original', mode='w')
15
- else:
16
- xds_existing = xr.open_zarr(fout, group='original')
17
- try:
18
- overlap = xds_existing.datetime.where(
19
- xds_existing.datetime == xds.datetime)
20
- if overlap.size > 0:
21
- xds[feature].loc[dict(datetime=overlap)].to_zarr(
22
- fout, group='original', mode='r+', region='auto')
23
- xds[feature].drop_sel(datetime=overlap).to_zarr(
24
- fout, group='original', mode='a', append_dim="datetime")
25
- else:
26
- xds[feature].to_zarr(
27
- fout, group='original', append_dim='datetime')
28
- except Exception as e:
29
- msg = f"Appending {feature} to {fout} failed: {e}\n"
30
- msg += "Attempting to merge the two datasets."
31
- logger.error(msg)
32
- # remove duplicate datetime entries
33
- xda_existing = xds_existing[feature].drop_duplicates(
34
- 'datetime', keep='last')
35
- xda_new = xds[feature].drop_duplicates('datetime', keep='last')
36
- xda_new = xda_new.combine_first(xda_existing)
37
- xda_new.to_zarr(fout, group='original', mode='w')
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes