tonik 0.0.10__tar.gz → 0.0.11__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tonik
3
- Version: 0.0.10
3
+ Version: 0.0.11
4
4
  Summary: Store time series data as HDF5 files and access them through an API.
5
5
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
6
6
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tonik"
7
- version = "0.0.10"
7
+ version = "0.0.11"
8
8
  authors = [
9
9
  { name="Yannik Behr", email="y.behr@gns.cri.nz" },
10
10
  { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -155,11 +155,7 @@ class Path(object):
155
155
 
156
156
  xd_index = dict(datetime=slice(self.starttime, self.endtime))
157
157
  with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
158
- try:
159
- rq = ds.loc[xd_index].load()
160
- except KeyError:
161
- ds = ds.sortby("datetime")
162
- rq = ds.loc[xd_index].load()
158
+ rq = ds.loc[xd_index].load()
163
159
 
164
160
  # Stack features
165
161
  if stack_length is not None:
@@ -0,0 +1,42 @@
1
+ import logging
2
+ import os
3
+
4
+ import xarray as xr
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
+ def xarray2zarr(xds, path, mode='a'):
10
+ for feature in xds.data_vars.keys():
11
+ fout = os.path.join(path, feature + '.zarr')
12
+ if not os.path.exists(fout) or mode == 'w':
13
+ xds[feature].to_zarr(
14
+ fout, group='original', mode='w')
15
+ else:
16
+ xds_existing = xr.open_zarr(fout, group='original')
17
+ if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
18
+ xds_new = xr.merge([xds_existing[feature], xds[feature]])
19
+ xds_new.to_zarr(fout, group='original', mode='w')
20
+ else:
21
+ try:
22
+ overlap = xds_existing.datetime.where(
23
+ xds_existing.datetime == xds.datetime)
24
+ if overlap.size > 0:
25
+ xds[feature].loc[dict(datetime=overlap)].to_zarr(
26
+ fout, group='original', mode='r+', region='auto')
27
+ xds[feature].drop_sel(datetime=overlap).to_zarr(
28
+ fout, group='original', mode='a', append_dim="datetime")
29
+ else:
30
+ xds[feature].to_zarr(
31
+ fout, group='original', append_dim='datetime')
32
+ except Exception as e:
33
+ msg = f"Appending {feature} to {fout} failed: {e}\n"
34
+ msg += "Attempting to merge the two datasets."
35
+ logger.error(msg)
36
+ # remove duplicate datetime entries
37
+ xda_existing = xds_existing[feature].drop_duplicates(
38
+ 'datetime', keep='last')
39
+ xda_new = xds[feature].drop_duplicates(
40
+ 'datetime', keep='last')
41
+ xda_new = xda_new.combine_first(xda_existing)
42
+ xda_new.to_zarr(fout, group='original', mode='w')
@@ -161,18 +161,21 @@ def test_xarray2zarr_outofsequence(tmp_path_factory):
161
161
  """
162
162
  temp_dir = tmp_path_factory.mktemp('test_xarray2zarr')
163
163
  start = datetime(2022, 7, 18, 8, 0, 0)
164
+ middle = datetime(2022, 7, 18, 12, 0, 0)
164
165
  end = datetime(2022, 7, 19, 12, 0, 0)
165
- xdf1 = generate_test_data(dim=1, ndays=1, tstart=start)
166
- xdf2 = generate_test_data(dim=1, ndays=1, tstart=end)
166
+ xdf1 = generate_test_data(dim=1, intervals=3, tstart=start)
167
+ xdf2 = generate_test_data(dim=1, intervals=3, tstart=middle)
168
+ xdf3 = generate_test_data(dim=1, intervals=3, tstart=end)
167
169
  g = Storage('test_experiment', rootdir=temp_dir,
168
170
  starttime=start, endtime=end + timedelta(days=1),
169
171
  backend='zarr')
170
172
  c = g.get_substore('MDR', '00', 'HHZ')
171
- c.save(xdf2)
173
+ c.save(xdf3)
172
174
  c.save(xdf1)
175
+ c.save(xdf2)
173
176
  xdf_test = c('rsam')
174
177
  np.testing.assert_array_equal(
175
- xdf_test.datetime.values, xdf1.merge(xdf2).datetime.values)
178
+ xdf_test.datetime.values, xr.merge([xdf1, xdf2, xdf3]).datetime.values)
176
179
 
177
180
 
178
181
  def test_xarray2zarr_duplicates(tmp_path_factory):
@@ -1,37 +0,0 @@
1
- import logging
2
- import os
3
-
4
- import xarray as xr
5
-
6
- logger = logging.getLogger(__name__)
7
-
8
-
9
- def xarray2zarr(xds, path, mode='a'):
10
- for feature in xds.data_vars.keys():
11
- fout = os.path.join(path, feature + '.zarr')
12
- if not os.path.exists(fout) or mode == 'w':
13
- xds[feature].to_zarr(
14
- fout, group='original', mode='w')
15
- else:
16
- xds_existing = xr.open_zarr(fout, group='original')
17
- try:
18
- overlap = xds_existing.datetime.where(
19
- xds_existing.datetime == xds.datetime)
20
- if overlap.size > 0:
21
- xds[feature].loc[dict(datetime=overlap)].to_zarr(
22
- fout, group='original', mode='r+', region='auto')
23
- xds[feature].drop_sel(datetime=overlap).to_zarr(
24
- fout, group='original', mode='a', append_dim="datetime")
25
- else:
26
- xds[feature].to_zarr(
27
- fout, group='original', append_dim='datetime')
28
- except Exception as e:
29
- msg = f"Appending {feature} to {fout} failed: {e}\n"
30
- msg += "Attempting to merge the two datasets."
31
- logger.error(msg)
32
- # remove duplicate datetime entries
33
- xda_existing = xds_existing[feature].drop_duplicates(
34
- 'datetime', keep='last')
35
- xda_new = xds[feature].drop_duplicates('datetime', keep='last')
36
- xda_new = xda_new.combine_first(xda_existing)
37
- xda_new.to_zarr(fout, group='original', mode='w')
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes