tonik 0.0.11__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/storage.py CHANGED
@@ -6,7 +6,7 @@ import re
 import pandas as pd
 import xarray as xr
 
-from .xarray2hdf5 import xarray2hdf5
+from .xarray2netcdf import xarray2netcdf
 from .xarray2zarr import xarray2zarr
 
 LOGGING_CONFIG = {
@@ -102,7 +102,7 @@ class Path(object):
 
     def feature_path(self, feature):
 
-        if self.backend == 'h5netcdf':
+        if self.backend == 'netcdf':
             file_ending = '.nc'
         elif self.backend == 'zarr':
             file_ending = '.zarr'
@@ -154,7 +154,8 @@ class Path(object):
                 format(stack_length, interval, num_periods))
 
         xd_index = dict(datetime=slice(self.starttime, self.endtime))
-        with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
+        engine = 'h5netcdf' if self.backend == 'netcdf' else self.backend
+        with xr.open_dataset(filename, group='original', engine=engine) as ds:
            rq = ds.loc[xd_index].load()
 
        # Stack features
@@ -187,8 +188,8 @@ class Path(object):
         """
         Save a feature to disk
         """
-        if self.backend == 'h5netcdf':
-            xarray2hdf5(data, self.path, **kwargs)
+        if self.backend == 'netcdf':
+            xarray2netcdf(data, self.path, **kwargs)
         elif self.backend == 'zarr':
             xarray2zarr(data, self.path, **kwargs)
 
@@ -214,7 +215,7 @@ class Storage(Path):
     >>> rsam = c("rsam")
     """
 
-    def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='zarr'):
+    def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='netcdf'):
        self.stores = set()
        self.starttime = starttime
        self.endtime = endtime
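
In practice, the backend identifier changes from 'h5netcdf' to 'netcdf' and becomes the default for new Storage instances: features are written as <feature>.nc files via xarray2netcdf and read back with xarray's 'h5netcdf' engine. A minimal usage sketch, assuming Storage is importable from the package top level and that the target directory exists (names and paths below are illustrative):

    from tonik import Storage

    # Default backend is now 'netcdf'; each feature ends up as <feature>.nc.
    store = Storage('volcano_obs', rootdir='/tmp/tonik_archive')

    # The previous zarr layout is still available by passing the backend explicitly.
    zarr_store = Storage('volcano_obs', rootdir='/tmp/tonik_archive',
                         backend='zarr')
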
tonik/utils.py CHANGED
@@ -56,3 +56,36 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
     xds.attrs['station'] = 'MDR'
     xds.attrs['interval'] = '10min'
     return xds
+
+
+def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
+                 resolution: float = None) -> xr.DataArray:
+    """
+    Merge two xarray datasets with the same datetime index.
+
+    Parameters
+    ----------
+    xds_old : xr.DataArray
+        Old array.
+    xds_new : xr.DataArray
+        New array.
+    resolution : float
+        Time resolution in hours.
+
+    Returns
+    -------
+    xr.DataArray
+        Merged array.
+    """
+    xda_old = xds_old.drop_duplicates(
+        'datetime', keep='last')
+    xda_new = xds_new.drop_duplicates(
+        'datetime', keep='last')
+    xda_new = xda_new.combine_first(xda_old)
+    if resolution is not None:
+        new_dates = pd.date_range(
+            xda_new.datetime.values[0],
+            xda_new.datetime.values[-1],
+            freq=f'{resolution}h')
+        xda_new = xda_new.reindex(datetime=new_dates)
+    return xda_new
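
The new merge_arrays helper is shared by both writers: duplicates are dropped from each array (keeping the last value), the new array takes precedence via combine_first, and an optional resolution reindexes the result onto a regular time grid. A small sketch with synthetic data (values and names below are illustrative only):

    import numpy as np
    import pandas as pd
    import xarray as xr

    from tonik.utils import merge_arrays

    dt_old = pd.date_range('2024-01-01 00:00', periods=5, freq='h')
    dt_new = pd.date_range('2024-01-01 03:00', periods=5, freq='h')
    old = xr.DataArray(np.zeros(5), coords={'datetime': dt_old},
                       dims='datetime', name='rsam')
    new = xr.DataArray(np.ones(5), coords={'datetime': dt_new},
                       dims='datetime', name='rsam')

    merged = merge_arrays(old, new, resolution=1.)
    # merged spans 00:00 to 07:00 on a regular hourly grid; the overlapping
    # timestamps (03:00 and 04:00) take the values from the new array.
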
tonik/xarray2hdf5.py → tonik/xarray2netcdf.py RENAMED
@@ -1,15 +1,18 @@
-from datetime import datetime
 import logging
 import os
+from datetime import datetime
 from warnings import filterwarnings
 
-from cftime import num2date, date2num
 import h5netcdf
 import numpy as np
+import xarray as xr
+from cftime import date2num, num2date
+
+from .utils import merge_arrays
 
 
-def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
-                archive_starttime=datetime(2000, 1, 1), resolution=None):
+def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
+                  archive_starttime=datetime(2000, 1, 1), resolution=None):
     """
     Store an xarray dataset as an HDF5 file.
 
@@ -31,23 +34,35 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
         determined from the data.
     """
     filterwarnings(action='ignore', category=DeprecationWarning,
-                   message='`np.bool` is a deprecated alias')
+                   message='`np.bool` is a deprecated alias')
 
-    starttime = xArray[timedim].values[0].astype('datetime64[us]').astype(datetime)
-    starttime = min(starttime, archive_starttime)
+    data_starttime = xArray[timedim].values[0].astype(
+        'datetime64[us]').astype(datetime)
+    starttime = min(data_starttime, archive_starttime)
     if resolution is None:
         resolution = (np.diff(xArray[timedim])/np.timedelta64(1, 'h'))[0]
 
     for featureName in list(xArray.data_vars.keys()):
-        h5file = os.path.join(fdir, featureName +'.nc')
+        h5file = os.path.join(fdir, featureName + '.nc')
+        mode = 'w'
+        if os.path.isfile(h5file):
+            if archive_starttime > data_starttime:
+                xds_existing = xr.open_dataset(
+                    h5file, group='original', engine='h5netcdf')
+                xda_new = merge_arrays(
+                    xds_existing[featureName], xArray[featureName],
+                    resolution=resolution)
+                xds_existing.close()
+                xda_new.to_netcdf(h5file, group='original',
+                                  mode='w', engine='h5netcdf')
+                continue
+            mode = 'a'
 
-        mode = 'a' if os.path.isfile(h5file) else 'w'
-
         with h5netcdf.File(h5file, mode) as h5f:
             try:
                 rootGrp = _create_h5_Structure(rootGroupName, featureName,
                                                h5f, xArray, starttime, timedim)
-            except ValueError: # group already exists, append
+            except ValueError:  # group already exists, append
                 rootGrp = h5f[rootGroupName]
 
             # determine indices
@@ -75,7 +90,8 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
             try:
                 _setMetaInfo(featureName, h5f, xArray)
             except KeyError as e:
-                logging.warning(f"Could not set all meta info for {featureName}: {e}")
+                logging.warning(
+                    f"Could not set all meta info for {featureName}: {e}")
 
 
 def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
@@ -85,15 +101,16 @@ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime,
     coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
     coordinates.attrs['calendar'] = 'gregorian'
     rootGrp.attrs['starttime'] = str(starttime)
-    for label, size in xArray.dims.items():
+    for label, size in xArray.dims.items():
         if not np.issubdtype(xArray[label].dtype, np.datetime64):
-            rootGrp.dimensions[label] = size
+            rootGrp.dimensions[label] = size
             coordinates = rootGrp.create_variable(label, (label,), float)
             coordinates[:] = xArray[label].values
     # Note: xArray.dims returns a dictionary of dimensions that are not necesarily
     # in the right order; xArray[featureName].dims returns a tuple with dimension
     # names in the correct order
-    rootGrp.create_variable(featureName, tuple(xArray[featureName].dims), dtype=float, fillvalue=0.)
+    rootGrp.create_variable(featureName, tuple(
+        xArray[featureName].dims), dtype=float, fillvalue=0.)
     return rootGrp
 
 
@@ -102,4 +119,3 @@ def _setMetaInfo(featureName, h5f, xArray):
     h5f.attrs['latitude'] = -42
     h5f.attrs['longitude'] = 168
     h5f.attrs['datatype'] = featureName
-
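
The practical effect of the new branch in xarray2netcdf: when the incoming data starts before the configured archive_starttime of an already existing file, the existing content is opened, merged with the new data via merge_arrays, and the file is rewritten as a whole instead of being appended to. A rough sketch of both paths, assuming generate_test_data returns a dataset with a 'datetime' dimension and that the target directory is writable:

    import os
    from datetime import datetime

    from tonik.utils import generate_test_data
    from tonik.xarray2netcdf import xarray2netcdf

    xds = generate_test_data()
    os.makedirs('/tmp/tonik_netcdf_demo', exist_ok=True)

    # First write creates one '<feature>.nc' file per data variable.
    xarray2netcdf(xds, '/tmp/tonik_netcdf_demo')

    # If archive_starttime lies after the data's first timestamp, the existing
    # file is merged with the incoming data (merge_arrays) and rewritten whole.
    xarray2netcdf(xds, '/tmp/tonik_netcdf_demo',
                  archive_starttime=datetime(2100, 1, 1))
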
tonik/xarray2zarr.py CHANGED
@@ -3,10 +3,28 @@ import os
 
 import xarray as xr
 
+from .utils import merge_arrays
+
 logger = logging.getLogger(__name__)
 
 
-def xarray2zarr(xds, path, mode='a'):
+def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
+    """
+    Write xarray dataset to zarr files.
+
+    Parameters
+    ----------
+    xds : xr.Dataset
+        Dataset to write.
+    path : str
+        Path to write the dataset.
+    mode : str, optional
+        Write mode, by default 'a'.
+
+    Returns
+    -------
+    None
+    """
     for feature in xds.data_vars.keys():
         fout = os.path.join(path, feature + '.zarr')
         if not os.path.exists(fout) or mode == 'w':
@@ -15,8 +33,8 @@ def xarray2zarr(xds, path, mode='a'):
         else:
             xds_existing = xr.open_zarr(fout, group='original')
             if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
-                xds_new = xr.merge([xds_existing[feature], xds[feature]])
-                xds_new.to_zarr(fout, group='original', mode='w')
+                xda_new = merge_arrays(xds_existing[feature], xds[feature])
+                xda_new.to_zarr(fout, group='original', mode='w')
             else:
                 try:
                     overlap = xds_existing.datetime.where(
@@ -34,9 +52,5 @@ def xarray2zarr(xds, path, mode='a'):
                     msg += "Attempting to merge the two datasets."
                     logger.error(msg)
                     # remove duplicate datetime entries
-                    xda_existing = xds_existing[feature].drop_duplicates(
-                        'datetime', keep='last')
-                    xda_new = xds[feature].drop_duplicates(
-                        'datetime', keep='last')
-                    xda_new = xda_new.combine_first(xda_existing)
+                    xda_new = merge_arrays(xds_existing[feature], xds[feature])
                     xda_new.to_zarr(fout, group='original', mode='w')
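
For reference, the zarr path behaves analogously: each data variable gets its own <feature>.zarr store, repeat writes go through the default mode='a', and data that overlaps or predates the existing store is now reconciled through merge_arrays followed by a full rewrite. A brief sketch (paths and the test-data helper call are illustrative):

    import os

    from tonik.utils import generate_test_data
    from tonik.xarray2zarr import xarray2zarr

    xds = generate_test_data()
    os.makedirs('/tmp/tonik_zarr_demo', exist_ok=True)

    xarray2zarr(xds, '/tmp/tonik_zarr_demo')  # creates one <feature>.zarr per variable
    # Re-running with the default mode='a' appends new time spans; overlapping
    # data falls back to merge_arrays and a rewrite with mode='w'.
    xarray2zarr(xds, '/tmp/tonik_zarr_demo')
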
tonik-0.0.11.dist-info/METADATA → tonik-0.1.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tonik
-Version: 0.0.11
+Version: 0.1.0
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
tonik-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
+tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
+tonik/storage.py,sha256=Oh3BBQL13yLYHlgk8-a-wstmlUC2vNJ0yi_fnyETK_g,11237
+tonik/utils.py,sha256=3nSRU_GnV6arP4e63YHn4oEV8XbqzVAW8FCvQVIwGdg,2757
+tonik/xarray2netcdf.py,sha256=ey7lY4czlMREUt56SzE3quXNzeqXxV0Ru_EatsJpynA,5088
+tonik/xarray2zarr.py,sha256=jTrHFpjN3lEDIJEI6RyPvvbW2jZsfYwGE_LQaZ6dwx8,2099
+tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
+tonik-0.1.0.dist-info/METADATA,sha256=VtdNFXyUplO5s_TmR8xEgW0NdMtKgI4Zl2rWLhxpnK0,1938
+tonik-0.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+tonik-0.1.0.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
+tonik-0.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+tonik-0.1.0.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
2
- tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
3
- tonik/storage.py,sha256=Ts6jKLqwcDQvPIea-swBCpnEjQr_xnAjOl-hkUelTn4,11165
4
- tonik/utils.py,sha256=_TxXf9o9fOvtuOvGO6-ww9F5m0QelHyfQzQw8RGjTV4,1868
5
- tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
6
- tonik/xarray2zarr.py,sha256=0bWMP_PZ0FLr9AlxKrC5M4aJRh_VGkUfJ-A-BGgKqM8,1979
7
- tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
8
- tonik-0.0.11.dist-info/METADATA,sha256=cXj6-wI7M6kWVvcffHe3W9BJQVdJfkNigzcrd0s5XlQ,1939
9
- tonik-0.0.11.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
- tonik-0.0.11.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
11
- tonik-0.0.11.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
12
- tonik-0.0.11.dist-info/RECORD,,