tonik 0.0.12__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/storage.py CHANGED
@@ -6,7 +6,7 @@ import re
6
6
  import pandas as pd
7
7
  import xarray as xr
8
8
 
9
- from .xarray2hdf5 import xarray2hdf5
9
+ from .xarray2netcdf import xarray2netcdf
10
10
  from .xarray2zarr import xarray2zarr
11
11
 
12
12
  LOGGING_CONFIG = {
@@ -76,6 +76,7 @@ class Path(object):
76
76
  self.name = name
77
77
  self.create = create
78
78
  self.backend = backend
79
+ self.engine = 'h5netcdf' if self.backend == 'netcdf' else self.backend
79
80
  self.path = os.path.join(parentdir, name)
80
81
  if create:
81
82
  try:
@@ -102,7 +103,7 @@ class Path(object):
102
103
 
103
104
  def feature_path(self, feature):
104
105
 
105
- if self.backend == 'h5netcdf':
106
+ if self.backend == 'netcdf':
106
107
  file_ending = '.nc'
107
108
  elif self.backend == 'zarr':
108
109
  file_ending = '.zarr'
@@ -154,7 +155,7 @@ class Path(object):
154
155
  format(stack_length, interval, num_periods))
155
156
 
156
157
  xd_index = dict(datetime=slice(self.starttime, self.endtime))
157
- with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
158
+ with xr.open_dataset(filename, group='original', engine=self.engine) as ds:
158
159
  rq = ds.loc[xd_index].load()
159
160
 
160
161
  # Stack features
@@ -187,11 +188,19 @@ class Path(object):
187
188
  """
188
189
  Save a feature to disk
189
190
  """
190
- if self.backend == 'h5netcdf':
191
- xarray2hdf5(data, self.path, **kwargs)
191
+ if self.backend == 'netcdf':
192
+ xarray2netcdf(data, self.path, **kwargs)
192
193
  elif self.backend == 'zarr':
193
194
  xarray2zarr(data, self.path, **kwargs)
194
195
 
196
+ def shape(self, feature):
197
+ """
198
+ Get shape of a feature on disk
199
+ """
200
+ filename = self.feature_path(feature)
201
+ with xr.open_dataset(filename, group='original', engine=self.engine) as ds:
202
+ return ds.sizes
203
+
195
204
 
196
205
  class Storage(Path):
197
206
  """
@@ -214,7 +223,7 @@ class Storage(Path):
214
223
  >>> rsam = c("rsam")
215
224
  """
216
225
 
217
- def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='zarr'):
226
+ def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='netcdf'):
218
227
  self.stores = set()
219
228
  self.starttime = starttime
220
229
  self.endtime = endtime
tonik/utils.py CHANGED
@@ -56,3 +56,36 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
56
56
  xds.attrs['station'] = 'MDR'
57
57
  xds.attrs['interval'] = '10min'
58
58
  return xds
59
+
60
+
61
+ def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
62
+ resolution: float = None) -> xr.DataArray:
63
+ """
64
+ Merge two xarray datasets with the same datetime index.
65
+
66
+ Parameters
67
+ ----------
68
+ xds_old : xr.DataArray
69
+ Old array.
70
+ xds_new : xr.DataArray
71
+ New array.
72
+ resolution : float
73
+ Time resolution in hours.
74
+
75
+ Returns
76
+ -------
77
+ xr.DataArray
78
+ Merged array.
79
+ """
80
+ xda_old = xds_old.drop_duplicates(
81
+ 'datetime', keep='last')
82
+ xda_new = xds_new.drop_duplicates(
83
+ 'datetime', keep='last')
84
+ xda_new = xda_new.combine_first(xda_old)
85
+ if resolution is not None:
86
+ new_dates = pd.date_range(
87
+ xda_new.datetime.values[0],
88
+ xda_new.datetime.values[-1],
89
+ freq=f'{resolution}h')
90
+ xda_new = xda_new.reindex(datetime=new_dates)
91
+ return xda_new
@@ -1,15 +1,19 @@
1
- from datetime import datetime
2
1
  import logging
3
2
  import os
3
+ from datetime import datetime
4
4
  from warnings import filterwarnings
5
5
 
6
- from cftime import num2date, date2num
7
6
  import h5netcdf
8
7
  import numpy as np
8
+ import xarray as xr
9
+ from cftime import date2num, num2date
10
+
11
+ from .utils import merge_arrays
9
12
 
10
13
 
11
- def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
12
- archive_starttime=datetime(2000, 1, 1), resolution=None):
14
+ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
15
+ archive_starttime=datetime(2000, 1, 1), resolution=None,
16
+ mode='a'):
13
17
  """
14
18
  Store an xarray dataset as an HDF5 file.
15
19
 
@@ -31,23 +35,35 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
31
35
  determined from the data.
32
36
  """
33
37
  filterwarnings(action='ignore', category=DeprecationWarning,
34
- message='`np.bool` is a deprecated alias')
38
+ message='`np.bool` is a deprecated alias')
35
39
 
36
- starttime = xArray[timedim].values[0].astype('datetime64[us]').astype(datetime)
37
- starttime = min(starttime, archive_starttime)
40
+ data_starttime = xArray[timedim].values[0].astype(
41
+ 'datetime64[us]').astype(datetime)
42
+ starttime = min(data_starttime, archive_starttime)
38
43
  if resolution is None:
39
44
  resolution = (np.diff(xArray[timedim])/np.timedelta64(1, 'h'))[0]
40
45
 
41
46
  for featureName in list(xArray.data_vars.keys()):
42
- h5file = os.path.join(fdir, featureName +'.nc')
47
+ h5file = os.path.join(fdir, featureName + '.nc')
48
+ _mode = 'w'
49
+ if os.path.isfile(h5file) and mode == 'a':
50
+ if archive_starttime > data_starttime:
51
+ xds_existing = xr.open_dataset(
52
+ h5file, group='original', engine='h5netcdf')
53
+ xda_new = merge_arrays(
54
+ xds_existing[featureName], xArray[featureName],
55
+ resolution=resolution)
56
+ xds_existing.close()
57
+ xda_new.to_netcdf(h5file, group='original',
58
+ mode='w', engine='h5netcdf')
59
+ continue
60
+ _mode = 'a'
43
61
 
44
- mode = 'a' if os.path.isfile(h5file) else 'w'
45
-
46
- with h5netcdf.File(h5file, mode) as h5f:
62
+ with h5netcdf.File(h5file, _mode) as h5f:
47
63
  try:
48
64
  rootGrp = _create_h5_Structure(rootGroupName, featureName,
49
65
  h5f, xArray, starttime, timedim)
50
- except ValueError: # group already exists, append
66
+ except ValueError: # group already exists, append
51
67
  rootGrp = h5f[rootGroupName]
52
68
 
53
69
  # determine indices
@@ -75,7 +91,8 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
75
91
  try:
76
92
  _setMetaInfo(featureName, h5f, xArray)
77
93
  except KeyError as e:
78
- logging.warning(f"Could not set all meta info for {featureName}: {e}")
94
+ logging.warning(
95
+ f"Could not set all meta info for {featureName}: {e}")
79
96
 
80
97
 
81
98
  def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
@@ -85,15 +102,16 @@ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime,
85
102
  coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
86
103
  coordinates.attrs['calendar'] = 'gregorian'
87
104
  rootGrp.attrs['starttime'] = str(starttime)
88
- for label, size in xArray.dims.items():
105
+ for label, size in xArray.dims.items():
89
106
  if not np.issubdtype(xArray[label].dtype, np.datetime64):
90
- rootGrp.dimensions[label] = size
107
+ rootGrp.dimensions[label] = size
91
108
  coordinates = rootGrp.create_variable(label, (label,), float)
92
109
  coordinates[:] = xArray[label].values
93
110
  # Note: xArray.dims returns a dictionary of dimensions that are not necessarily
94
111
  # in the right order; xArray[featureName].dims returns a tuple with dimension
95
112
  # names in the correct order
96
- rootGrp.create_variable(featureName, tuple(xArray[featureName].dims), dtype=float, fillvalue=0.)
113
+ rootGrp.create_variable(featureName, tuple(
114
+ xArray[featureName].dims), dtype=float, fillvalue=0.)
97
115
  return rootGrp
98
116
 
99
117
 
@@ -102,4 +120,3 @@ def _setMetaInfo(featureName, h5f, xArray):
102
120
  h5f.attrs['latitude'] = -42
103
121
  h5f.attrs['longitude'] = 168
104
122
  h5f.attrs['datatype'] = featureName
105
-
tonik/xarray2zarr.py CHANGED
@@ -3,31 +3,9 @@ import os
3
3
 
4
4
  import xarray as xr
5
5
 
6
- logger = logging.getLogger(__name__)
7
-
8
-
9
- def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray) -> xr.DataArray:
10
- """
11
- Merge two xarray datasets with the same datetime index.
6
+ from .utils import merge_arrays
12
7
 
13
- Parameters
14
- ----------
15
- xds_old : xr.DataArray
16
- Old array.
17
- xds_new : xr.DataArray
18
- New array.
19
-
20
- Returns
21
- -------
22
- xr.DataArray
23
- Merged array.
24
- """
25
- xda_old = xds_old.drop_duplicates(
26
- 'datetime', keep='last')
27
- xda_new = xds_new.drop_duplicates(
28
- 'datetime', keep='last')
29
- xda_new = xda_new.combine_first(xda_old)
30
- return xda_new
8
+ logger = logging.getLogger(__name__)
31
9
 
32
10
 
33
11
  def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tonik
3
- Version: 0.0.12
3
+ Version: 0.1.1
4
4
  Summary: Store time series data as HDF5 files and access them through an API.
5
5
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
6
6
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -0,0 +1,12 @@
1
+ tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
2
+ tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
3
+ tonik/storage.py,sha256=oy7Nz89zeiZchsqpouttzWhSL6UgenUQZiaMX6SdpsI,11499
4
+ tonik/utils.py,sha256=3nSRU_GnV6arP4e63YHn4oEV8XbqzVAW8FCvQVIwGdg,2757
5
+ tonik/xarray2netcdf.py,sha256=aZSSMADmS0DpnprT5TBqERGxrycZTPTNuQhxm1rOsh0,5135
6
+ tonik/xarray2zarr.py,sha256=jTrHFpjN3lEDIJEI6RyPvvbW2jZsfYwGE_LQaZ6dwx8,2099
7
+ tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
8
+ tonik-0.1.1.dist-info/METADATA,sha256=kSVqgQLr_k3j7s3mfcAnY5nt4LrA-xplhYFiQFLnUIw,1938
9
+ tonik-0.1.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
+ tonik-0.1.1.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
11
+ tonik-0.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
12
+ tonik-0.1.1.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
2
- tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
3
- tonik/storage.py,sha256=Ts6jKLqwcDQvPIea-swBCpnEjQr_xnAjOl-hkUelTn4,11165
4
- tonik/utils.py,sha256=_TxXf9o9fOvtuOvGO6-ww9F5m0QelHyfQzQw8RGjTV4,1868
5
- tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
6
- tonik/xarray2zarr.py,sha256=8osAiQSmiMTEGVoqWaCT7kqPvuzpGVZalNt4OhxmpCY,2619
7
- tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
8
- tonik-0.0.12.dist-info/METADATA,sha256=eHgl8-z2yNDzEuxHdgHbqDNxONe1HE-igHQ5gXLFAtY,1939
9
- tonik-0.0.12.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
- tonik-0.0.12.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
11
- tonik-0.0.12.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
12
- tonik-0.0.12.dist-info/RECORD,,
File without changes