tonik 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/storage.py CHANGED
@@ -1,9 +1,7 @@
1
1
  import logging
2
2
  import logging.config
3
3
  import os
4
- import re
5
4
 
6
- import pandas as pd
7
5
  import xarray as xr
8
6
 
9
7
  from .xarray2netcdf import xarray2netcdf
@@ -113,69 +111,27 @@ class Path(object):
113
111
  self.children[feature] = Path(feature + file_ending, self.path)
114
112
  return _feature_path
115
113
 
116
- def __call__(self, feature, stack_length=None, interval='10min'):
114
+ def __call__(self, feature, group='original'):
117
115
  """
118
116
  Request a particular feature
119
117
 
120
118
  :param feature: Feature name
121
119
  :type feature: str
122
- :param stack_length: length of moving average in time
123
- :type stack_length: str
124
120
 
125
121
  """
126
- if self.endtime <= self.starttime:
122
+ if self.endtime < self.starttime:
127
123
  raise ValueError('Startime has to be smaller than endtime.')
128
124
 
129
125
  filename = self.feature_path(feature)
130
126
 
131
127
  logger.debug(
132
128
  f"Reading feature {feature} between {self.starttime} and {self.endtime}")
133
- num_periods = None
134
- if stack_length is not None:
135
- valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
136
- if re.match(r'\d*\s*(\w*)', stack_length).group(1) not in valid_stack_units:
137
- raise ValueError(
138
- 'Stack length should be one of: {}'.
139
- format(', '.join(valid_stack_units))
140
- )
141
-
142
- if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
143
- raise ValueError('Stack length {} is less than interval {}'.
144
- format(stack_length, interval))
145
-
146
- # Rewind starttime to account for stack length
147
- self.starttime -= pd.to_timedelta(stack_length)
148
-
149
- num_periods = (pd.to_timedelta(stack_length) /
150
- pd.to_timedelta(interval))
151
- if not num_periods.is_integer():
152
- raise ValueError(
153
- 'Stack length {} / interval {} = {}, but it needs'
154
- ' to be a whole number'.
155
- format(stack_length, interval, num_periods))
156
129
 
157
130
  xd_index = dict(datetime=slice(self.starttime, self.endtime))
158
- with xr.open_dataset(filename, group='original', engine=self.engine) as ds:
131
+ with xr.open_dataset(filename, group=group, engine=self.engine) as ds:
159
132
  rq = ds[feature].loc[xd_index].load()
160
133
  rq.attrs = ds.attrs
161
134
 
162
- # Stack features
163
- if stack_length is not None:
164
- logger.debug("Stacking feature...")
165
- try:
166
- xdf = rq.rolling(datetime=int(num_periods),
167
- center=False,
168
- min_periods=1).mean()
169
- # Return requested timeframe to that defined in initialisation
170
- self.starttime += pd.to_timedelta(stack_length)
171
- xdf_new = xdf.loc[self.starttime:self.endtime]
172
- xdf_new = xdf_new.rename(feature)
173
- except ValueError as e:
174
- logger.error(e)
175
- logger.error('Stack length {} is not valid for feature {}'.
176
- format(stack_length, feature))
177
- else:
178
- return xdf_new
179
135
  return rq
180
136
 
181
137
  def load(self, *args, **kwargs):
tonik/xarray2netcdf.py CHANGED
@@ -11,7 +11,7 @@ from cftime import date2num, num2date
11
11
  from .utils import merge_arrays
12
12
 
13
13
 
14
- def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
14
+ def xarray2netcdf(xArray, fdir, group="original", timedim="datetime",
15
15
  archive_starttime=datetime(2000, 1, 1), resolution=None,
16
16
  mode='a'):
17
17
  """
@@ -23,7 +23,7 @@ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
23
23
  Data to store.
24
24
  fdir : str
25
25
  Directory to store data under.
26
- rootGroupName : str
26
+ group : str
27
27
  Hdf5 group name.
28
28
  timedim : str
29
29
  Name of time dimension.
@@ -49,22 +49,22 @@ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
49
49
  if os.path.isfile(h5file) and mode == 'a':
50
50
  if archive_starttime > data_starttime:
51
51
  xds_existing = xr.open_dataset(
52
- h5file, group='original', engine='h5netcdf')
52
+ h5file, group=group, engine='h5netcdf')
53
53
  xda_new = merge_arrays(
54
54
  xds_existing[featureName], xArray[featureName],
55
55
  resolution=resolution)
56
56
  xds_existing.close()
57
- xda_new.to_netcdf(h5file, group='original',
57
+ xda_new.to_netcdf(h5file, group=group,
58
58
  mode='w', engine='h5netcdf')
59
59
  continue
60
60
  _mode = 'a'
61
61
 
62
62
  with h5netcdf.File(h5file, _mode) as h5f:
63
63
  try:
64
- rootGrp = _create_h5_Structure(rootGroupName, featureName,
64
+ rootGrp = _create_h5_Structure(group, featureName,
65
65
  h5f, xArray, starttime, timedim)
66
66
  except ValueError: # group already exists, append
67
- rootGrp = h5f[rootGroupName]
67
+ rootGrp = h5f[group]
68
68
 
69
69
  # determine indices
70
70
  new_time = date2num(xArray[timedim].values.astype('datetime64[us]').astype(datetime),
tonik/xarray2zarr.py CHANGED
@@ -2,13 +2,14 @@ import logging
2
2
  import os
3
3
 
4
4
  import xarray as xr
5
+ from zarr.errors import PathNotFoundError
5
6
 
6
7
  from .utils import merge_arrays
7
8
 
8
9
  logger = logging.getLogger(__name__)
9
10
 
10
11
 
11
- def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
12
+ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original'):
12
13
  """
13
14
  Write xarray dataset to zarr files.
14
15
 
@@ -20,6 +21,8 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
20
21
  Path to write the dataset.
21
22
  mode : str, optional
22
23
  Write mode, by default 'a'.
24
+ group : str, optional
25
+ Group name, by default 'original'
23
26
 
24
27
  Returns
25
28
  -------
@@ -29,28 +32,32 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
29
32
  fout = os.path.join(path, feature + '.zarr')
30
33
  if not os.path.exists(fout) or mode == 'w':
31
34
  xds[feature].to_zarr(
32
- fout, group='original', mode='w')
35
+ fout, group=group, mode='w')
33
36
  else:
34
- xds_existing = xr.open_zarr(fout, group='original')
37
+ try:
38
+ xds_existing = xr.open_zarr(fout, group=group)
39
+ except PathNotFoundError:
40
+ xds[feature].to_zarr(fout, group=group, mode='a')
41
+ continue
35
42
  if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
36
43
  xda_new = merge_arrays(xds_existing[feature], xds[feature])
37
- xda_new.to_zarr(fout, group='original', mode='w')
44
+ xda_new.to_zarr(fout, group=group, mode='w')
38
45
  else:
39
46
  try:
40
47
  overlap = xds_existing.datetime.where(
41
48
  xds_existing.datetime == xds.datetime)
42
49
  if overlap.size > 0:
43
50
  xds[feature].loc[dict(datetime=overlap)].to_zarr(
44
- fout, group='original', mode='r+', region='auto')
51
+ fout, group=group, mode='r+', region='auto')
45
52
  xds[feature].drop_sel(datetime=overlap).to_zarr(
46
- fout, group='original', mode='a', append_dim="datetime")
53
+ fout, group=group, mode='a', append_dim="datetime")
47
54
  else:
48
55
  xds[feature].to_zarr(
49
- fout, group='original', append_dim='datetime')
56
+ fout, group=group, append_dim='datetime')
50
57
  except Exception as e:
51
58
  msg = f"Appending {feature} to {fout} failed: {e}\n"
52
59
  msg += "Attempting to merge the two datasets."
53
60
  logger.error(msg)
54
61
  # remove duplicate datetime entries
55
62
  xda_new = merge_arrays(xds_existing[feature], xds[feature])
56
- xda_new.to_zarr(fout, group='original', mode='w')
63
+ xda_new.to_zarr(fout, group=group, mode='w')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tonik
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Store time series data as HDF5 files and access them through an API.
5
5
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
6
6
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -0,0 +1,12 @@
1
+ tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
2
+ tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
3
+ tonik/storage.py,sha256=GNJ6w9VHOeTR_ZJMZ-Ipqe3nFK2I91fkHYwg1k9bEuo,9470
4
+ tonik/utils.py,sha256=3nSRU_GnV6arP4e63YHn4oEV8XbqzVAW8FCvQVIwGdg,2757
5
+ tonik/xarray2netcdf.py,sha256=Bjt7kytnrlBgZrVNmRGir9TfNqyvW_ZNrwYBNvxiWio,5199
6
+ tonik/xarray2zarr.py,sha256=EhEo5kqzrKyXR37RX2zYtgOAviZdRqstZi_4ZtUmSDc,2342
7
+ tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
8
+ tonik-0.1.4.dist-info/METADATA,sha256=GkFoQugVoozfs2jTWNchQujP5RDATzeo1s7rqkx63GQ,1938
9
+ tonik-0.1.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
+ tonik-0.1.4.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
11
+ tonik-0.1.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
12
+ tonik-0.1.4.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
2
- tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
3
- tonik/storage.py,sha256=DNIfNb3oCgICcUI_MADQjQdUqkmZJIu42zCYGRs9ers,11512
4
- tonik/utils.py,sha256=3nSRU_GnV6arP4e63YHn4oEV8XbqzVAW8FCvQVIwGdg,2757
5
- tonik/xarray2netcdf.py,sha256=w8ubAWa2yCnk9-JQ_e7CHdfEjvjcK1pXol8Qy_PT2hY,5241
6
- tonik/xarray2zarr.py,sha256=jTrHFpjN3lEDIJEI6RyPvvbW2jZsfYwGE_LQaZ6dwx8,2099
7
- tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
8
- tonik-0.1.3.dist-info/METADATA,sha256=DaGP_RFy6luf75DQMr9UA3cfJXlOVJbdHFPU09RCE_4,1938
9
- tonik-0.1.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
10
- tonik-0.1.3.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
11
- tonik-0.1.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
12
- tonik-0.1.3.dist-info/RECORD,,
File without changes