tonik 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/__init__.py CHANGED
@@ -2,7 +2,7 @@ import importlib
  from os import PathLike
  from typing import Optional

- from .storage import StorageGroup, Path
+ from .storage import Storage, Path
  from .utils import generate_test_data


@@ -20,4 +20,4 @@ def get_data(filename: Optional[PathLike] = None) -> str:

      """
      f = importlib.resources.files(__package__)
-     return str(f) if filename is None else str(f / filename)
+     return str(f) if filename is None else str(f / filename)
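Apart from the StorageGroup → Storage rename, the public import surface is unchanged. A minimal sketch of the updated top-level API (the printed paths depend on where the package is installed):

from tonik import Storage, get_data

print(get_data())                           # package installation directory
print(get_data('package_data/index.html'))  # path to a bundled data file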
tonik/api.py CHANGED
@@ -15,7 +15,7 @@ from fastapi.responses import HTMLResponse, StreamingResponse
  from pydantic import BaseModel
  from typing import Annotated

- from .storage import StorageGroup
+ from .storage import Storage
  from . import get_data

  logger = logging.getLogger(__name__)
@@ -66,12 +66,12 @@ class TonikAPI:
                  subdir: Annotated[list[str] | None, Query()] = None):
          _st = self.preprocess_datetime(starttime)
          _et = self.preprocess_datetime(endtime)
-         g = StorageGroup(group, rootdir=self.rootdir,
-                          starttime=_st, endtime=_et)
+         g = Storage(group, rootdir=self.rootdir,
+                     starttime=_st, endtime=_et)
          if subdir is None:
              c = g
          else:
-             c = g.get_store(*subdir)
+             c = g.get_substore(*subdir)
          try:
              feat = c(name)
          except ValueError as e:
@@ -137,11 +137,21 @@ class TonikAPI:
              d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
          return freq, dates, spec

-     def inventory(self, group: str) -> dict:
-         sg = StorageGroup(group, rootdir=self.rootdir)
-         return sg.to_dict()
-
-     # ta = TonikAPI('/tmp').feature()
+     def inventory(self, group: str, subdir: Annotated[list[str] | None, Query()] = None, tree: bool = True) -> list | dict:
+         sg = Storage(group, rootdir=self.rootdir, create=False)
+         try:
+             c = sg.get_substore(*subdir)
+         except TypeError:
+             c = sg
+         except FileNotFoundError as e:
+             msg = "Directory {} not found.".format(
+                 '/'.join([sg.path] + subdir))
+             raise HTTPException(status_code=404, detail=msg)
+         if tree and not subdir:
+             return sg.to_dict()
+         else:
+             dir_contents = os.listdir(c.path)
+             return [fn.replace('.nc', '').replace('.zarr', '') for fn in dir_contents]


  def main(argv=None):
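The reworked inventory endpoint returns the full directory tree when tree is true and no subdir is given, otherwise a flat list of feature names with the '.nc'/'.zarr' endings stripped, and answers 404 if the requested sub-store directory does not exist. A hedged client sketch; the /inventory route path and host are assumptions, since the diff only shows the handler:

import requests

base = "http://localhost:8000"  # assumed address of a running TonikAPI app

# full tree for a group (tree defaults to True)
tree = requests.get(f"{base}/inventory", params={"group": "volcanoes"}).json()

# flat listing of one sub-store; file endings are stripped by the handler
names = requests.get(f"{base}/inventory",
                     params={"group": "volcanoes", "subdir": ["WIZ", "HHZ"],
                             "tree": False}).json()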
tonik/storage.py CHANGED
@@ -1,19 +1,13 @@
- from datetime import datetime, timedelta
- import json
- import glob
  import logging
  import logging.config
  import os
  import re
- import tempfile

  import pandas as pd
  import xarray as xr

  from .xarray2hdf5 import xarray2hdf5
-
-
- ERROR_LOG_FILENAME = "tonik.log"
+ from .xarray2zarr import xarray2zarr

  LOGGING_CONFIG = {
      "version": 1,
@@ -51,14 +45,6 @@ LOGGING_CONFIG = {
          },
      },
      "handlers": {
-         "logfile": {  # The handler name
-             "formatter": "json",  # Refer to the formatter defined above
-             "level": "ERROR",  # FILTER: Only ERROR and CRITICAL logs
-             "class": "logging.handlers.RotatingFileHandler",  # OUTPUT: Which class to use
-             # Param for class above. Defines filename to use, load it from constant
-             "filename": ERROR_LOG_FILENAME,
-             "backupCount": 2,  # Param for class above. Defines how many log files to keep as it grows
-         },
          "simple": {  # The handler name
              "formatter": "default",  # Refer to the formatter defined above
              "class": "logging.StreamHandler",  # OUTPUT: Same as above, stream to console
@@ -66,7 +52,7 @@ LOGGING_CONFIG = {
          },
      },
      "loggers": {
-         "zizou": {  # The name of the logger, this SHOULD match your module!
+         "storage": {  # The name of the logger, this SHOULD match your module!
              "level": "DEBUG",  # FILTER: only INFO logs onwards from "tryceratops" logger
              "handlers": [
                  "simple",  # Refer the handler defined above
@@ -74,9 +60,9 @@ LOGGING_CONFIG = {
          },
      },
      "root": {
-         "level": "ERROR",  # FILTER: only INFO logs onwards
+         "level": "INFO",  # FILTER: only INFO logs onwards
          "handlers": [
-             "logfile",  # Refer the handler defined above
+             "simple",  # Refer the handler defined above
          ]
      },
  }
@@ -86,13 +72,19 @@ logger = logging.getLogger("__name__")


  class Path(object):
-     def __init__(self, name, parentdir):
+     def __init__(self, name, parentdir, create=True, backend='zarr'):
          self.name = name
+         self.create = create
+         self.backend = backend
          self.path = os.path.join(parentdir, name)
-         try:
-             os.makedirs(self.path, exist_ok=True)
-         except FileExistsError:
-             pass
+         if create:
+             try:
+                 os.makedirs(self.path, exist_ok=True)
+             except FileExistsError:
+                 pass
+         else:
+             if not os.path.exists(self.path):
+                 raise FileNotFoundError(f"Path {self.path} not found")
          self.children = {}

      def __str__(self):
@@ -104,14 +96,20 @@ class Path(object):
          try:
              return self.children[key]
          except KeyError:
-             self.children[key] = Path(key, self.path)
+             self.children[key] = Path(
+                 key, self.path, self.create, self.backend)
              return self.children[key]

      def feature_path(self, feature):
-         _feature_path = os.path.join(self.path, feature + ".nc")
+
+         if self.backend == 'h5netcdf':
+             file_ending = '.nc'
+         elif self.backend == 'zarr':
+             file_ending = '.zarr'
+         _feature_path = os.path.join(self.path, feature + file_ending)
          if not os.path.exists(_feature_path):
              raise FileNotFoundError(f"File {_feature_path} not found")
-         self.children[feature] = Path(feature + ".nc", self.path)
+         self.children[feature] = Path(feature + file_ending, self.path)
          return _feature_path

      def __call__(self, feature, stack_length=None, interval='10min'):
@@ -134,8 +132,7 @@ class Path(object):
          num_periods = None
          if stack_length is not None:
              valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
-             if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
-                     in valid_stack_units:
+             if re.match(r'\d*\s*(\w*)', stack_length).group(1) not in valid_stack_units:
                  raise ValueError(
                      'Stack length should be one of: {}'.
                      format(', '.join(valid_stack_units))
@@ -157,9 +154,12 @@ class Path(object):
                      format(stack_length, interval, num_periods))

          xd_index = dict(datetime=slice(self.starttime, self.endtime))
-         with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
-             ds.sortby("datetime")
-             rq = ds.loc[xd_index].load()
+         with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
+             try:
+                 rq = ds.loc[xd_index].load()
+             except KeyError:
+                 ds = ds.sortby("datetime")
+                 rq = ds.loc[xd_index].load()

          # Stack features
          if stack_length is not None:
@@ -191,10 +191,13 @@ class Path(object):
          """
          Save a feature to disk
          """
-         xarray2hdf5(data, self.path, **kwargs)
+         if self.backend == 'h5netcdf':
+             xarray2hdf5(data, self.path, **kwargs)
+         elif self.backend == 'zarr':
+             xarray2zarr(data, self.path, **kwargs)


- class StorageGroup(Path):
+ class Storage(Path):
      """
      Query computed features

@@ -206,7 +209,7 @@ class StorageGroup(Path):
      :type endtime: :class:`datetime.datetime`

      >>> import datetime
-     >>> g = Group('Whakaari')
+     >>> g = Storage('Whakaari', /tmp)
      >>> start = datetime.datetime(2012,1,1,0,0,0)
      >>> end = datetime.datetime(2012,1,2,23,59,59)
      >>> g.starttime = start
@@ -215,11 +218,11 @@ class StorageGroup(Path):
      >>> rsam = c("rsam")
      """

-     def __init__(self, name, rootdir=None, starttime=None, endtime=None):
+     def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='zarr'):
          self.stores = set()
          self.starttime = starttime
          self.endtime = endtime
-         super().__init__(name, rootdir)
+         super().__init__(name, rootdir, create, backend)

      def print_tree(self, site, indent=0, output=''):
          output += ' ' * indent + site.path + '\n'
@@ -232,7 +235,7 @@ class StorageGroup(Path):
          rstr = self.print_tree(self, 0, rstr)
          return rstr

-     def get_store(self, *args):
+     def get_substore(self, *args):
          # return the store for a given site, sensor, or channel
          # if one of them is None return the store for the level above
          # if all are None return the root store
@@ -257,30 +260,35 @@ class StorageGroup(Path):
              try:
                  subdirs = root.split(self.path)[1].split(os.sep)[1:]
              except IndexError:
-                 st = self.get_store()
+                 st = self.get_substore()
              else:
                  try:
-                     st = self.get_store(*subdirs)
+                     st = self.get_substore(*subdirs)
                  except TypeError as e:
                      raise e
              for _f in files:
                  if _f.endswith('.nc'):
-                     st.feature_path(_f.replace('.nc', ''))
+                     st.feature_path(_f.replace(
+                         '.nc', '').replace('.zarr', ''))

      @staticmethod
      def directory_tree_to_dict(path):
          name = os.path.basename(path)
-         if os.path.isdir(path):
-             return {name: [StorageGroup.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
+         if name.endswith('.zarr'):
+             return name.replace('.zarr', '')
+         elif os.path.isdir(path):
+             return {name: [Storage.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
          else:
-             if path.endswith('.nc'):
+             if name.endswith('.nc'):
                  return name.replace('.nc', '')
+             else:
+                 return

      def to_dict(self):
          """
          Convert the storage group to json
          """
-         return StorageGroup.directory_tree_to_dict(self.path)
+         return Storage.directory_tree_to_dict(self.path)

      def get_starttime(self):
          return self.__starttime
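In sum, storage.py renames StorageGroup to Storage and get_store to get_substore, makes rootdir a required argument, and threads new create and backend flags through Path, with zarr now the default backend. A usage sketch following the class docstring above (group and sub-store names are illustrative):

import datetime
from tonik import Storage

g = Storage('Whakaari', rootdir='/tmp', backend='zarr')  # create=True by default
g.starttime = datetime.datetime(2012, 1, 1)
g.endtime = datetime.datetime(2012, 1, 2, 23, 59, 59)
c = g.get_substore('WIZ', 'HHZ')  # formerly g.get_store(...)
rsam = c("rsam")                  # reads rsam.zarr via the zarr engine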
tonik/utils.py CHANGED
@@ -6,8 +6,9 @@ import xarray as xr


  def generate_test_data(dim=1, ndays=30, nfreqs=10,
-                        tstart=datetime.utcnow(),
-                        feature_name=None,
+                        tstart=datetime.now(),
+                        freq='10min', intervals=None,
+                        feature_name=None, seed=42,
                         freq_name=None, add_nans=True):
      """
      Generate a 1D or 2D feature for testing.
@@ -15,9 +16,12 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
      assert dim < 3
      assert dim > 0

-     nints = ndays * 6 * 24
-     dates = pd.date_range(tstart.strftime('%Y-%m-%d'), freq='10min', periods=nints)
-     rs = np.random.default_rng(42)
+     if intervals is None:
+         nints = ndays * 6 * 24
+     else:
+         nints = intervals
+     dates = pd.date_range(tstart, freq=freq, periods=nints)
+     rs = np.random.default_rng(seed)
      # Random walk as test signal
      data = np.abs(np.cumsum(rs.normal(0, 8., len(dates))))
      if dim == 2:
@@ -29,7 +33,8 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
          data[idx_nan] = np.nan
      if feature_name is None:
          feature_name = 'rsam'
-     xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
+     xrd = xr.Dataset({feature_name: xr.DataArray(
+         data, coords=[dates], dims=['datetime'])})
      if dim == 2:
          if add_nans:
              data[:, idx_nan] = np.nan
@@ -38,9 +43,10 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
          feature_name = 'ssam'
      if freq_name is None:
          freq_name = 'frequency'
-     xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
+     xrd = xr.Dataset({feature_name: xr.DataArray(
+         data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
      xrd.attrs['starttime'] = dates[0].isoformat()
      xrd.attrs['endtime'] = dates[-1].isoformat()
      xrd.attrs['station'] = 'MDR'
      xrd.attrs['interval'] = '10min'
-     return xrd
+     return xrd
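generate_test_data now exposes the sampling frequency, the number of intervals, and the RNG seed as parameters, and passes tstart straight to pd.date_range instead of flooring it to midnight. For example:

from datetime import datetime
from tonik.utils import generate_test_data

# 100 one-minute samples of the 1D 'rsam' test feature, reproducible via seed
xds = generate_test_data(dim=1, intervals=100, freq='1min',
                         tstart=datetime(2024, 1, 1), seed=7)
print(xds['rsam'].sizes)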
tonik/xarray2zarr.py ADDED
@@ -0,0 +1,23 @@
+ import os
+
+ import xarray as xr
+
+
+ def xarray2zarr(xds, path, mode='a'):
+     for feature in xds.data_vars.keys():
+         fout = os.path.join(path, feature + '.zarr')
+         if not os.path.exists(fout) or mode == 'w':
+             xds[feature].to_zarr(
+                 fout, group='original', mode='w')
+         else:
+             xds_existing = xr.open_zarr(fout, group='original')
+             overlap = xds_existing.datetime.where(
+                 xds_existing.datetime == xds.datetime)
+             if overlap.size > 0:
+                 xds.loc[dict(datetime=overlap)].to_zarr(
+                     fout, group='original', mode='r+', region='auto')
+                 xds.drop_sel(datetime=overlap).to_zarr(
+                     fout, group='original', mode='a', append_dim="datetime")
+             else:
+                 xds[feature].to_zarr(
+                     fout, group='original', append_dim='datetime')
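The new xarray2zarr writer puts each data variable in its own <feature>.zarr store under group 'original': first writes (or mode='w') create the store, timestamps already present are updated in place via region='auto', and new timestamps are appended along the datetime dimension. A hedged round-trip sketch using the package's own test-data helper (the path is a placeholder):

import os
from datetime import datetime

import xarray as xr
from tonik.utils import generate_test_data
from tonik.xarray2zarr import xarray2zarr

path = '/tmp/tonik_zarr_demo'  # placeholder directory
os.makedirs(path, exist_ok=True)

# two disjoint hour-long chunks: the second gets appended along 'datetime'
xarray2zarr(generate_test_data(tstart=datetime(2024, 1, 1), intervals=6), path)
xarray2zarr(generate_test_data(tstart=datetime(2024, 1, 2), intervals=6), path)

ds = xr.open_zarr(os.path.join(path, 'rsam.zarr'), group='original')
print(ds.datetime.size)  # 12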
{tonik-0.0.7.dist-info → tonik-0.0.8.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.1
  Name: tonik
- Version: 0.0.7
+ Version: 0.0.8
  Summary: Store time series data as HDF5 files and access them through an API.
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -20,6 +20,7 @@ Requires-Dist: pandas>=2.0
  Requires-Dist: python-json-logger>=2.0
  Requires-Dist: uvicorn[standard]>=0.22
  Requires-Dist: xarray>=2023.4
+ Requires-Dist: zarr
  Provides-Extra: dev
  Requires-Dist: mkdocs; extra == 'dev'
  Requires-Dist: mkdocs-jupyter; extra == 'dev'
tonik-0.0.8.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
+ tonik/api.py,sha256=vdsWHNGGWo4sbqlDyZQj2tX5oe6hAWCzyL6ffsfpCB4,6437
+ tonik/storage.py,sha256=sScIFA4KXURNPwTnV-rvDh6cWCy9sRrErr9BshZpw2I,11303
+ tonik/utils.py,sha256=YD2zZx5nKGfTJKTYTsEZVV78uNRCSakvU_6X6Mgwx-s,1664
+ tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
+ tonik/xarray2zarr.py,sha256=d7FAOe7DESbKC9CZS41r62DjlNy0S8ik01lMGXBvJKw,901
+ tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
+ tonik-0.0.8.dist-info/METADATA,sha256=uqrHvBl01n05hqWN8lnNETuoYIGZ8vNq9QWnRmajcEY,1938
+ tonik-0.0.8.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
+ tonik-0.0.8.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
+ tonik-0.0.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ tonik-0.0.8.dist-info/RECORD,,
{tonik-0.0.7.dist-info → tonik-0.0.8.dist-info}/WHEEL RENAMED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.25.0
+ Generator: hatchling 1.17.1
  Root-Is-Purelib: true
  Tag: py3-none-any
tonik-0.0.7.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
- tonik/api.py,sha256=gnwoss7UV8FaY92xzumhcoVPjkzB695qgByHUYcLSw4,5916
- tonik/storage.py,sha256=pJnvoGFb8uZqnpkjOsgnntW-a7dhKVlvevs725nAS54,11009
- tonik/utils.py,sha256=nV0lK8Azasr8LUuQGXxfxef6nU3bn3dCTQnQTmWsKAY,1534
- tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
- tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
- tonik-0.0.7.dist-info/METADATA,sha256=6DhYEfnEAWSKLEZJQQRiRF_cZAGAQFK6mLmHQEYJbuE,1918
- tonik-0.0.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
- tonik-0.0.7.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
- tonik-0.0.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- tonik-0.0.7.dist-info/RECORD,,