tonik-0.0.7-py3-none-any.whl → tonik-0.0.9-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
tonik/__init__.py CHANGED
@@ -2,7 +2,7 @@ import importlib
 from os import PathLike
 from typing import Optional
 
-from .storage import StorageGroup, Path
+from .storage import Storage, Path
 from .utils import generate_test_data
 
 
@@ -20,4 +20,4 @@ def get_data(filename: Optional[PathLike] = None) -> str:
 
     """
     f = importlib.resources.files(__package__)
-    return str(f) if filename is None else str(f / filename)
+    return str(f) if filename is None else str(f / filename)
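The only user-facing change in tonik/__init__.py is the renamed export: StorageGroup is now Storage. A minimal migration sketch (assuming the top-level import path shown in the diff above; rootdir is required in 0.0.9, see tonik/storage.py below):

    # tonik <= 0.0.7
    # from tonik import StorageGroup
    # sg = StorageGroup('Whakaari', rootdir='/tmp')

    # tonik >= 0.0.9: same idea, new name
    from tonik import Storage

    sg = Storage('Whakaari', rootdir='/tmp')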
tonik/api.py CHANGED
@@ -1,22 +1,21 @@
-from argparse import ArgumentParser
-from datetime import timedelta, datetime, timezone
 import logging
 import os
+from argparse import ArgumentParser
+from datetime import datetime
+from typing import Annotated
 from urllib.parse import unquote
 
-from cftime import num2date, date2num
 import datashader as dsh
 import numpy as np
 import pandas as pd
 import uvicorn
+from cftime import date2num, num2date
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import HTMLResponse, StreamingResponse
-from pydantic import BaseModel
-from typing import Annotated
 
-from .storage import StorageGroup
 from . import get_data
+from .storage import Storage
 
 logger = logging.getLogger(__name__)
 
@@ -66,12 +65,13 @@ class TonikAPI:
                 subdir: Annotated[list[str] | None, Query()] = None):
         _st = self.preprocess_datetime(starttime)
         _et = self.preprocess_datetime(endtime)
-        g = StorageGroup(group, rootdir=self.rootdir,
-                         starttime=_st, endtime=_et)
+        g = Storage(group, rootdir=self.rootdir,
+                    starttime=_st, endtime=_et,
+                    create=False)
         if subdir is None:
             c = g
         else:
-            c = g.get_store(*subdir)
+            c = g.get_substore(*subdir)
         try:
             feat = c(name)
         except ValueError as e:
@@ -103,13 +103,15 @@ class TonikAPI:
         else:
             df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
         df['dates'] = df.index
-        try:
-            current_resolution = pd.Timedelta(df['dates'].diff().mean())
-            if current_resolution < pd.Timedelta(resolution):
-                df = df.resample(pd.Timedelta(resolution)).mean()
-        except ValueError as e:
-            logger.warning(
-                f"Cannot resample {feat.name} to {resolution}: e")
+        if resolution != 'full':
+            try:
+                current_resolution = pd.Timedelta(
+                    df['dates'].diff().mean())
+                if current_resolution < pd.Timedelta(resolution):
+                    df = df.resample(pd.Timedelta(resolution)).mean()
+            except ValueError:
+                logger.warning(
+                    f"Cannot resample {feat.name} to {resolution}: e")
         df.rename(columns={feat.name: 'feature'}, inplace=True)
         output = df.to_csv(index=False, columns=['dates', 'feature'])
         return StreamingResponse(iter([output]),
@@ -137,11 +139,21 @@
             d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
         return freq, dates, spec
 
-    def inventory(self, group: str) -> dict:
-        sg = StorageGroup(group, rootdir=self.rootdir)
-        return sg.to_dict()
-
-# ta = TonikAPI('/tmp').feature()
+    def inventory(self, group: str, subdir: Annotated[list[str] | None, Query()] = None, tree: bool = True) -> list | dict:
+        sg = Storage(group, rootdir=self.rootdir, create=False)
+        try:
+            c = sg.get_substore(*subdir)
+        except TypeError:
+            c = sg
+        except FileNotFoundError:
+            msg = "Directory {} not found.".format(
+                '/'.join([sg.path] + subdir))
+            raise HTTPException(status_code=404, detail=msg)
+        if tree and not subdir:
+            return sg.to_dict()
+        else:
+            dir_contents = os.listdir(c.path)
+            return [fn.replace('.nc', '').replace('.zarr', '') for fn in dir_contents]
 
 
 def main(argv=None):
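The feature endpoint now skips resampling entirely when the requested resolution is 'full', and otherwise only coarsens data whose native spacing is finer than the request. A standalone sketch of that guard, using a hypothetical DataFrame of 10-minute samples rather than the endpoint's real input:

    import numpy as np
    import pandas as pd

    # Hypothetical input: one day of 10-minute samples
    dates = pd.date_range('2023-01-01', freq='10min', periods=144)
    df = pd.DataFrame({'feature': np.arange(144.0)}, index=dates)
    df['dates'] = df.index

    resolution = '1h'  # a request for 'full' would bypass this block
    if resolution != 'full':
        # mean spacing of the samples we already have
        current_resolution = pd.Timedelta(df['dates'].diff().mean())
        if current_resolution < pd.Timedelta(resolution):
            # only ever downsample; never invent finer data
            df = df.resample(pd.Timedelta(resolution)).mean()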
tonik/storage.py CHANGED
@@ -1,19 +1,13 @@
-from datetime import datetime, timedelta
-import json
-import glob
 import logging
 import logging.config
 import os
 import re
-import tempfile
 
 import pandas as pd
 import xarray as xr
 
 from .xarray2hdf5 import xarray2hdf5
-
-
-ERROR_LOG_FILENAME = "tonik.log"
+from .xarray2zarr import xarray2zarr
 
 LOGGING_CONFIG = {
     "version": 1,
@@ -51,14 +45,6 @@ LOGGING_CONFIG = {
         },
     },
     "handlers": {
-        "logfile": {  # The handler name
-            "formatter": "json",  # Refer to the formatter defined above
-            "level": "ERROR",  # FILTER: Only ERROR and CRITICAL logs
-            "class": "logging.handlers.RotatingFileHandler",  # OUTPUT: Which class to use
-            # Param for class above. Defines filename to use, load it from constant
-            "filename": ERROR_LOG_FILENAME,
-            "backupCount": 2,  # Param for class above. Defines how many log files to keep as it grows
-        },
         "simple": {  # The handler name
             "formatter": "default",  # Refer to the formatter defined above
             "class": "logging.StreamHandler",  # OUTPUT: Same as above, stream to console
@@ -66,7 +52,7 @@ LOGGING_CONFIG = {
         },
     },
     "loggers": {
-        "zizou": {  # The name of the logger, this SHOULD match your module!
+        "storage": {  # The name of the logger, this SHOULD match your module!
            "level": "DEBUG",  # FILTER: only INFO logs onwards from "tryceratops" logger
            "handlers": [
                "simple",  # Refer the handler defined above
@@ -74,9 +60,9 @@ LOGGING_CONFIG = {
         },
     },
     "root": {
-        "level": "ERROR",  # FILTER: only INFO logs onwards
+        "level": "INFO",  # FILTER: only INFO logs onwards
         "handlers": [
-            "logfile",  # Refer the handler defined above
+            "simple",  # Refer the handler defined above
         ]
     },
 }
@@ -86,13 +72,19 @@ logger = logging.getLogger("__name__")
 
 
 class Path(object):
-    def __init__(self, name, parentdir):
+    def __init__(self, name, parentdir, create=True, backend='zarr'):
         self.name = name
+        self.create = create
+        self.backend = backend
         self.path = os.path.join(parentdir, name)
-        try:
-            os.makedirs(self.path, exist_ok=True)
-        except FileExistsError:
-            pass
+        if create:
+            try:
+                os.makedirs(self.path, exist_ok=True)
+            except FileExistsError:
+                pass
+        else:
+            if not os.path.exists(self.path):
+                raise FileNotFoundError(f"Path {self.path} not found")
         self.children = {}
 
     def __str__(self):
@@ -104,14 +96,20 @@
         try:
             return self.children[key]
         except KeyError:
-            self.children[key] = Path(key, self.path)
+            self.children[key] = Path(
+                key, self.path, self.create, self.backend)
             return self.children[key]
 
     def feature_path(self, feature):
-        _feature_path = os.path.join(self.path, feature + ".nc")
+
+        if self.backend == 'h5netcdf':
+            file_ending = '.nc'
+        elif self.backend == 'zarr':
+            file_ending = '.zarr'
+        _feature_path = os.path.join(self.path, feature + file_ending)
         if not os.path.exists(_feature_path):
             raise FileNotFoundError(f"File {_feature_path} not found")
-        self.children[feature] = Path(feature + ".nc", self.path)
+        self.children[feature] = Path(feature + file_ending, self.path)
         return _feature_path
 
     def __call__(self, feature, stack_length=None, interval='10min'):
@@ -134,8 +132,7 @@
         num_periods = None
         if stack_length is not None:
             valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
-            if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
-                    in valid_stack_units:
+            if re.match(r'\d*\s*(\w*)', stack_length).group(1) not in valid_stack_units:
                 raise ValueError(
                     'Stack length should be one of: {}'.
                     format(', '.join(valid_stack_units))
@@ -157,9 +154,12 @@
                 format(stack_length, interval, num_periods))
 
         xd_index = dict(datetime=slice(self.starttime, self.endtime))
-        with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
-            ds.sortby("datetime")
-            rq = ds.loc[xd_index].load()
+        with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
+            try:
+                rq = ds.loc[xd_index].load()
+            except KeyError:
+                ds = ds.sortby("datetime")
+                rq = ds.loc[xd_index].load()
 
         # Stack features
         if stack_length is not None:
@@ -191,10 +191,13 @@
         """
         Save a feature to disk
         """
-        xarray2hdf5(data, self.path, **kwargs)
+        if self.backend == 'h5netcdf':
+            xarray2hdf5(data, self.path, **kwargs)
+        elif self.backend == 'zarr':
+            xarray2zarr(data, self.path, **kwargs)
 
 
-class StorageGroup(Path):
+class Storage(Path):
     """
     Query computed features
 
@@ -206,7 +209,7 @@ class StorageGroup(Path):
     :type endtime: :class:`datetime.datetime`
 
     >>> import datetime
-    >>> g = Group('Whakaari')
+    >>> g = Storage('Whakaari', /tmp)
     >>> start = datetime.datetime(2012,1,1,0,0,0)
     >>> end = datetime.datetime(2012,1,2,23,59,59)
     >>> g.starttime = start
@@ -215,11 +218,11 @@
     >>> rsam = c("rsam")
     """
 
-    def __init__(self, name, rootdir=None, starttime=None, endtime=None):
+    def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='zarr'):
         self.stores = set()
         self.starttime = starttime
         self.endtime = endtime
-        super().__init__(name, rootdir)
+        super().__init__(name, rootdir, create, backend)
 
     def print_tree(self, site, indent=0, output=''):
         output += ' ' * indent + site.path + '\n'
@@ -232,7 +235,7 @@
         rstr = self.print_tree(self, 0, rstr)
         return rstr
 
-    def get_store(self, *args):
+    def get_substore(self, *args):
         # return the store for a given site, sensor, or channel
         # if one of them is None return the store for the level above
         # if all are None return the root store
@@ -257,30 +260,35 @@
             try:
                 subdirs = root.split(self.path)[1].split(os.sep)[1:]
             except IndexError:
-                st = self.get_store()
+                st = self.get_substore()
             else:
                 try:
-                    st = self.get_store(*subdirs)
+                    st = self.get_substore(*subdirs)
                 except TypeError as e:
                     raise e
             for _f in files:
                 if _f.endswith('.nc'):
-                    st.feature_path(_f.replace('.nc', ''))
+                    st.feature_path(_f.replace(
+                        '.nc', '').replace('.zarr', ''))
 
     @staticmethod
     def directory_tree_to_dict(path):
         name = os.path.basename(path)
-        if os.path.isdir(path):
-            return {name: [StorageGroup.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
+        if name.endswith('.zarr'):
+            return name.replace('.zarr', '')
+        elif os.path.isdir(path):
+            return {name: [Storage.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
         else:
-            if path.endswith('.nc'):
+            if name.endswith('.nc'):
                 return name.replace('.nc', '')
+            else:
+                return
 
     def to_dict(self):
        """
        Convert the storage group to json
        """
-        return StorageGroup.directory_tree_to_dict(self.path)
+        return Storage.directory_tree_to_dict(self.path)
 
     def get_starttime(self):
         return self.__starttime
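Taken together, the storage changes rename StorageGroup to Storage and get_store to get_substore, add a create flag so read-only consumers raise FileNotFoundError instead of silently creating directories, and thread a backend choice ('zarr' by default, 'h5netcdf' for the legacy HDF5 layout) down to every child Path. A usage sketch based on the doctest above; the sub-store names are illustrative:

    import datetime

    from tonik import Storage

    g = Storage('Whakaari', rootdir='/tmp')  # created on disk, since create=True
    g.starttime = datetime.datetime(2012, 1, 1)
    g.endtime = datetime.datetime(2012, 1, 2, 23, 59, 59)

    c = g.get_substore('WIZ', '00', 'HHZ')   # e.g. site/sensor/channel
    rsam = c('rsam')                         # reads rsam.zarr between starttime and endtime

    # Read-only handle: fails fast if /tmp/Whakaari does not exist
    ro = Storage('Whakaari', rootdir='/tmp', create=False)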
tonik/utils.py CHANGED
@@ -6,41 +6,53 @@ import xarray as xr
 
 
 def generate_test_data(dim=1, ndays=30, nfreqs=10,
-                       tstart=datetime.utcnow(),
-                       feature_name=None,
-                       freq_name=None, add_nans=True):
+                       tstart=datetime.now(),
+                       freq='10min', intervals=None,
+                       feature_names=None, seed=42,
+                       freq_names=None, add_nans=True):
     """
     Generate a 1D or 2D feature for testing.
     """
     assert dim < 3
     assert dim > 0
 
-    nints = ndays * 6 * 24
-    dates = pd.date_range(tstart.strftime('%Y-%m-%d'), freq='10min', periods=nints)
-    rs = np.random.default_rng(42)
+    if intervals is None:
+        nints = ndays * 6 * 24
+    else:
+        nints = intervals
+    dates = pd.date_range(tstart, freq=freq, periods=nints)
+    rs = np.random.default_rng(seed)
     # Random walk as test signal
     data = np.abs(np.cumsum(rs.normal(0, 8., len(dates))))
     if dim == 2:
         data = np.tile(data, (nfreqs, 1))
     # Add 10% NaNs
     idx_nan = rs.integers(0, nints-1, int(0.1*nints))
+
+    xds_dict = {}
     if dim == 1:
         if add_nans:
             data[idx_nan] = np.nan
-        if feature_name is None:
-            feature_name = 'rsam'
-        xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
+        if feature_names is None:
+            feature_names = ['rsam', 'dsar']
+        for feature in feature_names:
+            xds_dict[feature] = xr.DataArray(
+                data, coords=[dates], dims=['datetime'])
     if dim == 2:
         if add_nans:
             data[:, idx_nan] = np.nan
         freqs = np.arange(nfreqs)
-        if feature_name is None:
-            feature_name = 'ssam'
-        if freq_name is None:
-            freq_name = 'frequency'
-        xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
-    xrd.attrs['starttime'] = dates[0].isoformat()
-    xrd.attrs['endtime'] = dates[-1].isoformat()
-    xrd.attrs['station'] = 'MDR'
-    xrd.attrs['interval'] = '10min'
-    return xrd
+        if feature_names is None:
+            feature_names = ['ssam', 'filterbank']
+        if freq_names is None:
+            freq_names = ['frequency', 'fbfrequency']
+
+        for feature_name, freq_name in zip(feature_names, freq_names):
+            xds_dict[feature_name] = xr.DataArray(
+                data, coords=[freqs, dates], dims=[freq_name, 'datetime'])
+    xds = xr.Dataset(xds_dict)
+    xds.attrs['starttime'] = dates[0].isoformat()
+    xds.attrs['endtime'] = dates[-1].isoformat()
+    xds.attrs['station'] = 'MDR'
+    xds.attrs['interval'] = '10min'
+    return xds
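generate_test_data now takes an explicit seed, a sampling freq or interval count, and lists of variable names, and it returns a single Dataset holding several data variables instead of one. A short sketch of the new signature (argument values are illustrative):

    from tonik import generate_test_data

    # 1D: the defaults produce the two random-walk features 'rsam' and 'dsar'
    xds = generate_test_data(dim=1, ndays=2)
    print(list(xds.data_vars))  # ['rsam', 'dsar']

    # 2D: explicit interval count plus custom variable and frequency-dimension names
    xds2 = generate_test_data(dim=2, intervals=288, nfreqs=8, seed=7,
                              feature_names=['ssam'], freq_names=['frequency'])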
tonik/xarray2zarr.py ADDED
@@ -0,0 +1,37 @@
+import logging
+import os
+
+import xarray as xr
+
+logger = logging.getLogger(__name__)
+
+
+def xarray2zarr(xds, path, mode='a'):
+    for feature in xds.data_vars.keys():
+        fout = os.path.join(path, feature + '.zarr')
+        if not os.path.exists(fout) or mode == 'w':
+            xds[feature].to_zarr(
+                fout, group='original', mode='w')
+        else:
+            xds_existing = xr.open_zarr(fout, group='original')
+            try:
+                overlap = xds_existing.datetime.where(
+                    xds_existing.datetime == xds.datetime)
+                if overlap.size > 0:
+                    xds[feature].loc[dict(datetime=overlap)].to_zarr(
+                        fout, group='original', mode='r+', region='auto')
+                    xds[feature].drop_sel(datetime=overlap).to_zarr(
+                        fout, group='original', mode='a', append_dim="datetime")
+                else:
+                    xds[feature].to_zarr(
+                        fout, group='original', append_dim='datetime')
+            except Exception as e:
+                msg = f"Appending {feature} to {fout} failed: {e}\n"
+                msg += "Attempting to merge the two datasets."
+                logger.error(msg)
+                # remove duplicate datetime entries
+                xda_existing = xds_existing[feature].drop_duplicates(
+                    'datetime', keep='last')
+                xda_new = xds[feature].drop_duplicates('datetime', keep='last')
+                xda_new.combine_first(xda_existing).to_zarr(
+                    fout, group='original', mode='w')
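The new zarr writer stores each data variable in its own <feature>.zarr directory under a group named 'original': the first write creates the store, later writes update overlapping timestamps in place and append the rest, and any failure falls back to a merge-and-rewrite that drops duplicate datetimes. A round-trip sketch (the /tmp/demo path and the second batch are illustrative):

    import os

    import xarray as xr

    from tonik.utils import generate_test_data
    from tonik.xarray2zarr import xarray2zarr

    os.makedirs('/tmp/demo', exist_ok=True)
    xds = generate_test_data(dim=1, ndays=1)
    xarray2zarr(xds, '/tmp/demo')    # initial write: one store per data variable

    # A later batch starting where the first ended is appended or, on
    # failure, merged with the existing store.
    xds2 = generate_test_data(dim=1, ndays=1, tstart=xds.datetime.values[-1])
    xarray2zarr(xds2, '/tmp/demo')

    rsam = xr.open_zarr('/tmp/demo/rsam.zarr', group='original')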
{tonik-0.0.7.dist-info → tonik-0.0.9.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tonik
-Version: 0.0.7
+Version: 0.0.9
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -20,6 +20,7 @@ Requires-Dist: pandas>=2.0
 Requires-Dist: python-json-logger>=2.0
 Requires-Dist: uvicorn[standard]>=0.22
 Requires-Dist: xarray>=2023.4
+Requires-Dist: zarr
 Provides-Extra: dev
 Requires-Dist: mkdocs; extra == 'dev'
 Requires-Dist: mkdocs-jupyter; extra == 'dev'
tonik-0.0.9.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
+tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
+tonik/storage.py,sha256=sScIFA4KXURNPwTnV-rvDh6cWCy9sRrErr9BshZpw2I,11303
+tonik/utils.py,sha256=_TxXf9o9fOvtuOvGO6-ww9F5m0QelHyfQzQw8RGjTV4,1868
+tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
+tonik/xarray2zarr.py,sha256=DM91jW63ySIuhjlJBIdrw61dZGG8QU5qKlBJYYIhRos,1592
+tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
+tonik-0.0.9.dist-info/METADATA,sha256=CWU0A4o08nA0X6L53Rzo6cRgG7NblX3EJRQkNSnoE-w,1938
+tonik-0.0.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+tonik-0.0.9.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
+tonik-0.0.9.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+tonik-0.0.9.dist-info/RECORD,,
tonik-0.0.7.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
-tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
-tonik/api.py,sha256=gnwoss7UV8FaY92xzumhcoVPjkzB695qgByHUYcLSw4,5916
-tonik/storage.py,sha256=pJnvoGFb8uZqnpkjOsgnntW-a7dhKVlvevs725nAS54,11009
-tonik/utils.py,sha256=nV0lK8Azasr8LUuQGXxfxef6nU3bn3dCTQnQTmWsKAY,1534
-tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
-tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
-tonik-0.0.7.dist-info/METADATA,sha256=6DhYEfnEAWSKLEZJQQRiRF_cZAGAQFK6mLmHQEYJbuE,1918
-tonik-0.0.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-tonik-0.0.7.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
-tonik-0.0.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-tonik-0.0.7.dist-info/RECORD,,