tonik 0.0.6__tar.gz → 0.0.7__tar.gz

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
@@ -4,4 +4,5 @@ dist/
 tonik.log
 .vscode
 *.nfs*
-.gitignore
+.gitignore
+site/
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tonik
-Version: 0.0.6
+Version: 0.0.7
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "tonik"
-version = "0.0.6"
+version = "0.0.7"
 authors = [
   { name="Yannik Behr", email="y.behr@gns.cri.nz" },
   { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -20,12 +20,12 @@ from . import get_data
 
 logger = logging.getLogger(__name__)
 
-
+
 class TonikAPI:
 
     def __init__(self, rootdir) -> None:
         self.rootdir = rootdir
-        self.app = FastAPI()
+        self.app = FastAPI()
 
         # -- allow any origin to query API
         self.app.add_middleware(CORSMiddleware,
@@ -55,20 +55,23 @@ class TonikAPI:
         return dt
 
     def feature(self,
-                group: str='Ruapehu',
-                name: str='rsam',
-                starttime: str=None,
-                endtime: str=None,
-                resolution: str='full',
-                verticalres: int=10,
-                log: bool=False,
-                normalise: bool=False,
-                subdir: Annotated[list[str] | None, Query()]=None):
+                group: str,
+                name: str,
+                starttime: str = None,
+                endtime: str = None,
+                resolution: str = 'full',
+                verticalres: int = 10,
+                log: bool = False,
+                normalise: bool = False,
+                subdir: Annotated[list[str] | None, Query()] = None):
         _st = self.preprocess_datetime(starttime)
         _et = self.preprocess_datetime(endtime)
         g = StorageGroup(group, rootdir=self.rootdir,
-                         starttime=_st, endtime=_et)
-        c = g.get_store(*subdir)
+                         starttime=_st, endtime=_et)
+        if subdir is None:
+            c = g
+        else:
+            c = g.get_store(*subdir)
         try:
             feat = c(name)
         except ValueError as e:
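For context (not part of the diff): group and name lose their defaults here and become required query parameters of the /feature endpoint, while subdir stays optional and, when omitted, the store lookup falls back to the group itself. A minimal client sketch, assuming a locally running instance on port 8003 (the port used in main()) and reusing example values from the test suite; httpx is only an assumption, any HTTP client works:

    import httpx

    params = {
        "group": "volcanoes",                # now required
        "name": "rsam",                      # now required
        "subdir": ["MDR", "00", "BHZ"],      # optional, sent as a repeated query parameter
        "starttime": "2023-01-01T00:00:00",  # hypothetical time window
        "endtime": "2023-01-02T00:00:00",
    }
    r = httpx.get("http://localhost:8003/feature", params=params)
    r.raise_for_status()
    print(r.text.splitlines()[0])            # CSV header: dates,feature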
@@ -80,7 +83,8 @@ class TonikAPI:
             nfreqs = feat.shape[0]
             dates = feat.coords[feat.dims[1]].values
             if resolution != 'full':
-                freq, dates, spec = self.aggregate_feature(resolution, verticalres, feat, nfreqs, dates)
+                freq, dates, spec = self.aggregate_feature(
+                    resolution, verticalres, feat, nfreqs, dates)
             else:
                 spec = feat.values
                 freq = feat.coords[feat.dims[0]].values
@@ -88,44 +92,50 @@ class TonikAPI:
             if log and feat.name != 'sonogram':
                 vals = 10*np.log10(vals)
             if normalise:
-                vals = (vals - np.nanmin(vals))/(np.nanmax(vals) - np.nanmin(vals))
+                vals = (vals - np.nanmin(vals)) / \
+                    (np.nanmax(vals) - np.nanmin(vals))
             freqs = freq.repeat(dates.size)
             dates = np.tile(dates, freq.size)
-            df = pd.DataFrame({'dates': dates, 'freqs': freqs, 'feature': vals})
+            df = pd.DataFrame(
+                {'dates': dates, 'freqs': freqs, 'feature': vals})
             output = df.to_csv(index=False,
-                               columns=['dates', 'freqs', 'feature'])
+                               columns=['dates', 'freqs', 'feature'])
         else:
             df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
             df['dates'] = df.index
             try:
-                df = df.resample(str(float(resolution)/60000.0)+'T').mean()
+                current_resolution = pd.Timedelta(df['dates'].diff().mean())
+                if current_resolution < pd.Timedelta(resolution):
+                    df = df.resample(pd.Timedelta(resolution)).mean()
             except ValueError as e:
-                logger.warning(f"Cannot resample {feat.name} to {resolution}: e")
+                logger.warning(
+                    f"Cannot resample {feat.name} to {resolution}: e")
             df.rename(columns={feat.name: 'feature'}, inplace=True)
             output = df.to_csv(index=False, columns=['dates', 'feature'])
         return StreamingResponse(iter([output]),
-                                 media_type='text/csv',
-                                 headers={"Content-Disposition":
-                                          "attachment;filename=<VUMT_feature>.csv",
-                                          'Content-Length': str(len(output))})
-
+                                 media_type='text/csv',
+                                 headers={"Content-Disposition":
+                                          "attachment;filename=<tonik_feature>.csv",
+                                          'Content-Length': str(len(output))})
 
     def aggregate_feature(self, resolution, verticalres, feat, nfreqs, dates):
-        resolution = np.timedelta64(pd.Timedelta(resolution), 'ms').astype(float)
+        resolution = np.timedelta64(
+            pd.Timedelta(resolution), 'ms').astype(float)
         ndays = np.timedelta64(dates[-1] - dates[0], 'ms').astype(float)
-        canvas_x = int(ndays/resolution)
+        canvas_x = int(ndays/resolution)
         canvas_y = min(nfreqs, verticalres)
         dates = date2num(dates.astype('datetime64[us]').astype(datetime),
-                         units='hours since 1970-01-01 00:00:00.0',
-                         calendar='gregorian')
+                         units='hours since 1970-01-01 00:00:00.0',
+                         calendar='gregorian')
         feat = feat.assign_coords({'datetime': dates})
         cvs = dsh.Canvas(plot_width=canvas_x,
-                         plot_height=canvas_y)
+                         plot_height=canvas_y)
         agg = cvs.raster(source=feat)
         freq_dim = feat.dims[0]
         freq, d, spec = agg.coords[freq_dim].values, agg.coords['datetime'].values, agg.data
-        dates = num2date(d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
-        return freq,dates,spec
+        dates = num2date(
+            d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
+        return freq, dates, spec
 
     def inventory(self, group: str) -> dict:
         sg = StorageGroup(group, rootdir=self.rootdir)
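For context (not part of the diff): the 1-D branch no longer derives a resample rule from Grafana-style milliseconds (str(float(resolution)/60000.0)+'T'); resolution is now parsed with pd.Timedelta, and the series is only resampled when the requested resolution is coarser than its native spacing. A toy illustration of that downsample-only logic, not package code:

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2023-01-01", periods=144, freq="10min")
    df = pd.DataFrame({"feature": np.random.rand(len(idx))}, index=idx)

    resolution = "1h"                                      # e.g. '10min', '1h', '1D'
    current = pd.Timedelta(pd.Series(idx).diff().mean())   # native spacing, ~10 minutes
    if current < pd.Timedelta(resolution):                 # only ever coarsen the data
        df = df.resample(pd.Timedelta(resolution)).mean()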
@@ -133,6 +143,7 @@ class TonikAPI:
 
 # ta = TonikAPI('/tmp').feature()
 
+
 def main(argv=None):
     parser = ArgumentParser()
     parser.add_argument("--rootdir", default='/tmp')
@@ -140,5 +151,6 @@ def main(argv=None):
     ta = TonikAPI(args.rootdir)
     uvicorn.run(ta.app, host="0.0.0.0", port=8003)
 
+
 if __name__ == "__main__":
     main()
@@ -18,13 +18,14 @@ ERROR_LOG_FILENAME = "tonik.log"
 LOGGING_CONFIG = {
     "version": 1,
     "disable_existing_loggers": False,
-    "formatters": {
+    "formatters": {
         "default": { # The formatter name, it can be anything that I wish
-            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s", # What to add in the message
+            # What to add in the message
+            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",
             "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
         },
         "json": { # The formatter name
-            "()": "pythonjsonlogger.jsonlogger.JsonFormatter", # The class to instantiate!
+            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
             # Json is more complex, but easier to read, display all attributes!
             "format": """
                 asctime: %(asctime)s
@@ -48,22 +49,23 @@ LOGGING_CONFIG = {
             """,
             "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
         },
-    },
+    },
    "handlers": {
        "logfile": { # The handler name
            "formatter": "json", # Refer to the formatter defined above
            "level": "ERROR", # FILTER: Only ERROR and CRITICAL logs
            "class": "logging.handlers.RotatingFileHandler", # OUTPUT: Which class to use
-            "filename": ERROR_LOG_FILENAME, # Param for class above. Defines filename to use, load it from constant
+            # Param for class above. Defines filename to use, load it from constant
+            "filename": ERROR_LOG_FILENAME,
            "backupCount": 2, # Param for class above. Defines how many log files to keep as it grows
-        },
+        },
        "simple": { # The handler name
            "formatter": "default", # Refer to the formatter defined above
            "class": "logging.StreamHandler", # OUTPUT: Same as above, stream to console
            "stream": "ext://sys.stdout",
        },
    },
-    "loggers": {
+    "loggers": {
        "zizou": { # The name of the logger, this SHOULD match your module!
            "level": "DEBUG", # FILTER: only INFO logs onwards from "tryceratops" logger
            "handlers": [
@@ -92,10 +94,10 @@ class Path(object):
         except FileExistsError:
             pass
         self.children = {}
-
+
     def __str__(self):
         return self.path
-
+
     def __getitem__(self, key):
         if key is None:
             raise ValueError("Key cannot be None")
@@ -125,18 +127,18 @@ class Path(object):
         if self.endtime <= self.starttime:
             raise ValueError('Startime has to be smaller than endtime.')
 
-        feature = feature.lower()
         filename = self.feature_path(feature)
 
-        logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
+        logger.debug(
+            f"Reading feature {feature} between {self.starttime} and {self.endtime}")
         num_periods = None
         if stack_length is not None:
             valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
             if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
-                in valid_stack_units:
+                    in valid_stack_units:
                 raise ValueError(
                     'Stack length should be one of: {}'.
-                    format(', '.join(valid_stack_units))
+                    format(', '.join(valid_stack_units))
                 )
 
             if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
@@ -146,13 +148,13 @@ class Path(object):
             # Rewind starttime to account for stack length
             self.starttime -= pd.to_timedelta(stack_length)
 
-            num_periods = (pd.to_timedelta(stack_length)/
+            num_periods = (pd.to_timedelta(stack_length) /
                            pd.to_timedelta(interval))
             if not num_periods.is_integer():
                 raise ValueError(
                     'Stack length {} / interval {} = {}, but it needs'
                     ' to be a whole number'.
-                    format(stack_length, interval, num_periods))
+                    format(stack_length, interval, num_periods))
 
         xd_index = dict(datetime=slice(self.starttime, self.endtime))
         with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
@@ -164,8 +166,8 @@ class Path(object):
             logger.debug("Stacking feature...")
             try:
                 xdf = rq[feature].rolling(datetime=int(num_periods),
-                                          center=False,
-                                          min_periods=1).mean()
+                                          center=False,
+                                          min_periods=1).mean()
                 # Return requested timeframe to that defined in initialisation
                 self.starttime += pd.to_timedelta(stack_length)
                 xdf_new = xdf.loc[self.starttime:self.endtime]
@@ -212,12 +214,13 @@ class StorageGroup(Path):
     >>> c = g.channel(site='WIZ', sensor='00', channel='HHZ')
     >>> rsam = c("rsam")
     """
+
     def __init__(self, name, rootdir=None, starttime=None, endtime=None):
-        self.stores = set()
+        self.stores = set()
         self.starttime = starttime
         self.endtime = endtime
         super().__init__(name, rootdir)
-
+
     def print_tree(self, site, indent=0, output=''):
         output += ' ' * indent + site.path + '\n'
         for site in site.children.values():
@@ -243,7 +246,7 @@ class StorageGroup(Path):
         st.starttime = self.starttime
         st.endtime = self.endtime
         self.stores.add(st)
-        return st
+        return st
 
     def from_directory(self):
         """
@@ -311,7 +314,5 @@ class StorageGroup(Path):
             if s is not self:
                 s.endtime = time
 
-
    starttime = property(get_starttime, set_starttime)
    endtime = property(get_endtime, set_endtime)
-
@@ -8,6 +8,27 @@ import pandas as pd
 import pytest
 
 
+def test_errors(setup_api):
+    client, l = setup_api
+    params = dict(name='rsam',
+                  subdir=['MDR', '00', 'BHZ'],
+                  starttime=str(l.starttime),
+                  endtime=str(l.endtime))
+    with client.stream("GET", "/feature", params=params) as r:
+        r.read()
+        txt = r.text
+    assert r.status_code == 422
+
+    params = dict(group='volcanoes',
+                  subdir=['MDR', '00', 'BHZ'],
+                  starttime=str(l.starttime),
+                  endtime=str(l.endtime))
+    with client.stream("GET", "/feature", params=params) as r:
+        r.read()
+        txt = r.text
+    assert r.status_code == 422
+
+
 def test_read_1Dfeature(setup_api):
     client, l = setup_api
     params = dict(name='rsam',
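For context (not part of the diff): test_errors relies on FastAPI's request validation; once group and name are required, a request that omits either one is rejected with HTTP 422 before the handler runs. The same check outside pytest, assuming a running instance on port 8003 (httpx is again only an assumption):

    import httpx

    # group is omitted on purpose, so validation fails with 422 Unprocessable Entity.
    r = httpx.get("http://localhost:8003/feature", params={"name": "rsam"})
    assert r.status_code == 422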
@@ -21,7 +42,7 @@ def test_read_1Dfeature(setup_api):
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('rsam').values)
-
+
 
 def test_html_tags(setup_api):
     client, l = setup_api
@@ -36,7 +57,8 @@ def test_html_tags(setup_api):
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('rsam').values)
-
+
+
 def test_read_ssam(setup_api):
     client, l = setup_api
     params = dict(name='ssam',
@@ -63,8 +85,9 @@ def test_read_ssam(setup_api):
         r.read()
         txt = r.text
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
-    assert len(np.unique(df.index)) == 5
-    assert len(np.unique(df['freqs'])) == 8
+    assert len(np.unique(df.index)) == 5
+    assert len(np.unique(df['freqs'])) == 8
+
 
 def test_read_filterbank(setup_api):
     client, l = setup_api
@@ -82,6 +105,7 @@ def test_read_filterbank(setup_api):
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('filterbank').values.ravel(order='C'))
 
+
 def test_log(setup_api):
     client, l = setup_api
     params = dict(name='filterbank',
@@ -115,6 +139,7 @@ def test_autoencoder(setup_api):
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('autoencoder').values.ravel(order='C'))
 
+
 def test_normalise(setup_api):
     client, l = setup_api
     params = dict(name='sonogram',
@@ -129,46 +154,48 @@ def test_normalise(setup_api):
         r.read()
         txt = r.text
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
-    assert np.nanmax(df['feature'].values) == 1.
+    assert np.nanmax(df['feature'].values) == 1.
     assert np.nanmin(df['feature'].values) == 0.
 
-@pytest.mark.xfail
+
 def test_aggregate1DFeature(setup_api):
     client, fq = setup_api
     params = dict(name='rsam',
-                  volcano='Mt Doom',
+                  group='volcanoes',
                   subdir=['MDR', '00', 'BHZ'],
                   starttime=str(fq.starttime),
                   endtime=str(fq.endtime),
-                  resolution=3600000, # given in ms seconds by Grafana (here 1 hr)
+                  # given in ms seconds by Grafana (here 1 hr)
+                  resolution='1D',
                   log=False)
     with client.stream("GET", "/feature", params=params) as r:
         r.read()
         txt = r.text
 
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
-    assert df.index[1].value == 1448933100000000000
-    assert df.index[2].value == 1448936700000000000
+    assert pd.Timedelta(df.index.diff().mean()) > pd.Timedelta('10min')
+    assert pd.Timedelta(df.index.diff().mean()) <= pd.Timedelta('1D')
+
 
 def test_inventory(setup_api):
     client, fq = setup_api
     params = dict(group='volcanoes')
     with client.stream("GET", "/inventory", params=params) as r:
         r.read()
-        txt = r.text
+        txt = r.text
     features = sorted(["sonogram", "predom_freq", "ssam", "bandwidth",
                        "filterbank", "central_freq", "rsam", "dsar",
                        "rsam_energy_prop", "autoencoder"])
     result_expected = {"volcanoes": [
-        {"MDR":[
-            {"00":[
-                {"BHZ": features}
-            ]
-            }
-        ]
-        }
-    ]
-    }
+        {"MDR": [
+            {"00": [
+                {"BHZ": features}
+            ]
+            }
+        ]
+        }
+    ]
+    }
     result_test = json.loads(txt)
     assert result_test['volcanoes'][1] == result_expected['volcanoes'][0]
 
@@ -177,5 +204,4 @@ def test_inventory(setup_api):
         txt = r.text
     result_test = json.loads(txt)
     test_features = result_test['volcanoes'][1]['MDR'][0]['00'][0]['BHZ']
-    assert sorted(test_features) == features
-
+    assert sorted(test_features) == features