tonik 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/api.py CHANGED
@@ -1,25 +1,26 @@
1
1
  from argparse import ArgumentParser
2
- from datetime import timedelta, datetime
2
+ from datetime import timedelta, datetime, timezone
3
3
  import logging
4
4
  import os
5
+ from urllib.parse import unquote
5
6
 
6
7
  from cftime import num2date, date2num
7
8
  import datashader as dsh
8
9
  import numpy as np
9
10
  import pandas as pd
10
11
  import uvicorn
11
- from fastapi import FastAPI, HTTPException
12
+ from fastapi import FastAPI, HTTPException, Query
12
13
  from fastapi.middleware.cors import CORSMiddleware
13
14
  from fastapi.responses import HTMLResponse, StreamingResponse
14
15
  from pydantic import BaseModel
15
- from typing import List
16
+ from typing import Annotated
16
17
 
17
18
  from .storage import StorageGroup
18
19
  from . import get_data
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
22
-
23
+
23
24
  class TonikAPI:
24
25
 
25
26
  def __init__(self, rootdir) -> None:
@@ -32,37 +33,49 @@ class TonikAPI:
32
33
 
33
34
  self.app.get("/", response_class=HTMLResponse)(self.root)
34
35
  self.app.get("/feature")(self.feature)
36
+ self.app.get("/inventory")(self.inventory)
35
37
 
36
- async def root(self):
38
+ def root(self):
37
39
  with open(get_data("package_data/index.html"), "r", encoding="utf-8") as file:
38
40
  html_content = file.read()
39
41
  return HTMLResponse(content=html_content, status_code=200)
40
42
 
43
+ def preprocess_datetime(self, dt):
44
+ """
45
+ Convert datetime string to datetime object.
46
+ """
47
+ # remove timezone info
48
+ dt = dt.split('+')[0]
49
+ # remove 'Z' at the end
50
+ dt = dt.replace('Z', '')
51
+ # convert html encoded characters
52
+ dt = unquote(dt)
53
+ dt = datetime.fromisoformat(dt)
54
+ dt = dt.replace(tzinfo=None)
55
+ return dt
41
56
 
42
57
  def feature(self,
43
- name: str='rsam',
44
- group: str='Ruapehu',
45
- site: str='MAVZ',
46
- sensor: str='10',
47
- channel: str='HHZ',
48
- starttime: datetime=datetime.utcnow()-timedelta(days=30),
49
- endtime: datetime=datetime.utcnow(),
50
- resolution: str='full',
51
- verticalres: int=10,
52
- log: bool=True,
53
- normalise: bool=False):
54
-
55
- _st = datetime.fromisoformat(str(starttime))
56
- _st = _st.replace(tzinfo=None)
57
- _et = datetime.fromisoformat(str(endtime))
58
- _et = _et.replace(tzinfo=None)
58
+ group: str,
59
+ name: str,
60
+ starttime: str = None,
61
+ endtime: str = None,
62
+ resolution: str = 'full',
63
+ verticalres: int = 10,
64
+ log: bool = False,
65
+ normalise: bool = False,
66
+ subdir: Annotated[list[str] | None, Query()] = None):
67
+ _st = self.preprocess_datetime(starttime)
68
+ _et = self.preprocess_datetime(endtime)
59
69
  g = StorageGroup(group, rootdir=self.rootdir,
60
- starttime=_st, endtime=_et)
61
- c = g.get_store(site=site, sensor=sensor, channel=channel)
70
+ starttime=_st, endtime=_et)
71
+ if subdir is None:
72
+ c = g
73
+ else:
74
+ c = g.get_store(*subdir)
62
75
  try:
63
76
  feat = c(name)
64
77
  except ValueError as e:
65
- msg = f"Feature {name} not found in directory {l.sitedir}:"
78
+ msg = f"Feature {name} not found in directory {c.path}:"
66
79
  msg += f"{e}"
67
80
  raise HTTPException(status_code=404, detail=msg)
68
81
  if len(feat.shape) > 1:
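
The new preprocess_datetime helper normalises timestamp query strings (URL-encoded characters, a trailing 'Z', '+HH:MM' offsets) before parsing them. A standalone sketch of the same logic, with made-up input strings to illustrate what it tolerates:

from datetime import datetime
from urllib.parse import unquote

def preprocess_datetime(dt: str) -> datetime:
    dt = dt.split('+')[0]      # drop a "+HH:MM" offset
    dt = dt.replace('Z', '')   # drop a trailing "Z"
    dt = unquote(dt)           # decode e.g. "%3A" back to ":"
    return datetime.fromisoformat(dt).replace(tzinfo=None)

preprocess_datetime("2019-12-01T00%3A00%3A00Z")    # -> datetime(2019, 12, 1, 0, 0)
preprocess_datetime("2019-12-01T00:00:00+13:00")   # -> datetime(2019, 12, 1, 0, 0)
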
@@ -70,7 +83,8 @@ class TonikAPI:
70
83
  nfreqs = feat.shape[0]
71
84
  dates = feat.coords[feat.dims[1]].values
72
85
  if resolution != 'full':
73
- freq, dates, spec = self.aggregate_feature(resolution, verticalres, feat, nfreqs, dates)
86
+ freq, dates, spec = self.aggregate_feature(
87
+ resolution, verticalres, feat, nfreqs, dates)
74
88
  else:
75
89
  spec = feat.values
76
90
  freq = feat.coords[feat.dims[0]].values
@@ -78,191 +92,56 @@ class TonikAPI:
78
92
  if log and feat.name != 'sonogram':
79
93
  vals = 10*np.log10(vals)
80
94
  if normalise:
81
- vals = (vals - np.nanmin(vals))/(np.nanmax(vals) - np.nanmin(vals))
95
+ vals = (vals - np.nanmin(vals)) / \
96
+ (np.nanmax(vals) - np.nanmin(vals))
82
97
  freqs = freq.repeat(dates.size)
83
98
  dates = np.tile(dates, freq.size)
84
- df = pd.DataFrame({'dates': dates, 'freqs': freqs, 'feature': vals})
99
+ df = pd.DataFrame(
100
+ {'dates': dates, 'freqs': freqs, 'feature': vals})
85
101
  output = df.to_csv(index=False,
86
- columns=['dates', 'freqs', 'feature'])
102
+ columns=['dates', 'freqs', 'feature'])
87
103
  else:
88
104
  df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
89
105
  df['dates'] = df.index
90
106
  try:
91
- df = df.resample(str(float(resolution)/60000.0)+'T').mean()
107
+ current_resolution = pd.Timedelta(df['dates'].diff().mean())
108
+ if current_resolution < pd.Timedelta(resolution):
109
+ df = df.resample(pd.Timedelta(resolution)).mean()
92
110
  except ValueError as e:
93
- logger.warning(f"Cannot resample {feat.name} to {resolution}: e")
111
+ logger.warning(
112
+ f"Cannot resample {feat.name} to {resolution}: e")
94
113
  df.rename(columns={feat.name: 'feature'}, inplace=True)
95
114
  output = df.to_csv(index=False, columns=['dates', 'feature'])
96
115
  return StreamingResponse(iter([output]),
97
- media_type='text/csv',
98
- headers={"Content-Disposition":
99
- "attachment;filename=<VUMT_feature>.csv",
100
- 'Content-Length': str(len(output))})
101
-
116
+ media_type='text/csv',
117
+ headers={"Content-Disposition":
118
+ "attachment;filename=<tonik_feature>.csv",
119
+ 'Content-Length': str(len(output))})
102
120
 
103
121
  def aggregate_feature(self, resolution, verticalres, feat, nfreqs, dates):
104
- resolution = np.timedelta64(pd.Timedelta(resolution), 'ms').astype(float)
122
+ resolution = np.timedelta64(
123
+ pd.Timedelta(resolution), 'ms').astype(float)
105
124
  ndays = np.timedelta64(dates[-1] - dates[0], 'ms').astype(float)
106
- canvas_x = int(ndays/resolution)
125
+ canvas_x = int(ndays/resolution)
107
126
  canvas_y = min(nfreqs, verticalres)
108
127
  dates = date2num(dates.astype('datetime64[us]').astype(datetime),
109
- units='hours since 1970-01-01 00:00:00.0',
110
- calendar='gregorian')
128
+ units='hours since 1970-01-01 00:00:00.0',
129
+ calendar='gregorian')
111
130
  feat = feat.assign_coords({'datetime': dates})
112
131
  cvs = dsh.Canvas(plot_width=canvas_x,
113
- plot_height=canvas_y)
132
+ plot_height=canvas_y)
114
133
  agg = cvs.raster(source=feat)
115
134
  freq_dim = feat.dims[0]
116
135
  freq, d, spec = agg.coords[freq_dim].values, agg.coords['datetime'].values, agg.data
117
- dates = num2date(d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
118
- return freq,dates,spec
119
-
120
-
121
- # #pydanticmodel output: Json file
122
- # class Feature(BaseModel):
123
- # name: list
124
-
125
-
126
- # class Channel(BaseModel):
127
- # name: str
128
- # features: List[Feature] = []
129
-
130
-
131
- # class Location(BaseModel):
132
- # name: str
133
- # channels: List[Channel] = []
134
-
135
-
136
- # class Station(BaseModel):
137
- # name: str
138
- # lat: float
139
- # lon: float
140
- # locations: List[Location] = []
141
-
142
-
143
- # class Group(BaseModel):
144
- # volcano: str
145
- # stations: List[Station] = []
146
-
136
+ dates = num2date(
137
+ d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
138
+ return freq, dates, spec
147
139
 
148
- # def get_pydanticModel(group, station, location, channel, feature_list):
149
-
150
- # channels_data = {"name": channel, "features": feature_list}
151
- # channel_models = []
152
- # channel_model = Channel(**channels_data)
153
- # channel_models.append(channel_model)
154
-
155
- # location_data = {"name": location, "channels": channel_models}
156
- # location_models = []
157
- # location_model = Location(**location_data)
158
- # location_models.append(location_model)
159
-
160
- # stations_data = {"name": station, "lat": "42", "lon": "171",
161
- # "locations": location_models}
162
- # station_models = []
163
- # station_model = Station(**stations_data)
164
- # station_models.append(station_model)
165
-
166
- # group_model = Group(group=group, stations=station_models)
167
-
168
- # # Exporting to JSON
169
- # json_data = group_model.json()
170
- # return json_data
171
-
172
-
173
- # write a function that scans LOCKERROOMROOT for
174
- # available groups, stations, locations, channels, and features
175
- # and returns a pydantic model
176
- # def get_available_features():
177
- # groups = os.listdir(ROOT)
178
- # group_models = []
179
- # for group in groups:
180
- # stations = os.listdir(os.path.join(LOCKERROOMROOT, group))
181
- # station_models = []
182
- # for station in stations:
183
- # locations = os.listdir(os.path.join(LOCKERROOMROOT, group, station))
184
- # location_models = []
185
- # for location in locations:
186
- # channels = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location))
187
- # channel_models = []
188
- # for channel in channels:
189
- # features = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location, channel))
190
- # feature_list = []
191
- # for feature in features:
192
- # feature_list.append(feature)
193
- # channel_data = {"name": channel, "features": feature_list}
194
- # channel_model = Channel(**channel_data)
195
- # channel_models.append(channel_model)
196
- # location_data = {"name": location, "channels": channel_models}
197
- # location_model = Location(**location_data)
198
- # location_models.append(location_model)
199
- # station_data = {"name": station, "lat": "42", "lon": "171", "locations": location_models}
200
- # station_model = Station(**station_data)
201
- # station_models.append(station_model)
202
- # group_data = {"volcano": group, "stations": station_models}
203
- # group_model = Group(**group_data)
204
- # group_models.append(group_model)
205
- # return group_models
206
-
207
- # @app.get("/featureEndpoint")
208
- # def featureEndpoint(group: str="all", station: str="all", channel: str="all",
209
- # type: str="all"):
210
- # groups = vm.get_available_volcanoes()
211
-
212
- # station_model_list = []
213
- # channel_model_list = []
214
- # volcano_model_list = []
215
- # for _volcano in volcanoes:
216
- # streams = vm.get_available_streams(_volcano)
217
- # for _stream in streams:
218
- # _, _station, _, _channel = _stream.split('.')
219
- # stream_dir = os.path.join(FEATUREDIR, _volcano, _station, _channel)
220
- # try:
221
- # feature_list = os.listdir(stream_dir)
222
- # except (NotADirectoryError, FileNotFoundError):
223
- # continue
224
- # feature_list = sorted([str(os.path.basename(path)).split('.nc')[0] for path in feature_list])
225
- # channels_data = {"name": _channel, "features":feature_list}
226
- # channel_model = Channel(**channels_data)
227
- # channel_model_list.append(channel_model)
228
- # try:
229
- # site_info = vm.get_site_information(_station)
230
- # lat = site_info['latitude']
231
- # lon = site_info['longitude']
232
- # except:
233
- # lat, lon = -999.9, -999.9
234
- # stations_data = {"name": _station, "lat": lat, "lon": lon, "channels":channel_model_list}
235
- # station_model = Station(**stations_data)
236
- # station_model_list.append(station_model)
237
-
238
- # volcano_model = Volcano(volcano=_volcano, stations=station_model_list)
239
- # volcano_model_list.append(volcano_model)
240
-
241
- # if len(volcano_model_list) == 0:
242
- # return('no volcano')
243
-
244
- # scenario_model = Scenario(scenario='VUMT', volcanoes=volcano_model_list)
245
- # if volcano != "all":
246
- # # return all stations for a volcano
247
- # for _volcano in scenario_model.volcanoes:
248
- # if _volcano.volcano == volcano:
249
- # if station == "all":
250
- # return _volcano
251
- # for _station in _volcano.stations:
252
- # if _station.name == station:
253
- # if channel == "all":
254
- # return _station
255
- # for _channel in _station.channels:
256
- # if _channel.name == channel:
257
- # feature_list_filtered = []
258
- # for _f in _channel.features:
259
- # if _f in FeatureRequest.feat_dict[type]:
260
- # feature_list_filtered.append(_f)
261
- # _channel.features = feature_list_filtered
262
- # return _channel
263
-
264
- # return scenario_model
140
+ def inventory(self, group: str) -> dict:
141
+ sg = StorageGroup(group, rootdir=self.rootdir)
142
+ return sg.to_dict()
265
143
 
144
+ # ta = TonikAPI('/tmp').feature()
266
145
 
267
146
 
268
147
  def main(argv=None):
@@ -272,5 +151,6 @@ def main(argv=None):
272
151
  ta = TonikAPI(args.rootdir)
273
152
  uvicorn.run(ta.app, host="0.0.0.0", port=8003)
274
153
 
154
+
275
155
  if __name__ == "__main__":
276
156
  main()
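
The new /inventory endpoint simply returns StorageGroup.to_dict(), which walks the group directory: each subdirectory becomes a {name: [children]} entry and each *.nc file becomes a bare feature name. A hypothetical response for a made-up Ruapehu group could therefore look like:

{
    "Ruapehu": [
        {"MAVZ": [
            {"10": [
                {"HHZ": ["rsam", "ssam"]}
            ]}
        ]}
    ]
}
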
tonik/package_data/index.html CHANGED
@@ -3,7 +3,7 @@
3
3
  <head>
4
4
  <meta charset="UTF-8">
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>SAM API Documentation</title>
6
+ <title>Tonik API Documentation</title>
7
7
  <style>
8
8
  body {
9
9
  font-family: Arial, sans-serif;
@@ -37,41 +37,35 @@
37
37
  <body>
38
38
 
39
39
  <header>
40
- <h1>Your API Name</h1>
41
- <p>Seismic Acoustic Monitoring Tool (SAM) - API</p>
40
+ <h1>Tonik</h1>
41
+ <p>Tonik - API</p>
42
42
  </header>
43
43
 
44
44
  <section>
45
45
  <h2>Overview</h2>
46
46
  <p>
47
- Welcome to the documentation for Seismic Acoustic Monitoring API. This API provides access to waveform features and
48
- analysis results computed by SAM.
47
+ Welcome to the documentation for the Tonik API. This API provides access to time-series stored with the tonik package.
49
48
  </p>
50
49
 
51
- <!--h2>Authentication</h2>
52
- <p>
53
- To access the API, you need to authenticate using [authentication method]. Obtain your API key from [location].
54
- </p-->
55
-
56
50
  <h2>Endpoints</h2>
57
51
  <p>
58
52
  The endpoints available in the API are:
59
53
  </p>
60
54
  <ul>
61
- <li><strong>GET /feature:</strong> Request waveform features and analysis results.</li>
62
- <li><strong>GET /featureEndpoint:</strong> Request meta information on available stations, features and results.</li>
55
+ <li><strong>GET /feature:</strong> Request time-series data.</li>
56
+ <li><strong>GET /inventory:</strong> Request information on data.</li>
63
57
  </ul>
64
58
 
65
59
  <h2>Code Examples</h2>
66
60
  <h3>Requesting meta data</h3>
67
- The following will return information on available stations, features and results in JSON format.
61
+ The following will return information on available datasets in JSON format.
68
62
  <pre>
69
- curl -X GET "http://your.host.server:yourport/featureEndpoint"
63
+ curl -X GET "http://your.host.server:yourport/inventory"
70
64
  </pre>
71
65
  To do the same with Python using requests you can use the following code:
72
66
  <pre>
73
67
  import requests
74
- url = "http://your.host.server:yourport/featureEndpoint"
68
+ url = "http://your.host.server:yourport/inventory"
75
69
  response = requests.get(url)
76
70
  response.json()
77
71
  </pre>
@@ -79,7 +73,7 @@
79
73
  The following example shows how to request RSAM data for station WIZ at volcano Whakaari
80
74
  between 2019-12-01 and 2019-12-31. The return format is CSV.
81
75
  <pre>
82
- curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&volcano=Whakaari&site=WIZ"
76
+ curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&subdir=Whakaari&subdir=WIZ"
83
77
  </pre>
84
78
  To do the same with Python using pandas you can use the following code:
85
79
  <pre>
@@ -89,7 +83,7 @@
89
83
  endtime="2019-12-31T00:00:00"
90
84
  volcano="Whakaari"
91
85
  site="WIZ"
92
- url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&volcano={volcano}&site={site}"
86
+ url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&subdir={volcano}&subdir={site}"
93
87
  pd.read_csv(url, parse_dates=True, index_col=0)
94
88
  </pre>
95
89
  </section>
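
Because the feature endpoint now takes the storage hierarchy as a repeated subdir query parameter, the query can also be built from a list with requests. A sketch, with host, port and the Whakaari/WIZ hierarchy as placeholders:

from io import StringIO

import pandas as pd
import requests

params = {
    "name": "rsam",
    "starttime": "2019-12-01T00:00:00",
    "endtime": "2019-12-31T00:00:00",
    "subdir": ["Whakaari", "WIZ"],  # encoded as subdir=Whakaari&subdir=WIZ
}
r = requests.get("http://your.host.server:yourport/feature", params=params)
df = pd.read_csv(StringIO(r.text), parse_dates=True, index_col=0)
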
tonik/storage.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from datetime import datetime, timedelta
2
+ import json
2
3
  import glob
3
4
  import logging
4
5
  import logging.config
@@ -17,13 +18,14 @@ ERROR_LOG_FILENAME = "tonik.log"
17
18
  LOGGING_CONFIG = {
18
19
  "version": 1,
19
20
  "disable_existing_loggers": False,
20
- "formatters": {
21
+ "formatters": {
21
22
  "default": { # The formatter name, it can be anything that I wish
22
- "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s", # What to add in the message
23
+ # What to add in the message
24
+ "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",
23
25
  "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
24
26
  },
25
27
  "json": { # The formatter name
26
- "()": "pythonjsonlogger.jsonlogger.JsonFormatter", # The class to instantiate!
28
+ "()": "pythonjsonlogger.jsonlogger.JsonFormatter", # The class to instantiate!
27
29
  # Json is more complex, but easier to read, display all attributes!
28
30
  "format": """
29
31
  asctime: %(asctime)s
@@ -47,22 +49,23 @@ LOGGING_CONFIG = {
47
49
  """,
48
50
  "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
49
51
  },
50
- },
52
+ },
51
53
  "handlers": {
52
54
  "logfile": { # The handler name
53
55
  "formatter": "json", # Refer to the formatter defined above
54
56
  "level": "ERROR", # FILTER: Only ERROR and CRITICAL logs
55
57
  "class": "logging.handlers.RotatingFileHandler", # OUTPUT: Which class to use
56
- "filename": ERROR_LOG_FILENAME, # Param for class above. Defines filename to use, load it from constant
58
+ # Param for class above. Defines filename to use, load it from constant
59
+ "filename": ERROR_LOG_FILENAME,
57
60
  "backupCount": 2, # Param for class above. Defines how many log files to keep as it grows
58
- },
61
+ },
59
62
  "simple": { # The handler name
60
63
  "formatter": "default", # Refer to the formatter defined above
61
64
  "class": "logging.StreamHandler", # OUTPUT: Same as above, stream to console
62
65
  "stream": "ext://sys.stdout",
63
66
  },
64
67
  },
65
- "loggers": {
68
+ "loggers": {
66
69
  "zizou": { # The name of the logger, this SHOULD match your module!
67
70
  "level": "DEBUG", # FILTER: only INFO logs onwards from "tryceratops" logger
68
71
  "handlers": [
@@ -86,13 +89,18 @@ class Path(object):
86
89
  def __init__(self, name, parentdir):
87
90
  self.name = name
88
91
  self.path = os.path.join(parentdir, name)
89
- os.makedirs(self.path, exist_ok=True)
92
+ try:
93
+ os.makedirs(self.path, exist_ok=True)
94
+ except FileExistsError:
95
+ pass
90
96
  self.children = {}
91
-
97
+
92
98
  def __str__(self):
93
99
  return self.path
94
-
100
+
95
101
  def __getitem__(self, key):
102
+ if key is None:
103
+ raise ValueError("Key cannot be None")
96
104
  try:
97
105
  return self.children[key]
98
106
  except KeyError:
@@ -103,7 +111,7 @@ class Path(object):
103
111
  _feature_path = os.path.join(self.path, feature + ".nc")
104
112
  if not os.path.exists(_feature_path):
105
113
  raise FileNotFoundError(f"File {_feature_path} not found")
106
- self.children[feature] = _feature_path
114
+ self.children[feature] = Path(feature + ".nc", self.path)
107
115
  return _feature_path
108
116
 
109
117
  def __call__(self, feature, stack_length=None, interval='10min'):
@@ -119,18 +127,18 @@ class Path(object):
119
127
  if self.endtime <= self.starttime:
120
128
  raise ValueError('Startime has to be smaller than endtime.')
121
129
 
122
- feature = feature.lower()
123
130
  filename = self.feature_path(feature)
124
131
 
125
- logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
132
+ logger.debug(
133
+ f"Reading feature {feature} between {self.starttime} and {self.endtime}")
126
134
  num_periods = None
127
135
  if stack_length is not None:
128
- valid_stack_units = ['W', 'D', 'H', 'T', 'min', 'S']
136
+ valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
129
137
  if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
130
- in valid_stack_units:
138
+ in valid_stack_units:
131
139
  raise ValueError(
132
140
  'Stack length should be one of: {}'.
133
- format(', '.join(valid_stack_units))
141
+ format(', '.join(valid_stack_units))
134
142
  )
135
143
 
136
144
  if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
@@ -140,13 +148,13 @@ class Path(object):
140
148
  # Rewind starttime to account for stack length
141
149
  self.starttime -= pd.to_timedelta(stack_length)
142
150
 
143
- num_periods = (pd.to_timedelta(stack_length)/
151
+ num_periods = (pd.to_timedelta(stack_length) /
144
152
  pd.to_timedelta(interval))
145
153
  if not num_periods.is_integer():
146
154
  raise ValueError(
147
155
  'Stack length {} / interval {} = {}, but it needs'
148
156
  ' to be a whole number'.
149
- format(stack_length, interval, num_periods))
157
+ format(stack_length, interval, num_periods))
150
158
 
151
159
  xd_index = dict(datetime=slice(self.starttime, self.endtime))
152
160
  with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
@@ -158,13 +166,11 @@ class Path(object):
158
166
  logger.debug("Stacking feature...")
159
167
  try:
160
168
  xdf = rq[feature].rolling(datetime=int(num_periods),
161
- center=False,
162
- min_periods=1).mean()
169
+ center=False,
170
+ min_periods=1).mean()
163
171
  # Return requested timeframe to that defined in initialisation
164
172
  self.starttime += pd.to_timedelta(stack_length)
165
- xdf_new = xdf.loc[
166
- self.starttime:
167
- self.endtime-pd.to_timedelta(interval)]
173
+ xdf_new = xdf.loc[self.starttime:self.endtime]
168
174
  xdf_new = xdf_new.rename(feature)
169
175
  except ValueError as e:
170
176
  logger.error(e)
@@ -181,11 +187,11 @@ class Path(object):
181
187
  """
182
188
  self.__call__(*args, **kwargs)
183
189
 
184
- def save(self, data):
190
+ def save(self, data, **kwargs):
185
191
  """
186
192
  Save a feature to disk
187
193
  """
188
- xarray2hdf5(data, self.path)
194
+ xarray2hdf5(data, self.path, **kwargs)
189
195
 
190
196
 
191
197
  class StorageGroup(Path):
@@ -208,60 +214,73 @@ class StorageGroup(Path):
208
214
  >>> c = g.channel(site='WIZ', sensor='00', channel='HHZ')
209
215
  >>> rsam = c("rsam")
210
216
  """
217
+
211
218
  def __init__(self, name, rootdir=None, starttime=None, endtime=None):
212
- self.stores = set()
219
+ self.stores = set()
213
220
  self.starttime = starttime
214
221
  self.endtime = endtime
215
222
  super().__init__(name, rootdir)
216
223
 
224
+ def print_tree(self, site, indent=0, output=''):
225
+ output += ' ' * indent + site.path + '\n'
226
+ for site in site.children.values():
227
+ output += self.print_tree(site, indent + 2)
228
+ return output
229
+
217
230
  def __repr__(self):
218
231
  rstr = f"Group: {self.name}\n"
219
- last_site = False
220
- for j, site in enumerate(self.children.values()):
221
- if j == len(self.children) - 1:
222
- last_site = True
223
- rstr += f"|__ {site.name}\n"
224
- last_sensor = False
225
- for i, sensor in enumerate(site.children.values()):
226
- if i == len(site.children) - 1:
227
- last_sensor = True
228
- rstr += (" " if last_site else "|") + f" |__ {sensor.name}\n"
229
- for k, channel in enumerate(sensor.children.values()):
230
- rstr += (" " if last_site else "| ")
231
- rstr += (" " if last_sensor else "| ")
232
- rstr += f"|__ {channel.name}\n"
232
+ rstr = self.print_tree(self, 0, rstr)
233
233
  return rstr
234
234
 
235
- def get_store(self, site, sensor, channel):
235
+ def get_store(self, *args):
236
236
  # return the store for a given site, sensor, or channel
237
237
  # if one of them is None return the store for the level above
238
238
  # if all are None return the root store
239
239
  try:
240
- st = self[site][sensor][channel]
241
- except:
242
- try:
243
- st = self[site][sensor]
244
- except:
245
- try:
246
- st = self[site]
247
- except:
248
- return self
240
+ st = self
241
+ for arg in args:
242
+ st = st[arg]
243
+ except KeyError:
244
+ return self
249
245
 
250
246
  st.starttime = self.starttime
251
247
  st.endtime = self.endtime
252
248
  self.stores.add(st)
253
- return st
249
+ return st
254
250
 
255
251
  def from_directory(self):
256
- feature_files = glob.glob(os.path.join(self.path, '**', '*.nc'),
257
- recursive=True)
258
- for _f in feature_files:
259
- subdir = _f.split(self.path)[1].strip(os.sep)
260
- # split the path into parts
261
- # get the subdirectories
262
- site, sensor, channel, ffile = subdir.split(os.sep)
263
- fname = ffile.strip('.nc')
264
- c = self.get_store(site, sensor, channel)
252
+ """
253
+ Construct the storage group from the root directory
254
+ """
255
+ for root, dirs, files in os.walk(self.path):
256
+ if files:
257
+ try:
258
+ subdirs = root.split(self.path)[1].split(os.sep)[1:]
259
+ except IndexError:
260
+ st = self.get_store()
261
+ else:
262
+ try:
263
+ st = self.get_store(*subdirs)
264
+ except TypeError as e:
265
+ raise e
266
+ for _f in files:
267
+ if _f.endswith('.nc'):
268
+ st.feature_path(_f.replace('.nc', ''))
269
+
270
+ @staticmethod
271
+ def directory_tree_to_dict(path):
272
+ name = os.path.basename(path)
273
+ if os.path.isdir(path):
274
+ return {name: [StorageGroup.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
275
+ else:
276
+ if path.endswith('.nc'):
277
+ return name.replace('.nc', '')
278
+
279
+ def to_dict(self):
280
+ """
281
+ Convert the storage group to json
282
+ """
283
+ return StorageGroup.directory_tree_to_dict(self.path)
265
284
 
266
285
  def get_starttime(self):
267
286
  return self.__starttime
@@ -276,7 +295,8 @@ class StorageGroup(Path):
276
295
  time.month,
277
296
  time.day)
278
297
  for s in self.stores:
279
- s.starttime = time
298
+ if s is not self:
299
+ s.starttime = time
280
300
 
281
301
  def get_endtime(self):
282
302
  return self.__endtime
@@ -291,9 +311,8 @@ class StorageGroup(Path):
291
311
  time.month,
292
312
  time.day)
293
313
  for s in self.stores:
294
- s.endtime = time
295
-
314
+ if s is not self:
315
+ s.endtime = time
296
316
 
297
317
  starttime = property(get_starttime, set_starttime)
298
318
  endtime = property(get_endtime, set_endtime)
299
-
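
get_store now accepts an arbitrary number of hierarchy levels instead of fixed site/sensor/channel arguments, and to_dict exposes the resulting tree. A write-only sketch, assuming missing levels are created on demand as in earlier versions; the directory and names are illustrative:

from tonik.storage import StorageGroup
from tonik.utils import generate_test_data

g = StorageGroup("Ruapehu", rootdir="/tmp/tonik_demo")
store = g.get_store("MAVZ", "10", "HHZ")   # any depth of subdirectories
store.save(generate_test_data(dim=1))      # writes rsam.nc under the store path
print(g.to_dict())                         # nested dict mirroring the directory tree
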
tonik/utils.py CHANGED
@@ -8,7 +8,7 @@ import xarray as xr
8
8
  def generate_test_data(dim=1, ndays=30, nfreqs=10,
9
9
  tstart=datetime.utcnow(),
10
10
  feature_name=None,
11
- freq_name=None):
11
+ freq_name=None, add_nans=True):
12
12
  """
13
13
  Generate a 1D or 2D feature for testing.
14
14
  """
@@ -25,12 +25,14 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
25
25
  # Add 10% NaNs
26
26
  idx_nan = rs.integers(0, nints-1, int(0.1*nints))
27
27
  if dim == 1:
28
- data[idx_nan] = np.nan
28
+ if add_nans:
29
+ data[idx_nan] = np.nan
29
30
  if feature_name is None:
30
31
  feature_name = 'rsam'
31
32
  xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
32
33
  if dim == 2:
33
- data[:, idx_nan] = np.nan
34
+ if add_nans:
35
+ data[:, idx_nan] = np.nan
34
36
  freqs = np.arange(nfreqs)
35
37
  if feature_name is None:
36
38
  feature_name = 'ssam'
@@ -40,4 +42,5 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
40
42
  xrd.attrs['starttime'] = dates[0].isoformat()
41
43
  xrd.attrs['endtime'] = dates[-1].isoformat()
42
44
  xrd.attrs['station'] = 'MDR'
45
+ xrd.attrs['interval'] = '10min'
43
46
  return xrd
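
A quick illustration of the new add_nans flag and the interval attribute, using only the function's defaults:

from tonik.utils import generate_test_data

xrd = generate_test_data(dim=2, ndays=2, add_nans=False)  # gap-free 'ssam' test data
print(xrd.attrs['interval'])                              # '10min'
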
tonik/xarray2hdf5.py CHANGED
@@ -8,22 +8,36 @@ import h5netcdf
8
8
  import numpy as np
9
9
 
10
10
 
11
- def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
11
+ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
12
+ archive_starttime=datetime(2000, 1, 1), resolution=None):
12
13
  """
13
14
  Store an xarray dataset as an HDF5 file.
14
15
 
15
- :param xArray: Data to store.
16
- :type xArray: :class:`xarray.Dataset`
17
- :param fdir: Directory to store data under.
18
- :type fdir: str
19
- :param rootGroupName: Hdf5 group name.
20
- :type rootGroupName: str
21
- :param timedim: Name of time dimension.
22
- :type timedim: str
16
+ Parameters
17
+ ----------
18
+ xArray : xarray.Dataset
19
+ Data to store.
20
+ fdir : str
21
+ Directory to store data under.
22
+ rootGroupName : str
23
+ Hdf5 group name.
24
+ timedim : str
25
+ Name of time dimension.
26
+ archive_starttime : datetime
27
+ Start time of archive. If the start time of the data is before this
28
+ time, the data start time is used.
29
+ resolution : float
30
+ Time resolution of the data in hours. If None, the resolution is
31
+ determined from the data.
23
32
  """
24
33
  filterwarnings(action='ignore', category=DeprecationWarning,
25
34
  message='`np.bool` is a deprecated alias')
26
35
 
36
+ starttime = xArray[timedim].values[0].astype('datetime64[us]').astype(datetime)
37
+ starttime = min(starttime, archive_starttime)
38
+ if resolution is None:
39
+ resolution = (np.diff(xArray[timedim])/np.timedelta64(1, 'h'))[0]
40
+
27
41
  for featureName in list(xArray.data_vars.keys()):
28
42
  h5file = os.path.join(fdir, featureName +'.nc')
29
43
 
@@ -31,7 +45,8 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
31
45
 
32
46
  with h5netcdf.File(h5file, mode) as h5f:
33
47
  try:
34
- rootGrp = _create_h5_Structure(rootGroupName, featureName, h5f, xArray)
48
+ rootGrp = _create_h5_Structure(rootGroupName, featureName,
49
+ h5f, xArray, starttime, timedim)
35
50
  except ValueError: # group already exists, append
36
51
  rootGrp = h5f[rootGroupName]
37
52
 
@@ -39,17 +54,17 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
39
54
  new_time = date2num(xArray[timedim].values.astype('datetime64[us]').astype(datetime),
40
55
  units=rootGrp[timedim].attrs['units'],
41
56
  calendar=rootGrp[timedim].attrs['calendar'])
42
- dt = (np.diff(xArray['datetime'])/np.timedelta64(1, 'h'))[0]
43
- t0 = date2num(np.datetime64(rootGrp.attrs['starttime']).astype('datetime64[us]').astype(datetime),
57
+ t0 = date2num(starttime,
44
58
  units=rootGrp[timedim].attrs['units'],
45
59
  calendar=rootGrp[timedim].attrs['calendar'])
46
- indices = np.rint((new_time - t0)/dt).astype(int)
47
- assert np.all(indices >= 0)
60
+ indices = np.rint((new_time - t0)/resolution).astype(int)
61
+ if not np.all(indices >= 0):
62
+ raise ValueError("Data starts before the archive start time")
48
63
  times = rootGrp[timedim]
49
64
  newsize = indices[-1] + 1
50
65
  if newsize > times.shape[0]:
51
66
  rootGrp.resize_dimension(timedim, newsize)
52
- times[:] = t0 + np.arange(times.shape[0])*dt
67
+ times[:] = t0 + np.arange(times.shape[0]) * resolution
53
68
  data = rootGrp[featureName]
54
69
  if len(data.shape) > 1:
55
70
  data[:, indices] = xArray[featureName].values
@@ -63,10 +78,18 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
63
78
  logging.warning(f"Could not set all meta info for {featureName}: {e}")
64
79
 
65
80
 
66
- def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray):
81
+ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
67
82
  rootGrp = h5f.create_group(defaultGroupName)
83
+ rootGrp.dimensions[timedim] = None
84
+ coordinates = rootGrp.create_variable(timedim, (timedim,), float)
85
+ coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
86
+ coordinates.attrs['calendar'] = 'gregorian'
87
+ rootGrp.attrs['starttime'] = str(starttime)
68
88
  for label, size in xArray.dims.items():
69
- _setAttributes(label, size, rootGrp, xArray)
89
+ if not np.issubdtype(xArray[label].dtype, np.datetime64):
90
+ rootGrp.dimensions[label] = size
91
+ coordinates = rootGrp.create_variable(label, (label,), float)
92
+ coordinates[:] = xArray[label].values
70
93
  # Note: xArray.dims returns a dictionary of dimensions that are not necesarily
71
94
  # in the right order; xArray[featureName].dims returns a tuple with dimension
72
95
  # names in the correct order
@@ -74,20 +97,6 @@ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray):
74
97
  return rootGrp
75
98
 
76
99
 
77
- def _setAttributes(label, size, rootGrp, xArray):
78
- if np.issubdtype(xArray[label].dtype, np.datetime64):
79
- starttime = str(xArray[label].values[0].astype('datetime64[us]').astype(datetime))
80
- rootGrp.dimensions[label] = None
81
- coordinates = rootGrp.create_variable(label, (label,), float)
82
- coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
83
- coordinates.attrs['calendar'] = 'gregorian'
84
- rootGrp.attrs['starttime'] = starttime
85
- else:
86
- rootGrp.dimensions[label] = size
87
- coordinates = rootGrp.create_variable(label, (label,), float)
88
- coordinates[:] = xArray[label].values
89
-
90
-
91
100
  def _setMetaInfo(featureName, h5f, xArray):
92
101
  h5f.attrs['station'] = xArray.attrs['station']
93
102
  h5f.attrs['latitude'] = -42
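
xarray2hdf5 now lets callers pin the archive start time and pass the time resolution (in hours) explicitly instead of always deriving both from the incoming data. A sketch of a direct call; the target directory, archive start and the 10-minute (1/6 hour) resolution are illustrative:

import os
from datetime import datetime

from tonik.utils import generate_test_data
from tonik.xarray2hdf5 import xarray2hdf5

os.makedirs("/tmp/tonik_demo", exist_ok=True)
xrd = generate_test_data(dim=1, ndays=1)
xarray2hdf5(xrd, "/tmp/tonik_demo", archive_starttime=datetime(2019, 1, 1),
            resolution=1/6.)   # sample index = (time - archive start) / resolution
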
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tonik
3
- Version: 0.0.5
4
- Summary: A collection of tools to integrate with GNS Science's time series classification platform.
3
+ Version: 0.0.7
4
+ Summary: Store time series data as HDF5 files and access them through an API.
5
5
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
6
6
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
7
7
  Author-email: Yannik Behr <y.behr@gns.cri.nz>, Christof Mueller <c.mueller@gns.cri.nz>
@@ -0,0 +1,11 @@
1
+ tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
2
+ tonik/api.py,sha256=gnwoss7UV8FaY92xzumhcoVPjkzB695qgByHUYcLSw4,5916
3
+ tonik/storage.py,sha256=pJnvoGFb8uZqnpkjOsgnntW-a7dhKVlvevs725nAS54,11009
4
+ tonik/utils.py,sha256=nV0lK8Azasr8LUuQGXxfxef6nU3bn3dCTQnQTmWsKAY,1534
5
+ tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
6
+ tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
7
+ tonik-0.0.7.dist-info/METADATA,sha256=6DhYEfnEAWSKLEZJQQRiRF_cZAGAQFK6mLmHQEYJbuE,1918
8
+ tonik-0.0.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
+ tonik-0.0.7.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
10
+ tonik-0.0.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
11
+ tonik-0.0.7.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
2
- tonik/api.py,sha256=zTZZbpPsetoM5EH8ao8RlwR39lU1jEWpxwATC4S8Qq0,11012
3
- tonik/storage.py,sha256=F1NnRIZLq7uB7TTK1-5VHYix6_e_fl2J1FmiCFYqIJ0,10719
4
- tonik/utils.py,sha256=jEjvUNcU9nUhQZTgu8iwfPmF4U2odyOT8EJu0v1DaA0,1433
5
- tonik/xarray2hdf5.py,sha256=biQ3KVt0QrxJhOWm38FwglzYkLhPtO13G1B1vVF2c6o,4090
6
- tonik/package_data/index.html,sha256=IaPMU-A_FswjMAKA6PrHp9VFTuKJ6o_PGyjg_CTutrY,3082
7
- tonik-0.0.5.dist-info/METADATA,sha256=4VJUxgbE6FIPk9g4Uj5KlMN2qbW05us5zw3j4byWwjU,1940
8
- tonik-0.0.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
- tonik-0.0.5.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
10
- tonik-0.0.5.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
11
- tonik-0.0.5.dist-info/RECORD,,