tonik 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/__init__.py CHANGED
@@ -1 +1,23 @@
1
- from .xarray2hdf5 import xarray2hdf5
1
+ import importlib
2
+ from os import PathLike
3
+ from typing import Optional
4
+
5
+ from .storage import StorageGroup, Path
6
+ from .utils import generate_test_data
7
+
8
+
9
+ def get_data(filename: Optional[PathLike] = None) -> str:
10
+ """Return path to tonik package.
11
+
12
+ Parameters
13
+ ----------
14
+ filename : PathLike, default None
15
+ Append `filename` to returned path.
16
+
17
+ Returns
18
+ -------
19
+ pkgdir_path : str
20
+
21
+ """
22
+ f = importlib.resources.files(__package__)
23
+ return str(f) if filename is None else str(f / filename)
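The new `get_data` helper resolves paths to files bundled with the installed package (for example the HTML landing page used by the API). A minimal usage sketch, assuming the wheel is installed:

    from tonik import get_data

    print(get_data())                           # directory of the installed tonik package
    print(get_data("package_data/index.html"))  # path to a bundled data file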
tonik/api.py ADDED
@@ -0,0 +1,276 @@
1
+ from argparse import ArgumentParser
2
+ from datetime import timedelta, datetime
3
+ import logging
4
+ import os
5
+
6
+ from cftime import num2date, date2num
7
+ import datashader as dsh
8
+ import numpy as np
9
+ import pandas as pd
10
+ import uvicorn
11
+ from fastapi import FastAPI, HTTPException
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ from fastapi.responses import HTMLResponse, StreamingResponse
14
+ from pydantic import BaseModel
15
+ from typing import List
16
+
17
+ from .storage import StorageGroup
18
+ from . import get_data
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class TonikAPI:
24
+
25
+ def __init__(self, rootdir) -> None:
26
+ self.rootdir = rootdir
27
+ self.app = FastAPI()
28
+
29
+ # -- allow any origin to query API
30
+ self.app.add_middleware(CORSMiddleware,
31
+ allow_origins=["*"])
32
+
33
+ self.app.get("/", response_class=HTMLResponse)(self.root)
34
+ self.app.get("/feature")(self.feature)
35
+
36
+ async def root(self):
37
+ with open(get_data("package_data/index.html"), "r", encoding="utf-8") as file:
38
+ html_content = file.read()
39
+ return HTMLResponse(content=html_content, status_code=200)
40
+
41
+
42
+ def feature(self,
43
+ name: str='rsam',
44
+ group: str='Ruapehu',
45
+ site: str='MAVZ',
46
+ sensor: str='10',
47
+ channel: str='HHZ',
48
+ starttime: datetime=datetime.utcnow()-timedelta(days=30),
49
+ endtime: datetime=datetime.utcnow(),
50
+ resolution: str='full',
51
+ verticalres: int=10,
52
+ log: bool=True,
53
+ normalise: bool=False):
54
+
55
+ _st = datetime.fromisoformat(str(starttime))
56
+ _st = _st.replace(tzinfo=None)
57
+ _et = datetime.fromisoformat(str(endtime))
58
+ _et = _et.replace(tzinfo=None)
59
+ g = StorageGroup(group, rootdir=self.rootdir,
60
+ starttime=_st, endtime=_et)
61
+ c = g.get_store(site=site, sensor=sensor, channel=channel)
62
+ try:
63
+ feat = c(name)
64
+ except ValueError as e:
65
+ msg = f"Feature {name} not found in directory {l.sitedir}:"
66
+ msg += f"{e}"
67
+ raise HTTPException(status_code=404, detail=msg)
68
+ if len(feat.shape) > 1:
69
+ # assume first dimension is frequency
70
+ nfreqs = feat.shape[0]
71
+ dates = feat.coords[feat.dims[1]].values
72
+ if resolution != 'full':
73
+ freq, dates, spec = self.aggregate_feature(resolution, verticalres, feat, nfreqs, dates)
74
+ else:
75
+ spec = feat.values
76
+ freq = feat.coords[feat.dims[0]].values
77
+ vals = spec.ravel(order='C')
78
+ if log and feat.name != 'sonogram':
79
+ vals = 10*np.log10(vals)
80
+ if normalise:
81
+ vals = (vals - np.nanmin(vals))/(np.nanmax(vals) - np.nanmin(vals))
82
+ freqs = freq.repeat(dates.size)
83
+ dates = np.tile(dates, freq.size)
84
+ df = pd.DataFrame({'dates': dates, 'freqs': freqs, 'feature': vals})
85
+ output = df.to_csv(index=False,
86
+ columns=['dates', 'freqs', 'feature'])
87
+ else:
88
+ df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
89
+ df['dates'] = df.index
90
+ try:
91
+ df = df.resample(str(float(resolution)/60000.0)+'T').mean()
92
+ except ValueError as e:
93
+ logger.warning(f"Cannot resample {feat.name} to {resolution}: e")
94
+ df.rename(columns={feat.name: 'feature'}, inplace=True)
95
+ output = df.to_csv(index=False, columns=['dates', 'feature'])
96
+ return StreamingResponse(iter([output]),
97
+ media_type='text/csv',
98
+ headers={"Content-Disposition":
99
+ "attachment;filename=<VUMT_feature>.csv",
100
+ 'Content-Length': str(len(output))})
101
+
102
+
103
+ def aggregate_feature(self, resolution, verticalres, feat, nfreqs, dates):
104
+ resolution = np.timedelta64(pd.Timedelta(resolution), 'ms').astype(float)
105
+ ndays = np.timedelta64(dates[-1] - dates[0], 'ms').astype(float)
106
+ canvas_x = int(ndays/resolution)
107
+ canvas_y = min(nfreqs, verticalres)
108
+ dates = date2num(dates.astype('datetime64[us]').astype(datetime),
109
+ units='hours since 1970-01-01 00:00:00.0',
110
+ calendar='gregorian')
111
+ feat = feat.assign_coords({'datetime': dates})
112
+ cvs = dsh.Canvas(plot_width=canvas_x,
113
+ plot_height=canvas_y)
114
+ agg = cvs.raster(source=feat)
115
+ freq_dim = feat.dims[0]
116
+ freq, d, spec = agg.coords[freq_dim].values, agg.coords['datetime'].values, agg.data
117
+ dates = num2date(d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
118
+ return freq, dates, spec
119
+
120
+
121
+ # #pydanticmodel output: Json file
122
+ # class Feature(BaseModel):
123
+ # name: list
124
+
125
+
126
+ # class Channel(BaseModel):
127
+ # name: str
128
+ # features: List[Feature] = []
129
+
130
+
131
+ # class Location(BaseModel):
132
+ # name: str
133
+ # channels: List[Channel] = []
134
+
135
+
136
+ # class Station(BaseModel):
137
+ # name: str
138
+ # lat: float
139
+ # lon: float
140
+ # locations: List[Location] = []
141
+
142
+
143
+ # class Group(BaseModel):
144
+ # volcano: str
145
+ # stations: List[Station] = []
146
+
147
+
148
+ # def get_pydanticModel(group, station, location, channel, feature_list):
149
+
150
+ # channels_data = {"name": channel, "features": feature_list}
151
+ # channel_models = []
152
+ # channel_model = Channel(**channels_data)
153
+ # channel_models.append(channel_model)
154
+
155
+ # location_data = {"name": location, "channels": channel_models}
156
+ # location_models = []
157
+ # location_model = Location(**location_data)
158
+ # location_models.append(location_model)
159
+
160
+ # stations_data = {"name": station, "lat": "42", "lon": "171",
161
+ # "locations": location_models}
162
+ # station_models = []
163
+ # station_model = Station(**stations_data)
164
+ # station_models.append(station_model)
165
+
166
+ # group_model = Group(group=group, stations=station_models)
167
+
168
+ # # Exporting to JSON
169
+ # json_data = group_model.json()
170
+ # return json_data
171
+
172
+
173
+ # write a function that scans LOCKERROOMROOT for
174
+ # available groups, stations, locations, channels, and features
175
+ # and returns a pydantic model
176
+ # def get_available_features():
177
+ # groups = os.listdir(ROOT)
178
+ # group_models = []
179
+ # for group in groups:
180
+ # stations = os.listdir(os.path.join(LOCKERROOMROOT, group))
181
+ # station_models = []
182
+ # for station in stations:
183
+ # locations = os.listdir(os.path.join(LOCKERROOMROOT, group, station))
184
+ # location_models = []
185
+ # for location in locations:
186
+ # channels = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location))
187
+ # channel_models = []
188
+ # for channel in channels:
189
+ # features = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location, channel))
190
+ # feature_list = []
191
+ # for feature in features:
192
+ # feature_list.append(feature)
193
+ # channel_data = {"name": channel, "features": feature_list}
194
+ # channel_model = Channel(**channel_data)
195
+ # channel_models.append(channel_model)
196
+ # location_data = {"name": location, "channels": channel_models}
197
+ # location_model = Location(**location_data)
198
+ # location_models.append(location_model)
199
+ # station_data = {"name": station, "lat": "42", "lon": "171", "locations": location_models}
200
+ # station_model = Station(**station_data)
201
+ # station_models.append(station_model)
202
+ # group_data = {"volcano": group, "stations": station_models}
203
+ # group_model = Group(**group_data)
204
+ # group_models.append(group_model)
205
+ # return group_models
206
+
207
+ # @app.get("/featureEndpoint")
208
+ # def featureEndpoint(group: str="all", station: str="all", channel: str="all",
209
+ # type: str="all"):
210
+ # groups = vm.get_available_volcanoes()
211
+
212
+ # station_model_list = []
213
+ # channel_model_list = []
214
+ # volcano_model_list = []
215
+ # for _volcano in volcanoes:
216
+ # streams = vm.get_available_streams(_volcano)
217
+ # for _stream in streams:
218
+ # _, _station, _, _channel = _stream.split('.')
219
+ # stream_dir = os.path.join(FEATUREDIR, _volcano, _station, _channel)
220
+ # try:
221
+ # feature_list = os.listdir(stream_dir)
222
+ # except (NotADirectoryError, FileNotFoundError):
223
+ # continue
224
+ # feature_list = sorted([str(os.path.basename(path)).split('.nc')[0] for path in feature_list])
225
+ # channels_data = {"name": _channel, "features":feature_list}
226
+ # channel_model = Channel(**channels_data)
227
+ # channel_model_list.append(channel_model)
228
+ # try:
229
+ # site_info = vm.get_site_information(_station)
230
+ # lat = site_info['latitude']
231
+ # lon = site_info['longitude']
232
+ # except:
233
+ # lat, lon = -999.9, -999.9
234
+ # stations_data = {"name": _station, "lat": lat, "lon": lon, "channels":channel_model_list}
235
+ # station_model = Station(**stations_data)
236
+ # station_model_list.append(station_model)
237
+
238
+ # volcano_model = Volcano(volcano=_volcano, stations=station_model_list)
239
+ # volcano_model_list.append(volcano_model)
240
+
241
+ # if len(volcano_model_list) == 0:
242
+ # return('no volcano')
243
+
244
+ # scenario_model = Scenario(scenario='VUMT', volcanoes=volcano_model_list)
245
+ # if volcano != "all":
246
+ # # return all stations for a volcano
247
+ # for _volcano in scenario_model.volcanoes:
248
+ # if _volcano.volcano == volcano:
249
+ # if station == "all":
250
+ # return _volcano
251
+ # for _station in _volcano.stations:
252
+ # if _station.name == station:
253
+ # if channel == "all":
254
+ # return _station
255
+ # for _channel in _station.channels:
256
+ # if _channel.name == channel:
257
+ # feature_list_filtered = []
258
+ # for _f in _channel.features:
259
+ # if _f in FeatureRequest.feat_dict[type]:
260
+ # feature_list_filtered.append(_f)
261
+ # _channel.features = feature_list_filtered
262
+ # return _channel
263
+
264
+ # return scenario_model
265
+
266
+
267
+
268
+ def main(argv=None):
269
+ parser = ArgumentParser()
270
+ parser.add_argument("--rootdir", default='/tmp')
271
+ args = parser.parse_args(argv)
272
+ ta = TonikAPI(args.rootdir)
273
+ uvicorn.run(ta.app, host="0.0.0.0", port=8003)
274
+
275
+ if __name__ == "__main__":
276
+ main()
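api.py wires the `/feature` endpoint into a FastAPI app, and `main` serves it with uvicorn on port 8003. A hedged sketch of querying a running instance with `requests`; the host, port, and time window are placeholders, while the remaining parameter values are the endpoint defaults shown above:

    import io
    import pandas as pd
    import requests

    # Assumes a server started with `tonik_api --rootdir /path/to/features`
    params = {
        "name": "rsam",
        "group": "Ruapehu",
        "site": "MAVZ",
        "sensor": "10",
        "channel": "HHZ",
        "starttime": "2023-01-01T00:00:00",
        "endtime": "2023-01-31T00:00:00",
    }
    r = requests.get("http://localhost:8003/feature", params=params)
    # 1D features come back as CSV with 'dates' and 'feature' columns
    df = pd.read_csv(io.StringIO(r.text), parse_dates=["dates"])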
tonik/package_data/index.html ADDED
@@ -0,0 +1,99 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>SAM API Documentation</title>
7
+ <style>
8
+ body {
9
+ font-family: Arial, sans-serif;
10
+ margin: 0;
11
+ padding: 0;
12
+ background-color: #f5f5f5;
13
+ }
14
+ header {
15
+ background-color: #333;
16
+ color: white;
17
+ text-align: center;
18
+ padding: 1em;
19
+ }
20
+ section {
21
+ max-width: 800px;
22
+ margin: 2em auto;
23
+ padding: 2em;
24
+ background-color: white;
25
+ border-radius: 8px;
26
+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
27
+ }
28
+ h1 {
29
+ color: #333;
30
+ }
31
+ p {
32
+ line-height: 1.6;
33
+ color: #666;
34
+ }
35
+ </style>
36
+ </head>
37
+ <body>
38
+
39
+ <header>
40
+ <h1>Your API Name</h1>
41
+ <p>Seismic Acoustic Monitoring Tool (SAM) - API</p>
42
+ </header>
43
+
44
+ <section>
45
+ <h2>Overview</h2>
46
+ <p>
47
+ Welcome to the documentation for Seismic Acoustic Monitoring API. This API provides access to waveform features and
48
+ analysis results computed by SAM.
49
+ </p>
50
+
51
+ <!--h2>Authentication</h2>
52
+ <p>
53
+ To access the API, you need to authenticate using [authentication method]. Obtain your API key from [location].
54
+ </p-->
55
+
56
+ <h2>Endpoints</h2>
57
+ <p>
58
+ The endpoints available in the API are:
59
+ </p>
60
+ <ul>
61
+ <li><strong>GET /feature:</strong> Request waveform features and analysis results.</li>
62
+ <li><strong>GET /featureEndpoint:</strong> Request meta information on available stations, features and results.</li>
63
+ </ul>
64
+
65
+ <h2>Code Examples</h2>
66
+ <h3>Requesting meta data</h3>
67
+ The following will return information on available stations, features and results in JSON format.
68
+ <pre>
69
+ curl -X GET "http://your.host.server:yourport/featureEndpoint"
70
+ </pre>
71
+ To do the same with Python using requests you can use the following code:
72
+ <pre>
73
+ import requests
74
+ url = "http://your.host.server:yourport/featureEndpoint"
75
+ response = requests.get(url)
76
+ response.json()
77
+ </pre>
78
+ <h3>Requesting data</h3>
79
+ The following example shows how to request RSAM data for station WIZ at volcano Whakaari
80
+ between 2019-12-01 and 2019-12-31. The return format is CSV.
81
+ <pre>
82
+ curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&volcano=Whakaari&site=WIZ"
83
+ </pre>
84
+ To do the same with Python using pandas you can use the following code:
85
+ <pre>
86
+ import pandas as pd
87
+ feature="rsam"
88
+ starttime="2019-12-01T00:00:00"
89
+ endtime="2019-12-31T00:00:00"
90
+ volcano="Whakaari"
91
+ site="WIZ"
92
+ url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&volcano={volcano}&site={site}"
93
+ pd.read_csv(url, parse_dates=True, index_col=0)
94
+ </pre>
95
+ </section>
96
+ </section>
97
+
98
+ </body>
99
+ </html>
tonik/storage.py ADDED
@@ -0,0 +1,299 @@
1
+ from datetime import datetime, timedelta
2
+ import glob
3
+ import logging
4
+ import logging.config
5
+ import os
6
+ import re
7
+ import tempfile
8
+
9
+ import pandas as pd
10
+ import xarray as xr
11
+
12
+ from .xarray2hdf5 import xarray2hdf5
13
+
14
+
15
+ ERROR_LOG_FILENAME = "tonik.log"
16
+
17
+ LOGGING_CONFIG = {
18
+ "version": 1,
19
+ "disable_existing_loggers": False,
20
+ "formatters": {
21
+ "default": { # The formatter name, it can be anything that I wish
22
+ "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s", # What to add in the message
23
+ "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
24
+ },
25
+ "json": { # The formatter name
26
+ "()": "pythonjsonlogger.jsonlogger.JsonFormatter", # The class to instantiate!
27
+ # Json is more complex, but easier to read, display all attributes!
28
+ "format": """
29
+ asctime: %(asctime)s
30
+ created: %(created)f
31
+ filename: %(filename)s
32
+ funcName: %(funcName)s
33
+ levelname: %(levelname)s
34
+ levelno: %(levelno)s
35
+ lineno: %(lineno)d
36
+ message: %(message)s
37
+ module: %(module)s
38
+ msec: %(msecs)d
39
+ name: %(name)s
40
+ pathname: %(pathname)s
41
+ process: %(process)d
42
+ processName: %(processName)s
43
+ relativeCreated: %(relativeCreated)d
44
+ thread: %(thread)d
45
+ threadName: %(threadName)s
46
+ exc_info: %(exc_info)s
47
+ """,
48
+ "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
49
+ },
50
+ },
51
+ "handlers": {
52
+ "logfile": { # The handler name
53
+ "formatter": "json", # Refer to the formatter defined above
54
+ "level": "ERROR", # FILTER: Only ERROR and CRITICAL logs
55
+ "class": "logging.handlers.RotatingFileHandler", # OUTPUT: Which class to use
56
+ "filename": ERROR_LOG_FILENAME, # Param for class above. Defines filename to use, load it from constant
57
+ "backupCount": 2, # Param for class above. Defines how many log files to keep as it grows
58
+ },
59
+ "simple": { # The handler name
60
+ "formatter": "default", # Refer to the formatter defined above
61
+ "class": "logging.StreamHandler", # OUTPUT: Same as above, stream to console
62
+ "stream": "ext://sys.stdout",
63
+ },
64
+ },
65
+ "loggers": {
66
+ "zizou": { # The name of the logger, this SHOULD match your module!
67
+ "level": "DEBUG", # FILTER: only INFO logs onwards from "tryceratops" logger
68
+ "handlers": [
69
+ "simple", # Refer the handler defined above
70
+ ],
71
+ },
72
+ },
73
+ "root": {
74
+ "level": "ERROR", # FILTER: only INFO logs onwards
75
+ "handlers": [
76
+ "logfile", # Refer the handler defined above
77
+ ]
78
+ },
79
+ }
80
+
81
+ logging.config.dictConfig(LOGGING_CONFIG)
82
+ logger = logging.getLogger(__name__)
83
+
84
+
85
+ class Path(object):
86
+ def __init__(self, name, parentdir):
87
+ self.name = name
88
+ self.path = os.path.join(parentdir, name)
89
+ os.makedirs(self.path, exist_ok=True)
90
+ self.children = {}
91
+
92
+ def __str__(self):
93
+ return self.path
94
+
95
+ def __getitem__(self, key):
96
+ try:
97
+ return self.children[key]
98
+ except KeyError:
99
+ self.children[key] = Path(key, self.path)
100
+ return self.children[key]
101
+
102
+ def feature_path(self, feature):
103
+ _feature_path = os.path.join(self.path, feature + ".nc")
104
+ if not os.path.exists(_feature_path):
105
+ raise FileNotFoundError(f"File {_feature_path} not found")
106
+ self.children[feature] = _feature_path
107
+ return _feature_path
108
+
109
+ def __call__(self, feature, stack_length=None, interval='10min'):
110
+ """
111
+ Request a particular feature
112
+
113
+ :param feature: Feature name
114
+ :type feature: str
115
+ :param stack_length: length of moving average in time
116
+ :type stack_length: str
117
+
118
+ """
119
+ if self.endtime <= self.starttime:
120
+ raise ValueError('Startime has to be smaller than endtime.')
121
+
122
+ feature = feature.lower()
123
+ filename = self.feature_path(feature)
124
+
125
+ logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
126
+ num_periods = None
127
+ if stack_length is not None:
128
+ valid_stack_units = ['W', 'D', 'H', 'T', 'min', 'S']
129
+ if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
130
+ in valid_stack_units:
131
+ raise ValueError(
132
+ 'Stack length should be one of: {}'.
133
+ format(', '.join(valid_stack_units))
134
+ )
135
+
136
+ if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
137
+ raise ValueError('Stack length {} is less than interval {}'.
138
+ format(stack_length, interval))
139
+
140
+ # Rewind starttime to account for stack length
141
+ self.starttime -= pd.to_timedelta(stack_length)
142
+
143
+ num_periods = (pd.to_timedelta(stack_length)/
144
+ pd.to_timedelta(interval))
145
+ if not num_periods.is_integer():
146
+ raise ValueError(
147
+ 'Stack length {} / interval {} = {}, but it needs'
148
+ ' to be a whole number'.
149
+ format(stack_length, interval, num_periods))
150
+
151
+ xd_index = dict(datetime=slice(self.starttime, self.endtime))
152
+ with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
153
+ ds = ds.sortby("datetime")
154
+ rq = ds.loc[xd_index].load()
155
+
156
+ # Stack features
157
+ if stack_length is not None:
158
+ logger.debug("Stacking feature...")
159
+ try:
160
+ xdf = rq[feature].rolling(datetime=int(num_periods),
161
+ center=False,
162
+ min_periods=1).mean()
163
+ # Return requested timeframe to that defined in initialisation
164
+ self.starttime += pd.to_timedelta(stack_length)
165
+ xdf_new = xdf.loc[
166
+ self.starttime:
167
+ self.endtime-pd.to_timedelta(interval)]
168
+ xdf_new = xdf_new.rename(feature)
169
+ except ValueError as e:
170
+ logger.error(e)
171
+ logger.error('Stack length {} is not valid for feature {}'.
172
+ format(stack_length, feature))
173
+ else:
174
+ return xdf_new
175
+
176
+ return rq[feature]
177
+
178
+ def load(self, *args, **kwargs):
179
+ """
180
+ Load a feature from disk
181
+ """
182
+ return self.__call__(*args, **kwargs)
183
+
184
+ def save(self, data):
185
+ """
186
+ Save a feature to disk
187
+ """
188
+ xarray2hdf5(data, self.path)
189
+
190
+
191
+ class StorageGroup(Path):
192
+ """
193
+ Query computed features
194
+
195
+ :param rootdir: Path to parent directory.
196
+ :type rootdir: str
197
+ :param starttime: Begin of request
198
+ :type starttime: :class:`datetime.datetime`
199
+ :param endtime: Begin of request
200
+ :type endtime: :class:`datetime.datetime`
201
+
202
+ >>> import datetime
203
+ >>> g = Group('Whakaari')
204
+ >>> start = datetime.datetime(2012,1,1,0,0,0)
205
+ >>> end = datetime.datetime(2012,1,2,23,59,59)
206
+ >>> g.starttime = start
207
+ >>> g.endtime = end
208
+ >>> c = g.channel(site='WIZ', sensor='00', channel='HHZ')
209
+ >>> rsam = c("rsam")
210
+ """
211
+ def __init__(self, name, rootdir=None, starttime=None, endtime=None):
212
+ self.stores = set()
213
+ self.starttime = starttime
214
+ self.endtime = endtime
215
+ super().__init__(name, rootdir)
216
+
217
+ def __repr__(self):
218
+ rstr = f"Group: {self.name}\n"
219
+ last_site = False
220
+ for j, site in enumerate(self.children.values()):
221
+ if j == len(self.children) - 1:
222
+ last_site = True
223
+ rstr += f"|__ {site.name}\n"
224
+ last_sensor = False
225
+ for i, sensor in enumerate(site.children.values()):
226
+ if i == len(site.children) - 1:
227
+ last_sensor = True
228
+ rstr += (" " if last_site else "|") + f" |__ {sensor.name}\n"
229
+ for k, channel in enumerate(sensor.children.values()):
230
+ rstr += (" " if last_site else "| ")
231
+ rstr += (" " if last_sensor else "| ")
232
+ rstr += f"|__ {channel.name}\n"
233
+ return rstr
234
+
235
+ def get_store(self, site, sensor, channel):
236
+ # return the store for a given site, sensor, or channel
237
+ # if one of them is None return the store for the level above
238
+ # if all are None return the root store
239
+ try:
240
+ st = self[site][sensor][channel]
241
+ except:
242
+ try:
243
+ st = self[site][sensor]
244
+ except:
245
+ try:
246
+ st = self[site]
247
+ except:
248
+ return self
249
+
250
+ st.starttime = self.starttime
251
+ st.endtime = self.endtime
252
+ self.stores.add(st)
253
+ return st
254
+
255
+ def from_directory(self):
256
+ feature_files = glob.glob(os.path.join(self.path, '**', '*.nc'),
257
+ recursive=True)
258
+ for _f in feature_files:
259
+ subdir = _f.split(self.path)[1].strip(os.sep)
260
+ # split the path into parts
261
+ # get the subdirectories
262
+ site, sensor, channel, ffile = subdir.split(os.sep)
263
+ fname = ffile.strip('.nc')
264
+ c = self.get_store(site, sensor, channel)
265
+
266
+ def get_starttime(self):
267
+ return self.__starttime
268
+
269
+ def set_starttime(self, time):
270
+ if time is None:
271
+ self.__starttime = None
272
+ self.__sdate = None
273
+ return
274
+ self.__starttime = time
275
+ self.__sdate = '{}{:02d}{:02d}'.format(time.year,
276
+ time.month,
277
+ time.day)
278
+ for s in self.stores:
279
+ s.starttime = time
280
+
281
+ def get_endtime(self):
282
+ return self.__endtime
283
+
284
+ def set_endtime(self, time):
285
+ if time is None:
286
+ self.__endtime = None
287
+ self.__edate = None
288
+ return
289
+ self.__endtime = time
290
+ self.__edate = '{}{:02d}{:02d}'.format(time.year,
291
+ time.month,
292
+ time.day)
293
+ for s in self.stores:
294
+ s.endtime = time
295
+
296
+
297
+ starttime = property(get_starttime, set_starttime)
298
+ endtime = property(get_endtime, set_endtime)
299
+
tonik/utils.py ADDED
@@ -0,0 +1,43 @@
1
+ from datetime import datetime
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ import xarray as xr
6
+
7
+
8
+ def generate_test_data(dim=1, ndays=30, nfreqs=10,
9
+ tstart=datetime.utcnow(),
10
+ feature_name=None,
11
+ freq_name=None):
12
+ """
13
+ Generate a 1D or 2D feature for testing.
14
+ """
15
+ assert dim < 3
16
+ assert dim > 0
17
+
18
+ nints = ndays * 6 * 24
19
+ dates = pd.date_range(tstart.strftime('%Y-%m-%d'), freq='10min', periods=nints)
20
+ rs = np.random.default_rng(42)
21
+ # Random walk as test signal
22
+ data = np.abs(np.cumsum(rs.normal(0, 8., len(dates))))
23
+ if dim == 2:
24
+ data = np.tile(data, (nfreqs, 1))
25
+ # Add 10% NaNs
26
+ idx_nan = rs.integers(0, nints-1, int(0.1*nints))
27
+ if dim == 1:
28
+ data[idx_nan] = np.nan
29
+ if feature_name is None:
30
+ feature_name = 'rsam'
31
+ xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
32
+ if dim == 2:
33
+ data[:, idx_nan] = np.nan
34
+ freqs = np.arange(nfreqs)
35
+ if feature_name is None:
36
+ feature_name = 'ssam'
37
+ if freq_name is None:
38
+ freq_name = 'frequency'
39
+ xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
40
+ xrd.attrs['starttime'] = dates[0].isoformat()
41
+ xrd.attrs['endtime'] = dates[-1].isoformat()
42
+ xrd.attrs['station'] = 'MDR'
43
+ return xrd
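`generate_test_data` builds a random-walk test signal as an `xarray.Dataset`, either 1D (named 'rsam' by default) or 2D with an extra frequency dimension. A short sketch; the 2D feature name is passed explicitly here rather than relying on the default:

    from tonik import generate_test_data

    rsam = generate_test_data(dim=1, ndays=2)               # 10-minute-spaced series
    ssam = generate_test_data(dim=2, ndays=2, nfreqs=8,
                              feature_name="ssam")
    print(rsam["rsam"].dims)   # ('datetime',)
    print(ssam["ssam"].dims)   # ('frequency', 'datetime')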
tonik/xarray2hdf5.py CHANGED
@@ -3,10 +3,9 @@ import logging
3
3
  import os
4
4
  from warnings import filterwarnings
5
5
 
6
- from cftime import num2date, date2num, date2index
6
+ from cftime import num2date, date2num
7
7
  import h5netcdf
8
8
  import numpy as np
9
- import xarray as xr
10
9
 
11
10
 
12
11
  def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
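`xarray2hdf5` (unchanged apart from the trimmed imports) is the writer behind `Path.save`. A hedged sketch of calling it directly, assuming it writes the dataset's variables to netCDF/HDF5 under the given directory using the default "original" group and "datetime" dimension:

    import os
    from tonik import generate_test_data
    from tonik.xarray2hdf5 import xarray2hdf5

    data = generate_test_data(dim=1, ndays=2)
    outdir = "/tmp/xarray2hdf5_demo"          # placeholder output directory
    os.makedirs(outdir, exist_ok=True)
    xarray2hdf5(data, outdir)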
tonik-0.0.3.dist-info/METADATA CHANGED
@@ -1,8 +1,8 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: tonik
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: A collection of tools to integrate with GNS Science's time series classification platform.
5
- Project-URL: Homepage, https://github.com/tsc-tools/tonik
5
+ Project-URL: Homepage, https://tsc-tools.github.io/tonik.github.io
6
6
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
7
7
  Author-email: Yannik Behr <y.behr@gns.cri.nz>, Christof Mueller <c.mueller@gns.cri.nz>
8
8
  License-File: LICENSE
@@ -10,12 +10,17 @@ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.7
13
- Requires-Dist: h5netcdf
14
- Requires-Dist: h5py
15
- Requires-Dist: netcdf4
16
- Requires-Dist: pandas
17
- Requires-Dist: pytest
18
- Requires-Dist: xarray
13
+ Requires-Dist: datashader>=0.14
14
+ Requires-Dist: fastapi>=0.95
15
+ Requires-Dist: h5netcdf>=1.1
16
+ Requires-Dist: h5py>=3.8
17
+ Requires-Dist: netcdf4>=1.6
18
+ Requires-Dist: pandas>=2.0
19
+ Requires-Dist: python-json-logger>=2.0
20
+ Requires-Dist: uvicorn[standard]>=0.22
21
+ Requires-Dist: xarray>=2023.4
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest; extra == 'dev'
19
24
  Description-Content-Type: text/markdown
20
25
 
21
26
  # Time series classification tools
tonik-0.0.3.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
1
+ tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
2
+ tonik/api.py,sha256=zTZZbpPsetoM5EH8ao8RlwR39lU1jEWpxwATC4S8Qq0,11012
3
+ tonik/storage.py,sha256=F1NnRIZLq7uB7TTK1-5VHYix6_e_fl2J1FmiCFYqIJ0,10719
4
+ tonik/utils.py,sha256=jEjvUNcU9nUhQZTgu8iwfPmF4U2odyOT8EJu0v1DaA0,1433
5
+ tonik/xarray2hdf5.py,sha256=biQ3KVt0QrxJhOWm38FwglzYkLhPtO13G1B1vVF2c6o,4090
6
+ tonik/package_data/index.html,sha256=IaPMU-A_FswjMAKA6PrHp9VFTuKJ6o_PGyjg_CTutrY,3082
7
+ tonik-0.0.3.dist-info/METADATA,sha256=9MLYvdxjaYM0okuTgI2JhOFwiEV_Gir-7N6PnI2VTeo,1085
8
+ tonik-0.0.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
+ tonik-0.0.3.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
10
+ tonik-0.0.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
11
+ tonik-0.0.3.dist-info/RECORD,,
tonik-0.0.3.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.17.1
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
tonik-0.0.3.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ tonik_api = tonik.api:main
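The new console script maps `tonik_api` to `tonik.api:main`, so the API can be started from the shell once the wheel is installed. A Python equivalent, with a placeholder feature directory:

    from tonik.api import main

    # Same as running `tonik_api --rootdir /path/to/features`;
    # serves the FastAPI app on 0.0.0.0:8003 via uvicorn.
    main(["--rootdir", "/path/to/features"])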
tonik-0.0.1.dist-info/RECORD DELETED
@@ -1,6 +0,0 @@
1
- tonik/__init__.py,sha256=d7gnshn92xGI-U7YTs7Q5cMWjvLW45m-EXJ5IcYaZrs,36
2
- tonik/xarray2hdf5.py,sha256=sImRJ80EQ3yI_7xJg34VfS8SSIzkDtRHda3Mg959xPs,4122
3
- tonik-0.0.1.dist-info/METADATA,sha256=KS91xVocqwwtwLKprecpfp0JU15s-1xERkMqKBSpPS4,873
4
- tonik-0.0.1.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
5
- tonik-0.0.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
6
- tonik-0.0.1.dist-info/RECORD,,