tonik 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tonik/__init__.py +23 -1
- tonik/api.py +276 -0
- tonik/package_data/index.html +99 -0
- tonik/storage.py +299 -0
- tonik/utils.py +43 -0
- tonik/xarray2hdf5.py +1 -2
- {tonik-0.0.1.dist-info → tonik-0.0.3.dist-info}/METADATA +14 -9
- tonik-0.0.3.dist-info/RECORD +11 -0
- {tonik-0.0.1.dist-info → tonik-0.0.3.dist-info}/WHEEL +1 -1
- tonik-0.0.3.dist-info/entry_points.txt +2 -0
- tonik-0.0.1.dist-info/RECORD +0 -6
- {tonik-0.0.1.dist-info → tonik-0.0.3.dist-info}/licenses/LICENSE +0 -0
tonik/__init__.py
CHANGED
@@ -1 +1,23 @@
-
+import importlib.resources
+from os import PathLike
+from typing import Optional
+
+from .storage import StorageGroup, Path
+from .utils import generate_test_data
+
+
+def get_data(filename: Optional[PathLike] = None) -> str:
+    """Return path to tonik package.
+
+    Parameters
+    ----------
+    filename : PathLike, default None
+        Append `filename` to returned path.
+
+    Returns
+    -------
+    pkgdir_path
+
+    """
+    f = importlib.resources.files(__package__)
+    return str(f) if filename is None else str(f / filename)
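The rewritten __init__.py turns the former one-line module into the package's public surface: it re-exports StorageGroup, Path and generate_test_data and adds the get_data helper above. A minimal usage sketch (assuming tonik 0.0.3 is installed):

    from tonik import get_data

    pkgdir = get_data()                              # installed package directory
    html_path = get_data("package_data/index.html")  # a file shipped with the package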
tonik/api.py
ADDED
@@ -0,0 +1,276 @@
+from argparse import ArgumentParser
+from datetime import timedelta, datetime
+import logging
+import os
+
+from cftime import num2date, date2num
+import datashader as dsh
+import numpy as np
+import pandas as pd
+import uvicorn
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import HTMLResponse, StreamingResponse
+from pydantic import BaseModel
+from typing import List
+
+from .storage import StorageGroup
+from . import get_data
+
+logger = logging.getLogger(__name__)
+
+
+class TonikAPI:
+
+    def __init__(self, rootdir) -> None:
+        self.rootdir = rootdir
+        self.app = FastAPI()
+
+        # -- allow any origin to query API
+        self.app.add_middleware(CORSMiddleware,
+                                allow_origins=["*"])
+
+        self.app.get("/", response_class=HTMLResponse)(self.root)
+        self.app.get("/feature")(self.feature)
+
+    async def root(self):
+        with open(get_data("package_data/index.html"), "r", encoding="utf-8") as file:
+            html_content = file.read()
+        return HTMLResponse(content=html_content, status_code=200)
+
+
+    def feature(self,
+                name: str = 'rsam',
+                group: str = 'Ruapehu',
+                site: str = 'MAVZ',
+                sensor: str = '10',
+                channel: str = 'HHZ',
+                starttime: datetime = datetime.utcnow() - timedelta(days=30),
+                endtime: datetime = datetime.utcnow(),
+                resolution: str = 'full',
+                verticalres: int = 10,
+                log: bool = True,
+                normalise: bool = False):
+
+        _st = datetime.fromisoformat(str(starttime))
+        _st = _st.replace(tzinfo=None)
+        _et = datetime.fromisoformat(str(endtime))
+        _et = _et.replace(tzinfo=None)
+        g = StorageGroup(group, rootdir=self.rootdir,
+                         starttime=_st, endtime=_et)
+        c = g.get_store(site=site, sensor=sensor, channel=channel)
+        try:
+            feat = c(name)
+        except ValueError as e:
+            msg = f"Feature {name} not found in directory {c.path}: "
+            msg += f"{e}"
+            raise HTTPException(status_code=404, detail=msg)
+        if len(feat.shape) > 1:
+            # assume first dimension is frequency
+            nfreqs = feat.shape[0]
+            dates = feat.coords[feat.dims[1]].values
+            if resolution != 'full':
+                freq, dates, spec = self.aggregate_feature(resolution, verticalres, feat, nfreqs, dates)
+            else:
+                spec = feat.values
+                freq = feat.coords[feat.dims[0]].values
+            vals = spec.ravel(order='C')
+            if log and feat.name != 'sonogram':
+                vals = 10*np.log10(vals)
+            if normalise:
+                vals = (vals - np.nanmin(vals))/(np.nanmax(vals) - np.nanmin(vals))
+            freqs = freq.repeat(dates.size)
+            dates = np.tile(dates, freq.size)
+            df = pd.DataFrame({'dates': dates, 'freqs': freqs, 'feature': vals})
+            output = df.to_csv(index=False,
+                               columns=['dates', 'freqs', 'feature'])
+        else:
+            df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
+            df['dates'] = df.index
+            try:
+                df = df.resample(str(float(resolution)/60000.0)+'T').mean()
+            except ValueError as e:
+                logger.warning(f"Cannot resample {feat.name} to {resolution}: {e}")
+            df.rename(columns={feat.name: 'feature'}, inplace=True)
+            output = df.to_csv(index=False, columns=['dates', 'feature'])
+        return StreamingResponse(iter([output]),
+                                 media_type='text/csv',
+                                 headers={"Content-Disposition":
+                                          "attachment;filename=<VUMT_feature>.csv",
+                                          'Content-Length': str(len(output))})
+
+
+    def aggregate_feature(self, resolution, verticalres, feat, nfreqs, dates):
+        resolution = np.timedelta64(pd.Timedelta(resolution), 'ms').astype(float)
+        ndays = np.timedelta64(dates[-1] - dates[0], 'ms').astype(float)
+        canvas_x = int(ndays/resolution)
+        canvas_y = min(nfreqs, verticalres)
+        dates = date2num(dates.astype('datetime64[us]').astype(datetime),
+                         units='hours since 1970-01-01 00:00:00.0',
+                         calendar='gregorian')
+        feat = feat.assign_coords({'datetime': dates})
+        cvs = dsh.Canvas(plot_width=canvas_x,
+                         plot_height=canvas_y)
+        agg = cvs.raster(source=feat)
+        freq_dim = feat.dims[0]
+        freq, d, spec = agg.coords[freq_dim].values, agg.coords['datetime'].values, agg.data
+        dates = num2date(d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
+        return freq, dates, spec
+
+
+# #pydanticmodel output: Json file
+# class Feature(BaseModel):
+#     name: list
+
+
+# class Channel(BaseModel):
+#     name: str
+#     features: List[Feature] = []
+
+
+# class Location(BaseModel):
+#     name: str
+#     channels: List[Channel] = []
+
+
+# class Station(BaseModel):
+#     name: str
+#     lat: float
+#     lon: float
+#     locations: List[Location] = []
+
+
+# class Group(BaseModel):
+#     volcano: str
+#     stations: List[Station] = []
+
+
+# def get_pydanticModel(group, station, location, channel, feature_list):
+
+#     channels_data = {"name": channel, "features": feature_list}
+#     channel_models = []
+#     channel_model = Channel(**channels_data)
+#     channel_models.append(channel_model)
+
+#     location_data = {"name": location, "channels": channel_models}
+#     location_models = []
+#     location_model = Location(**location_data)
+#     location_models.append(location_model)
+
+#     stations_data = {"name": station, "lat": "42", "lon": "171",
+#                      "locations": location_models}
+#     station_models = []
+#     station_model = Station(**stations_data)
+#     station_models.append(station_model)
+
+#     group_model = Group(group=group, stations=station_models)
+
+#     # Exporting to JSON
+#     json_data = group_model.json()
+#     return json_data
+
+
+# write a function that scans LOCKERROOMROOT for
+# available groups, stations, locations, channels, and features
+# and returns a pydantic model
+# def get_available_features():
+#     groups = os.listdir(ROOT)
+#     group_models = []
+#     for group in groups:
+#         stations = os.listdir(os.path.join(LOCKERROOMROOT, group))
+#         station_models = []
+#         for station in stations:
+#             locations = os.listdir(os.path.join(LOCKERROOMROOT, group, station))
+#             location_models = []
+#             for location in locations:
+#                 channels = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location))
+#                 channel_models = []
+#                 for channel in channels:
+#                     features = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location, channel))
+#                     feature_list = []
+#                     for feature in features:
+#                         feature_list.append(feature)
+#                     channel_data = {"name": channel, "features": feature_list}
+#                     channel_model = Channel(**channel_data)
+#                     channel_models.append(channel_model)
+#                 location_data = {"name": location, "channels": channel_models}
+#                 location_model = Location(**location_data)
+#                 location_models.append(location_model)
+#             station_data = {"name": station, "lat": "42", "lon": "171", "locations": location_models}
+#             station_model = Station(**station_data)
+#             station_models.append(station_model)
+#         group_data = {"volcano": group, "stations": station_models}
+#         group_model = Group(**group_data)
+#         group_models.append(group_model)
+#     return group_models
+
+# @app.get("/featureEndpoint")
+# def featureEndpoint(group: str="all", station: str="all", channel: str="all",
+#                     type: str="all"):
+#     groups = vm.get_available_volcanoes()
+
+#     station_model_list = []
+#     channel_model_list = []
+#     volcano_model_list = []
+#     for _volcano in volcanoes:
+#         streams = vm.get_available_streams(_volcano)
+#         for _stream in streams:
+#             _, _station, _, _channel = _stream.split('.')
+#             stream_dir = os.path.join(FEATUREDIR, _volcano, _station, _channel)
+#             try:
+#                 feature_list = os.listdir(stream_dir)
+#             except (NotADirectoryError, FileNotFoundError):
+#                 continue
+#             feature_list = sorted([str(os.path.basename(path)).split('.nc')[0] for path in feature_list])
+#             channels_data = {"name": _channel, "features": feature_list}
+#             channel_model = Channel(**channels_data)
+#             channel_model_list.append(channel_model)
+#         try:
+#             site_info = vm.get_site_information(_station)
+#             lat = site_info['latitude']
+#             lon = site_info['longitude']
+#         except:
+#             lat, lon = -999.9, -999.9
+#         stations_data = {"name": _station, "lat": lat, "lon": lon, "channels": channel_model_list}
+#         station_model = Station(**stations_data)
+#         station_model_list.append(station_model)
+
+#         volcano_model = Volcano(volcano=_volcano, stations=station_model_list)
+#         volcano_model_list.append(volcano_model)
+
+#     if len(volcano_model_list) == 0:
+#         return('no volcano')
+
+#     scenario_model = Scenario(scenario='VUMT', volcanoes=volcano_model_list)
+#     if volcano != "all":
+#         # return all stations for a volcano
+#         for _volcano in scenario_model.volcanoes:
+#             if _volcano.volcano == volcano:
+#                 if station == "all":
+#                     return _volcano
+#                 for _station in _volcano.stations:
+#                     if _station.name == station:
+#                         if channel == "all":
+#                             return _station
+#                         for _channel in _station.channels:
+#                             if _channel.name == channel:
+#                                 feature_list_filtered = []
+#                                 for _f in _channel.features:
+#                                     if _f in FeatureRequest.feat_dict[type]:
+#                                         feature_list_filtered.append(_f)
+#                                 _channel.features = feature_list_filtered
+#                                 return _channel
+
+#     return scenario_model
+
+
+def main(argv=None):
+    parser = ArgumentParser()
+    parser.add_argument("--rootdir", default='/tmp')
+    args = parser.parse_args(argv)
+    ta = TonikAPI(args.rootdir)
+    uvicorn.run(ta.app, host="0.0.0.0", port=8003)
+
+if __name__ == "__main__":
+    main()
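main() starts a uvicorn server on 0.0.0.0:8003, serving the landing page at / and CSV data at /feature; the module guards main() behind __main__, so python -m tonik.api --rootdir /path/to/features starts it (0.0.3 also ships an entry_points.txt, presumably exposing main() as a console script, though its contents are not shown in this diff). A hedged sketch of querying the endpoint (localhost and the query values echo the defaults in TonikAPI.feature; the feature store under --rootdir must already exist):

    import pandas as pd

    # /feature streams CSV with 'dates' and 'feature' columns
    # (plus 'freqs' for 2D features)
    url = ("http://localhost:8003/feature?name=rsam&group=Ruapehu"
           "&site=MAVZ&sensor=10&channel=HHZ"
           "&starttime=2023-01-01T00:00:00&endtime=2023-01-31T00:00:00")
    df = pd.read_csv(url, parse_dates=['dates'])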
tonik/package_data/index.html
ADDED
@@ -0,0 +1,99 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>SAM API Documentation</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            margin: 0;
+            padding: 0;
+            background-color: #f5f5f5;
+        }
+        header {
+            background-color: #333;
+            color: white;
+            text-align: center;
+            padding: 1em;
+        }
+        section {
+            max-width: 800px;
+            margin: 2em auto;
+            padding: 2em;
+            background-color: white;
+            border-radius: 8px;
+            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+        }
+        h1 {
+            color: #333;
+        }
+        p {
+            line-height: 1.6;
+            color: #666;
+        }
+    </style>
+</head>
+<body>
+
+    <header>
+        <h1>Your API Name</h1>
+        <p>Seismic Acoustic Monitoring Tool (SAM) - API</p>
+    </header>
+
+    <section>
+        <h2>Overview</h2>
+        <p>
+            Welcome to the documentation for the Seismic Acoustic Monitoring API. This API provides access to waveform features and
+            analysis results computed by SAM.
+        </p>
+
+        <!--h2>Authentication</h2>
+        <p>
+            To access the API, you need to authenticate using [authentication method]. Obtain your API key from [location].
+        </p-->
+
+        <h2>Endpoints</h2>
+        <p>
+            The endpoints available in the API are:
+        </p>
+        <ul>
+            <li><strong>GET /feature:</strong> Request waveform features and analysis results.</li>
+            <li><strong>GET /featureEndpoint:</strong> Request meta information on available stations, features and results.</li>
+        </ul>
+
+        <h2>Code Examples</h2>
+        <h3>Requesting meta data</h3>
+        The following will return information on available stations, features and results in JSON format.
+        <pre>
+curl -X GET "http://your.host.server:yourport/featureEndpoint"
+        </pre>
+        To do the same with Python using requests you can use the following code:
+        <pre>
+import requests
+url = "http://your.host.server:yourport/featureEndpoint"
+response = requests.get(url)
+response.json()
+        </pre>
+        <h3>Requesting data</h3>
+        The following example shows how to request RSAM data for station WIZ at volcano Whakaari
+        between 2019-12-01 and 2019-12-31. The return format is CSV.
+        <pre>
+curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&group=Whakaari&site=WIZ"
+        </pre>
+        To do the same with Python using pandas you can use the following code:
+        <pre>
+import pandas as pd
+feature = "rsam"
+starttime = "2019-12-01T00:00:00"
+endtime = "2019-12-31T00:00:00"
+group = "Whakaari"
+site = "WIZ"
+url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&group={group}&site={site}"
+pd.read_csv(url, parse_dates=True, index_col=0)
+        </pre>
+    </section>
+
+</body>
+</html>
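Note that the /featureEndpoint handler is still commented out in api.py above, so only / and /feature are actually registered in 0.0.3. The packaged page also only demonstrates 1D requests; for completeness, a hedged sketch of a 2D (spectrogram-style) request using the resolution, verticalres, log and normalise parameters defined in TonikAPI.feature (host, port and names are placeholders):

    import pandas as pd

    # 2D features come back as long-format CSV: dates, freqs, feature;
    # 'resolution' is any pandas-parsable timedelta string and
    # 'verticalres' caps the number of frequency bins
    url = ("http://localhost:8003/feature?name=ssam&group=Ruapehu&site=MAVZ"
           "&sensor=10&channel=HHZ&resolution=1h&verticalres=10&normalise=true")
    df = pd.read_csv(url, parse_dates=['dates'])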
tonik/storage.py
ADDED
@@ -0,0 +1,299 @@
+from datetime import datetime, timedelta
+import glob
+import logging
+import logging.config
+import os
+import re
+import tempfile
+
+import pandas as pd
+import xarray as xr
+
+from .xarray2hdf5 import xarray2hdf5
+
+
+ERROR_LOG_FILENAME = "tonik.log"
+
+LOGGING_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "formatters": {
+        "default": {  # The formatter name, it can be anything that I wish
+            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",  # What to add in the message
+            "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
+        },
+        "json": {  # The formatter name
+            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
+            # Json is more complex, but easier to read, display all attributes!
+            "format": """
+                asctime: %(asctime)s
+                created: %(created)f
+                filename: %(filename)s
+                funcName: %(funcName)s
+                levelname: %(levelname)s
+                levelno: %(levelno)s
+                lineno: %(lineno)d
+                message: %(message)s
+                module: %(module)s
+                msec: %(msecs)d
+                name: %(name)s
+                pathname: %(pathname)s
+                process: %(process)d
+                processName: %(processName)s
+                relativeCreated: %(relativeCreated)d
+                thread: %(thread)d
+                threadName: %(threadName)s
+                exc_info: %(exc_info)s
+            """,
+            "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
+        },
+    },
+    "handlers": {
+        "logfile": {  # The handler name
+            "formatter": "json",  # Refer to the formatter defined above
+            "level": "ERROR",  # FILTER: Only ERROR and CRITICAL logs
+            "class": "logging.handlers.RotatingFileHandler",  # OUTPUT: Which class to use
+            "filename": ERROR_LOG_FILENAME,  # Param for class above. Defines filename to use, load it from constant
+            "backupCount": 2,  # Param for class above. Defines how many log files to keep as it grows
+        },
+        "simple": {  # The handler name
+            "formatter": "default",  # Refer to the formatter defined above
+            "class": "logging.StreamHandler",  # OUTPUT: Same as above, stream to console
+            "stream": "ext://sys.stdout",
+        },
+    },
+    "loggers": {
+        "zizou": {  # The name of the logger, this SHOULD match your module!
+            "level": "DEBUG",  # FILTER: only INFO logs onwards from "tryceratops" logger
+            "handlers": [
+                "simple",  # Refer the handler defined above
+            ],
+        },
+    },
+    "root": {
+        "level": "ERROR",  # FILTER: only INFO logs onwards
+        "handlers": [
+            "logfile",  # Refer the handler defined above
+        ]
+    },
+}
+
+logging.config.dictConfig(LOGGING_CONFIG)
+logger = logging.getLogger(__name__)
+
+
+class Path(object):
+    def __init__(self, name, parentdir):
+        self.name = name
+        self.path = os.path.join(parentdir, name)
+        os.makedirs(self.path, exist_ok=True)
+        self.children = {}
+
+    def __str__(self):
+        return self.path
+
+    def __getitem__(self, key):
+        try:
+            return self.children[key]
+        except KeyError:
+            self.children[key] = Path(key, self.path)
+            return self.children[key]
+
+    def feature_path(self, feature):
+        _feature_path = os.path.join(self.path, feature + ".nc")
+        if not os.path.exists(_feature_path):
+            raise FileNotFoundError(f"File {_feature_path} not found")
+        self.children[feature] = _feature_path
+        return _feature_path
+
+    def __call__(self, feature, stack_length=None, interval='10min'):
+        """
+        Request a particular feature
+
+        :param feature: Feature name
+        :type feature: str
+        :param stack_length: length of moving average in time
+        :type stack_length: str
+
+        """
+        if self.endtime <= self.starttime:
+            raise ValueError('Starttime has to be earlier than endtime.')
+
+        feature = feature.lower()
+        filename = self.feature_path(feature)
+
+        logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
+        num_periods = None
+        if stack_length is not None:
+            valid_stack_units = ['W', 'D', 'H', 'T', 'min', 'S']
+            if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
+                    in valid_stack_units:
+                raise ValueError(
+                    'Stack length should be one of: {}'.
+                    format(', '.join(valid_stack_units))
+                )
+
+            if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
+                raise ValueError('Stack length {} is less than interval {}'.
+                                 format(stack_length, interval))
+
+            # Rewind starttime to account for stack length
+            self.starttime -= pd.to_timedelta(stack_length)
+
+            num_periods = (pd.to_timedelta(stack_length)/
+                           pd.to_timedelta(interval))
+            if not num_periods.is_integer():
+                raise ValueError(
+                    'Stack length {} / interval {} = {}, but it needs'
+                    ' to be a whole number'.
+                    format(stack_length, interval, num_periods))
+
+        xd_index = dict(datetime=slice(self.starttime, self.endtime))
+        with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
+            ds = ds.sortby("datetime")
+            rq = ds.loc[xd_index].load()
+
+        # Stack features
+        if stack_length is not None:
+            logger.debug("Stacking feature...")
+            try:
+                xdf = rq[feature].rolling(datetime=int(num_periods),
+                                          center=False,
+                                          min_periods=1).mean()
+                # Return requested timeframe to that defined in initialisation
+                self.starttime += pd.to_timedelta(stack_length)
+                xdf_new = xdf.loc[
+                    self.starttime:
+                    self.endtime-pd.to_timedelta(interval)]
+                xdf_new = xdf_new.rename(feature)
+            except ValueError as e:
+                logger.error(e)
+                logger.error('Stack length {} is not valid for feature {}'.
+                             format(stack_length, feature))
+            else:
+                return xdf_new
+
+        return rq[feature]
+
+    def load(self, *args, **kwargs):
+        """
+        Load a feature from disk
+        """
+        return self.__call__(*args, **kwargs)
+
+    def save(self, data):
+        """
+        Save a feature to disk
+        """
+        xarray2hdf5(data, self.path)
+
+
+class StorageGroup(Path):
+    """
+    Query computed features
+
+    :param rootdir: Path to parent directory.
+    :type rootdir: str
+    :param starttime: Begin of request
+    :type starttime: :class:`datetime.datetime`
+    :param endtime: End of request
+    :type endtime: :class:`datetime.datetime`
+
+    >>> import datetime
+    >>> g = StorageGroup('Whakaari')
+    >>> start = datetime.datetime(2012, 1, 1, 0, 0, 0)
+    >>> end = datetime.datetime(2012, 1, 2, 23, 59, 59)
+    >>> g.starttime = start
+    >>> g.endtime = end
+    >>> c = g.get_store(site='WIZ', sensor='00', channel='HHZ')
+    >>> rsam = c("rsam")
+    """
+    def __init__(self, name, rootdir=None, starttime=None, endtime=None):
+        self.stores = set()
+        self.starttime = starttime
+        self.endtime = endtime
+        super().__init__(name, rootdir)
+
+    def __repr__(self):
+        rstr = f"Group: {self.name}\n"
+        last_site = False
+        for j, site in enumerate(self.children.values()):
+            if j == len(self.children) - 1:
+                last_site = True
+            rstr += f"|__ {site.name}\n"
+            last_sensor = False
+            for i, sensor in enumerate(site.children.values()):
+                if i == len(site.children) - 1:
+                    last_sensor = True
+                rstr += ("    " if last_site else "|   ") + f"|__ {sensor.name}\n"
+                for k, channel in enumerate(sensor.children.values()):
+                    rstr += ("    " if last_site else "|   ")
+                    rstr += ("    " if last_sensor else "|   ")
+                    rstr += f"|__ {channel.name}\n"
+        return rstr
+
+    def get_store(self, site, sensor, channel):
+        # return the store for a given site, sensor, or channel
+        # if one of them is None return the store for the level above
+        # if all are None return the root store
+        try:
+            st = self[site][sensor][channel]
+        except:
+            try:
+                st = self[site][sensor]
+            except:
+                try:
+                    st = self[site]
+                except:
+                    return self
+
+        st.starttime = self.starttime
+        st.endtime = self.endtime
+        self.stores.add(st)
+        return st
+
+    def from_directory(self):
+        feature_files = glob.glob(os.path.join(self.path, '**', '*.nc'),
+                                  recursive=True)
+        for _f in feature_files:
+            subdir = _f.split(self.path)[1].strip(os.sep)
+            # split the path into parts
+            # get the subdirectories
+            site, sensor, channel, ffile = subdir.split(os.sep)
+            fname = ffile.strip('.nc')
+            c = self.get_store(site, sensor, channel)
+
+    def get_starttime(self):
+        return self.__starttime
+
+    def set_starttime(self, time):
+        if time is None:
+            self.__starttime = None
+            self.__sdate = None
+            return
+        self.__starttime = time
+        self.__sdate = '{}{:02d}{:02d}'.format(time.year,
+                                               time.month,
+                                               time.day)
+        for s in self.stores:
+            s.starttime = time
+
+    def get_endtime(self):
+        return self.__endtime
+
+    def set_endtime(self, time):
+        if time is None:
+            self.__endtime = None
+            self.__edate = None
+            return
+        self.__endtime = time
+        self.__edate = '{}{:02d}{:02d}'.format(time.year,
+                                               time.month,
+                                               time.day)
+        for s in self.stores:
+            s.endtime = time
+
+
+    starttime = property(get_starttime, set_starttime)
+    endtime = property(get_endtime, set_endtime)
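A hedged sketch of the intended workflow, based on the docstring and get_store above (the rootdir and the site/sensor/channel names are placeholders):

    from datetime import datetime
    from tonik import StorageGroup

    g = StorageGroup('Ruapehu', rootdir='/tmp',
                     starttime=datetime(2023, 1, 1),
                     endtime=datetime(2023, 1, 31))
    # creates /tmp/Ruapehu/MAVZ/10/HHZ on first access and tracks the store
    store = g.get_store(site='MAVZ', sensor='10', channel='HHZ')
    print(g)  # __repr__ renders the site/sensor/channel tree
    rsam = store('rsam', stack_length='1H')  # 1 h moving average, if rsam.nc exists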
tonik/utils.py
ADDED
@@ -0,0 +1,43 @@
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+
+def generate_test_data(dim=1, ndays=30, nfreqs=10,
+                       tstart=datetime.utcnow(),
+                       feature_name=None,
+                       freq_name=None):
+    """
+    Generate a 1D or 2D feature for testing.
+    """
+    assert dim < 3
+    assert dim > 0
+
+    nints = ndays * 6 * 24
+    dates = pd.date_range(tstart.strftime('%Y-%m-%d'), freq='10min', periods=nints)
+    rs = np.random.default_rng(42)
+    # Random walk as test signal
+    data = np.abs(np.cumsum(rs.normal(0, 8., len(dates))))
+    if dim == 2:
+        data = np.tile(data, (nfreqs, 1))
+    # Add 10% NaNs
+    idx_nan = rs.integers(0, nints-1, int(0.1*nints))
+    if dim == 1:
+        data[idx_nan] = np.nan
+        if feature_name is None:
+            feature_name = 'rsam'
+        xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
+    if dim == 2:
+        data[:, idx_nan] = np.nan
+        freqs = np.arange(nfreqs)
+        if feature_name is None:
+            feature_name = 'ssam'
+        if freq_name is None:
+            freq_name = 'frequency'
+        xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
+    xrd.attrs['starttime'] = dates[0].isoformat()
+    xrd.attrs['endtime'] = dates[-1].isoformat()
+    xrd.attrs['station'] = 'MDR'
+    return xrd
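generate_test_data pairs naturally with the storage layer; a hedged round-trip sketch (the group name and rootdir are placeholders):

    from datetime import datetime, timedelta
    from tonik import StorageGroup, generate_test_data

    data = generate_test_data(dim=2)   # 30 days of synthetic 'ssam', starting today
    g = StorageGroup('test', rootdir='/tmp',
                     starttime=datetime.utcnow(),
                     endtime=datetime.utcnow() + timedelta(days=2))
    store = g.get_store(site='MDR', sensor='00', channel='HHZ')
    store.save(data)       # writes ssam.nc via xarray2hdf5
    ssam = store('ssam')   # read the requested window back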
tonik/xarray2hdf5.py
CHANGED
@@ -3,10 +3,9 @@ import logging
 import os
 from warnings import filterwarnings
 
-from cftime import num2date, date2num
+from cftime import num2date, date2num
 import h5netcdf
 import numpy as np
-import xarray as xr
 
 
 def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
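xarray2hdf5 is what Path.save calls under the hood; judging by how feature_path resolves files, it presumably writes one <variable>.nc per dataset variable into the given directory, stored under the group named by rootGroupName. A hedged sketch of calling it directly:

    import os
    from tonik.utils import generate_test_data
    from tonik.xarray2hdf5 import xarray2hdf5

    data = generate_test_data(dim=1)         # synthetic 'rsam' dataset
    os.makedirs('/tmp/demo', exist_ok=True)  # assuming the directory must exist
    xarray2hdf5(data, '/tmp/demo')           # expected to produce /tmp/demo/rsam.nc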
{tonik-0.0.1.dist-info → tonik-0.0.3.dist-info}/METADATA
CHANGED
@@ -1,8 +1,8 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: tonik
-Version: 0.0.1
+Version: 0.0.3
 Summary: A collection of tools to integrate with GNS Science's time series classification platform.
-Project-URL: Homepage, https://
+Project-URL: Homepage, https://tsc-tools.github.io/tonik.github.io
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
 Author-email: Yannik Behr <y.behr@gns.cri.nz>, Christof Mueller <c.mueller@gns.cri.nz>
 License-File: LICENSE
@@ -10,12 +10,17 @@ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.7
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: datashader>=0.14
+Requires-Dist: fastapi>=0.95
+Requires-Dist: h5netcdf>=1.1
+Requires-Dist: h5py>=3.8
+Requires-Dist: netcdf4>=1.6
+Requires-Dist: pandas>=2.0
+Requires-Dist: python-json-logger>=2.0
+Requires-Dist: uvicorn[standard]>=0.22
+Requires-Dist: xarray>=2023.4
+Provides-Extra: dev
+Requires-Dist: pytest; extra == 'dev'
 Description-Content-Type: text/markdown
 
 # Time series classification tools
tonik-0.0.3.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
+tonik/api.py,sha256=zTZZbpPsetoM5EH8ao8RlwR39lU1jEWpxwATC4S8Qq0,11012
+tonik/storage.py,sha256=F1NnRIZLq7uB7TTK1-5VHYix6_e_fl2J1FmiCFYqIJ0,10719
+tonik/utils.py,sha256=jEjvUNcU9nUhQZTgu8iwfPmF4U2odyOT8EJu0v1DaA0,1433
+tonik/xarray2hdf5.py,sha256=biQ3KVt0QrxJhOWm38FwglzYkLhPtO13G1B1vVF2c6o,4090
+tonik/package_data/index.html,sha256=IaPMU-A_FswjMAKA6PrHp9VFTuKJ6o_PGyjg_CTutrY,3082
+tonik-0.0.3.dist-info/METADATA,sha256=9MLYvdxjaYM0okuTgI2JhOFwiEV_Gir-7N6PnI2VTeo,1085
+tonik-0.0.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+tonik-0.0.3.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
+tonik-0.0.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+tonik-0.0.3.dist-info/RECORD,,
tonik-0.0.1.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
-tonik/__init__.py,sha256=d7gnshn92xGI-U7YTs7Q5cMWjvLW45m-EXJ5IcYaZrs,36
-tonik/xarray2hdf5.py,sha256=sImRJ80EQ3yI_7xJg34VfS8SSIzkDtRHda3Mg959xPs,4122
-tonik-0.0.1.dist-info/METADATA,sha256=KS91xVocqwwtwLKprecpfp0JU15s-1xERkMqKBSpPS4,873
-tonik-0.0.1.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
-tonik-0.0.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-tonik-0.0.1.dist-info/RECORD,,

{tonik-0.0.1.dist-info → tonik-0.0.3.dist-info}/licenses/LICENSE
File without changes