tonik 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tonik/api.py +34 -166
- tonik/package_data/index.html +11 -17
- tonik/storage.py +61 -43
- tonik/utils.py +6 -3
- tonik/xarray2hdf5.py +40 -31
- {tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/METADATA +2 -2
- tonik-0.0.6.dist-info/RECORD +11 -0
- tonik-0.0.5.dist-info/RECORD +0 -11
- {tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/WHEEL +0 -0
- {tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/entry_points.txt +0 -0
- {tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/licenses/LICENSE +0 -0
tonik/api.py
CHANGED

@@ -1,18 +1,19 @@
 from argparse import ArgumentParser
-from datetime import timedelta, datetime
+from datetime import timedelta, datetime, timezone
 import logging
 import os
+from urllib.parse import unquote

 from cftime import num2date, date2num
 import datashader as dsh
 import numpy as np
 import pandas as pd
 import uvicorn
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import HTMLResponse, StreamingResponse
 from pydantic import BaseModel
-from typing import List
+from typing import Annotated

 from .storage import StorageGroup
 from . import get_data

@@ -24,7 +25,7 @@ class TonikAPI:

     def __init__(self, rootdir) -> None:
         self.rootdir = rootdir
-        self.app = FastAPI()
+        self.app = FastAPI()

         # -- allow any origin to query API
         self.app.add_middleware(CORSMiddleware,

@@ -32,37 +33,46 @@

         self.app.get("/", response_class=HTMLResponse)(self.root)
         self.app.get("/feature")(self.feature)
+        self.app.get("/inventory")(self.inventory)

-    def root(self):
+    def root(self):
         with open(get_data("package_data/index.html"), "r", encoding="utf-8") as file:
             html_content = file.read()
         return HTMLResponse(content=html_content, status_code=200)

+    def preprocess_datetime(self, dt):
+        """
+        Convert datetime string to datetime object.
+        """
+        # remove timezone info
+        dt = dt.split('+')[0]
+        # remove 'Z' at the end
+        dt = dt.replace('Z', '')
+        # convert html encoded characters
+        dt = unquote(dt)
+        dt = datetime.fromisoformat(dt)
+        dt = dt.replace(tzinfo=None)
+        return dt

     def feature(self,
-                name: str='rsam',
                 group: str='Ruapehu',
-                starttime: datetime=datetime.utcnow()-timedelta(days=30),
-                endtime: datetime=datetime.utcnow(),
+                name: str='rsam',
+                starttime: str=None,
+                endtime: str=None,
                 resolution: str='full',
                 verticalres: int=10,
-                log: bool=
-                normalise: bool=False
-        _st = datetime.fromisoformat(str(starttime))
-        _st = _st.replace(tzinfo=None)
-        _et = datetime.fromisoformat(str(endtime))
-        _et = _et.replace(tzinfo=None)
+                log: bool=False,
+                normalise: bool=False,
+                subdir: Annotated[list[str] | None, Query()]=None):
+        _st = self.preprocess_datetime(starttime)
+        _et = self.preprocess_datetime(endtime)
         g = StorageGroup(group, rootdir=self.rootdir,
                          starttime=_st, endtime=_et)
-        c = g.get_store(
+        c = g.get_store(*subdir)
         try:
             feat = c(name)
         except ValueError as e:
-            msg = f"Feature {name} not found in directory {
+            msg = f"Feature {name} not found in directory {c.path}:"
             msg += f"{e}"
             raise HTTPException(status_code=404, detail=msg)
         if len(feat.shape) > 1:
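The new preprocess_datetime helper trades FastAPI's strict datetime parsing for a tolerant string parameter: UTC offsets, trailing 'Z' suffixes and URL escapes are all stripped before parsing. A standalone sketch of the same logic (copied out of the method above so it runs in isolation; the example timestamps are illustrative):

from datetime import datetime
from urllib.parse import unquote

def preprocess_datetime(dt):
    # Mirrors TonikAPI.preprocess_datetime: drop a '+HH:MM' offset,
    # strip a trailing 'Z', undo URL escapes, then parse to a naive datetime.
    dt = dt.split('+')[0]
    dt = dt.replace('Z', '')
    dt = unquote(dt)
    return datetime.fromisoformat(dt).replace(tzinfo=None)

print(preprocess_datetime("2019-12-01T00:00:00"))          # 2019-12-01 00:00:00
print(preprocess_datetime("2019-12-01T00:00:00Z"))         # 2019-12-01 00:00:00
print(preprocess_datetime("2019-12-01T00:00:00+12:00"))    # 2019-12-01 00:00:00
print(preprocess_datetime("2019-12-01T00:00:00%2B12:00"))  # 2019-12-01 00:00:00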
@@ -117,153 +127,11 @@ class TonikAPI:
         dates = num2date(d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
         return freq,dates,spec

+    def inventory(self, group: str) -> dict:
+        sg = StorageGroup(group, rootdir=self.rootdir)
+        return sg.to_dict()

-#
-# class Feature(BaseModel):
-#     name: list
-
-
-# class Channel(BaseModel):
-#     name: str
-#     features: List[Feature] = []
-
-
-# class Location(BaseModel):
-#     name: str
-#     channels: List[Channel] = []
-
-
-# class Station(BaseModel):
-#     name: str
-#     lat: float
-#     lon: float
-#     locations: List[Location] = []
-
-
-# class Group(BaseModel):
-#     volcano: str
-#     stations: List[Station] = []
-
-
-# def get_pydanticModel(group, station, location, channel, feature_list):
-
-#     channels_data = {"name": channel, "features": feature_list}
-#     channel_models = []
-#     channel_model = Channel(**channels_data)
-#     channel_models.append(channel_model)
-
-#     location_data = {"name": location, "channels": channel_models}
-#     location_models = []
-#     location_model = Location(**location_data)
-#     location_models.append(location_model)
-
-#     stations_data = {"name": station, "lat": "42", "lon": "171",
-#                      "locations": location_models}
-#     station_models = []
-#     station_model = Station(**stations_data)
-#     station_models.append(station_model)
-
-#     group_model = Group(group=group, stations=station_models)
-
-#     # Exporting to JSON
-#     json_data = group_model.json()
-#     return json_data
-
-
-# write a function that scans LOCKERROOMROOT for
-# available groups, stations, locations, channels, and features
-# and returns a pydantic model
-# def get_available_features():
-#     groups = os.listdir(ROOT)
-#     group_models = []
-#     for group in groups:
-#         stations = os.listdir(os.path.join(LOCKERROOMROOT, group))
-#         station_models = []
-#         for station in stations:
-#             locations = os.listdir(os.path.join(LOCKERROOMROOT, group, station))
-#             location_models = []
-#             for location in locations:
-#                 channels = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location))
-#                 channel_models = []
-#                 for channel in channels:
-#                     features = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location, channel))
-#                     feature_list = []
-#                     for feature in features:
-#                         feature_list.append(feature)
-#                     channel_data = {"name": channel, "features": feature_list}
-#                     channel_model = Channel(**channel_data)
-#                     channel_models.append(channel_model)
-#                 location_data = {"name": location, "channels": channel_models}
-#                 location_model = Location(**location_data)
-#                 location_models.append(location_model)
-#             station_data = {"name": station, "lat": "42", "lon": "171", "locations": location_models}
-#             station_model = Station(**station_data)
-#             station_models.append(station_model)
-#         group_data = {"volcano": group, "stations": station_models}
-#         group_model = Group(**group_data)
-#         group_models.append(group_model)
-#     return group_models
-
-# @app.get("/featureEndpoint")
-# def featureEndpoint(group: str="all", station: str="all", channel: str="all",
-#                     type: str="all"):
-#     groups = vm.get_available_volcanoes()
-
-#     station_model_list = []
-#     channel_model_list = []
-#     volcano_model_list = []
-#     for _volcano in volcanoes:
-#         streams = vm.get_available_streams(_volcano)
-#         for _stream in streams:
-#             _, _station, _, _channel = _stream.split('.')
-#             stream_dir = os.path.join(FEATUREDIR, _volcano, _station, _channel)
-#             try:
-#                 feature_list = os.listdir(stream_dir)
-#             except (NotADirectoryError, FileNotFoundError):
-#                 continue
-#             feature_list = sorted([str(os.path.basename(path)).split('.nc')[0] for path in feature_list])
-#             channels_data = {"name": _channel, "features":feature_list}
-#             channel_model = Channel(**channels_data)
-#             channel_model_list.append(channel_model)
-#             try:
-#                 site_info = vm.get_site_information(_station)
-#                 lat = site_info['latitude']
-#                 lon = site_info['longitude']
-#             except:
-#                 lat, lon = -999.9, -999.9
-#             stations_data = {"name": _station, "lat": lat, "lon": lon, "channels":channel_model_list}
-#             station_model = Station(**stations_data)
-#             station_model_list.append(station_model)
-
-#         volcano_model = Volcano(volcano=_volcano, stations=station_model_list)
-#         volcano_model_list.append(volcano_model)
-
-#     if len(volcano_model_list) == 0:
-#         return('no volcano')
-
-#     scenario_model = Scenario(scenario='VUMT', volcanoes=volcano_model_list)
-#     if volcano != "all":
-#         # return all stations for a volcano
-#         for _volcano in scenario_model.volcanoes:
-#             if _volcano.volcano == volcano:
-#                 if station == "all":
-#                     return _volcano
-#                 for _station in _volcano.stations:
-#                     if _station.name == station:
-#                         if channel == "all":
-#                             return _station
-#                         for _channel in _station.channels:
-#                             if _channel.name == channel:
-#                                 feature_list_filtered = []
-#                                 for _f in _channel.features:
-#                                     if _f in FeatureRequest.feat_dict[type]:
-#                                         feature_list_filtered.append(_f)
-#                                 _channel.features = feature_list_filtered
-#                                 return _channel
-
-#     return scenario_model
-
+# ta = TonikAPI('/tmp').feature()

 def main(argv=None):
     parser = ArgumentParser()
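Because subdir is declared as Annotated[list[str] | None, Query()], FastAPI gathers every repeated subdir query key into one list, which feature then unpacks into g.get_store(*subdir). A hedged client-side sketch (host, port and the group/subdir names are placeholders, not part of the package):

import requests

params = {
    "group": "Whakaari",
    "name": "rsam",
    "starttime": "2019-12-01T00:00:00",
    "endtime": "2019-12-31T00:00:00",
    # requests encodes a list value as repeated keys: subdir=Whakaari&subdir=WIZ,
    # which arrives server-side as ['Whakaari', 'WIZ'].
    "subdir": ["Whakaari", "WIZ"],
}
r = requests.get("http://localhost:8000/feature", params=params)
print(r.text)  # CSV time series, per the docs in index.html below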
tonik/package_data/index.html
CHANGED

@@ -3,7 +3,7 @@
 <head>
 <meta charset="UTF-8">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>
+<title>Tonik API Documentation</title>
 <style>
 body {
     font-family: Arial, sans-serif;

@@ -37,41 +37,35 @@
 <body>

 <header>
-    <h1>
-    <p>
+    <h1>Tonik</h1>
+    <p>Tonik - API</p>
 </header>

 <section>
     <h2>Overview</h2>
     <p>
-        Welcome to the documentation for
-        analysis results computed by SAM.
+        Welcome to the documentation for the Tonik API. This API provides access to time-series stored with the tonik package.
     </p>

-    <!--h2>Authentication</h2>
-    <p>
-        To access the API, you need to authenticate using [authentication method]. Obtain your API key from [location].
-    </p-->
-
     <h2>Endpoints</h2>
     <p>
         The endpoints available in the API are:
     </p>
     <ul>
-        <li><strong>GET /feature:</strong> Request
-        <li><strong>GET /
+        <li><strong>GET /feature:</strong> Request time-series data.</li>
+        <li><strong>GET /inventory:</strong> Request information on data.</li>
     </ul>

     <h2>Code Examples</h2>
     <h3>Requesting meta data</h3>
-    The following will return information on available
+    The following will return information on available datasets in JSON format.
     <pre>
-curl -X GET "http://your.host.server:yourport/
+curl -X GET "http://your.host.server:yourport/inventory"
     </pre>
     To do the same with Python using requests you can use the following code:
     <pre>
 import requests
-url = "http://your.host.server:yourport/
+url = "http://your.host.server:yourport/inventory"
 response = requests.get(url)
 response.json()
     </pre>

@@ -79,7 +73,7 @@
 The following example shows how to request RSAM data for station WIZ at volcano Whakaari
 between 2019-12-01 and 2019-12-31. The return format is CSV.
     <pre>
-curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&
+curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&subdir=Whakaari&subdir=WIZ"
     </pre>
 To do the same with Python using pandas you can use the following code:
     <pre>

@@ -89,7 +83,7 @@
 endtime="2019-12-31T00:00:00"
 volcano="Whakaari"
 site="WIZ"
-url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&
+url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&subdir={volcano}&subdir={site}"
 pd.read_csv(url, parse_dates=True, index_col=0)
     </pre>
 </section>
tonik/storage.py
CHANGED

@@ -1,4 +1,5 @@
 from datetime import datetime, timedelta
+import json
 import glob
 import logging
 import logging.config

@@ -86,13 +87,18 @@ class Path(object):
     def __init__(self, name, parentdir):
         self.name = name
         self.path = os.path.join(parentdir, name)
-
+        try:
+            os.makedirs(self.path, exist_ok=True)
+        except FileExistsError:
+            pass
         self.children = {}

     def __str__(self):
         return self.path

     def __getitem__(self, key):
+        if key is None:
+            raise ValueError("Key cannot be None")
         try:
             return self.children[key]
         except KeyError:

@@ -103,7 +109,7 @@ class Path(object):
         _feature_path = os.path.join(self.path, feature + ".nc")
         if not os.path.exists(_feature_path):
             raise FileNotFoundError(f"File {_feature_path} not found")
-        self.children[feature] =
+        self.children[feature] = Path(feature + ".nc", self.path)
         return _feature_path

     def __call__(self, feature, stack_length=None, interval='10min'):

@@ -125,7 +131,7 @@ class Path(object):
         logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
         num_periods = None
         if stack_length is not None:
-            valid_stack_units = ['W', 'D', '
+            valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
             if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
                     in valid_stack_units:
                 raise ValueError(

@@ -162,9 +168,7 @@ class Path(object):
                     min_periods=1).mean()
                 # Return requested timeframe to that defined in initialisation
                 self.starttime += pd.to_timedelta(stack_length)
-                xdf_new = xdf.loc[
-                    self.starttime:
-                    self.endtime-pd.to_timedelta(interval)]
+                xdf_new = xdf.loc[self.starttime:self.endtime]
                 xdf_new = xdf_new.rename(feature)
             except ValueError as e:
                 logger.error(e)

@@ -181,11 +185,11 @@ class Path(object):
         """
         self.__call__(*args, **kwargs)

-    def save(self, data):
+    def save(self, data, **kwargs):
         """
         Save a feature to disk
         """
-        xarray2hdf5(data, self.path)
+        xarray2hdf5(data, self.path, **kwargs)


 class StorageGroup(Path):

@@ -213,39 +217,28 @@ class StorageGroup(Path):
         self.starttime = starttime
         self.endtime = endtime
         super().__init__(name, rootdir)
+
+    def print_tree(self, site, indent=0, output=''):
+        output += ' ' * indent + site.path + '\n'
+        for site in site.children.values():
+            output += self.print_tree(site, indent + 2)
+        return output

     def __repr__(self):
         rstr = f"Group: {self.name}\n"
-
-        for j, site in enumerate(self.children.values()):
-            if j == len(self.children) - 1:
-                last_site = True
-            rstr += f"|__ {site.name}\n"
-            last_sensor = False
-            for i, sensor in enumerate(site.children.values()):
-                if i == len(site.children) - 1:
-                    last_sensor = True
-                rstr += ("    " if last_site else "|") + f"   |__ {sensor.name}\n"
-                for k, channel in enumerate(sensor.children.values()):
-                    rstr += ("    " if last_site else "|   ")
-                    rstr += ("    " if last_sensor else "|   ")
-                    rstr += f"|__ {channel.name}\n"
+        rstr = self.print_tree(self, 0, rstr)
         return rstr

-    def get_store(self,
+    def get_store(self, *args):
         # return the store for a given site, sensor, or channel
         # if one of them is None return the store for the level above
         # if all are None return the root store
         try:
-            st = self
-            try:
-                st = self[site]
-            except:
-                return self
+            st = self
+            for arg in args:
+                st = st[arg]
+        except KeyError:
+            return self

         st.starttime = self.starttime
         st.endtime = self.endtime
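get_store now accepts any number of subdirectory names and walks them one children level at a time, so the storage hierarchy is no longer fixed to site/sensor/channel. A minimal sketch, assuming __getitem__'s KeyError branch (not shown in this hunk) creates missing children the way the directory-creating Path.__init__ suggests; the rootdir and names are hypothetical:

from tonik.storage import StorageGroup

# Path.__init__ now calls os.makedirs, so this works on an empty rootdir.
g = StorageGroup("volcanoes", rootdir="/tmp/tonik_demo")
store = g.get_store("Whakaari", "WIZ", "HHZ")  # one argument per directory level
print(store.path)  # e.g. /tmp/tonik_demo/volcanoes/Whakaari/WIZ/HHZ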
@@ -253,15 +246,38 @@ class StorageGroup(Path):
         return st

     def from_directory(self):
+        """
+        Construct the storage group from the root directory
+        """
+        for root, dirs, files in os.walk(self.path):
+            if files:
+                try:
+                    subdirs = root.split(self.path)[1].split(os.sep)[1:]
+                except IndexError:
+                    st = self.get_store()
+                else:
+                    try:
+                        st = self.get_store(*subdirs)
+                    except TypeError as e:
+                        raise e
+                for _f in files:
+                    if _f.endswith('.nc'):
+                        st.feature_path(_f.replace('.nc', ''))
+
+    @staticmethod
+    def directory_tree_to_dict(path):
+        name = os.path.basename(path)
+        if os.path.isdir(path):
+            return {name: [StorageGroup.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
+        else:
+            if path.endswith('.nc'):
+                return name.replace('.nc', '')
+
+    def to_dict(self):
+        """
+        Convert the storage group to json
+        """
+        return StorageGroup.directory_tree_to_dict(self.path)

     def get_starttime(self):
         return self.__starttime

@@ -276,7 +292,8 @@ class StorageGroup(Path):
                         time.month,
                         time.day)
         for s in self.stores:
-            s.starttime = time
+            if s is not self:
+                s.starttime = time

     def get_endtime(self):
         return self.__endtime

@@ -291,7 +308,8 @@ class StorageGroup(Path):
                         time.month,
                         time.day)
         for s in self.stores:
-            s.endtime = time
+            if s is not self:
+                s.endtime = time


     starttime = property(get_starttime, set_starttime)
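to_dict is what the new /inventory endpoint returns: directories become {name: [children]} mappings and .nc files collapse to bare feature names. A sketch under the same hypothetical rootdir as above:

from tonik.storage import StorageGroup

g = StorageGroup("volcanoes", rootdir="/tmp/tonik_demo")
g.from_directory()  # register every *.nc file found under the root
# Expected shape, assuming rsam.nc and ssam.nc exist under Whakaari/WIZ/HHZ:
# {'volcanoes': [{'Whakaari': [{'WIZ': [{'HHZ': ['rsam', 'ssam']}]}]}]}
print(g.to_dict())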
tonik/utils.py
CHANGED

@@ -8,7 +8,7 @@ import xarray as xr
 def generate_test_data(dim=1, ndays=30, nfreqs=10,
                        tstart=datetime.utcnow(),
                        feature_name=None,
-                       freq_name=None):
+                       freq_name=None, add_nans=True):
     """
     Generate a 1D or 2D feature for testing.
     """

@@ -25,12 +25,14 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
     # Add 10% NaNs
     idx_nan = rs.integers(0, nints-1, int(0.1*nints))
     if dim == 1:
-        data[idx_nan] = np.nan
+        if add_nans:
+            data[idx_nan] = np.nan
         if feature_name is None:
             feature_name = 'rsam'
         xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
     if dim == 2:
-        data[:, idx_nan] = np.nan
+        if add_nans:
+            data[:, idx_nan] = np.nan
         freqs = np.arange(nfreqs)
         if feature_name is None:
             feature_name = 'ssam'

@@ -40,4 +42,5 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
     xrd.attrs['starttime'] = dates[0].isoformat()
     xrd.attrs['endtime'] = dates[-1].isoformat()
     xrd.attrs['station'] = 'MDR'
+    xrd.attrs['interval'] = '10min'
     return xrd
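With add_nans=False the synthetic data is gap-free, which is handy for exercising the rewritten write path in xarray2hdf5; the interval attribute is new in this release. A small sketch (dimension names beyond 'datetime' depend on the freq_name default, which this hunk does not show):

from tonik.utils import generate_test_data

# 2D spectrogram-like test set without injected NaNs.
xrd = generate_test_data(dim=2, ndays=2, nfreqs=8, add_nans=False)
print(xrd['ssam'].shape)       # (8, n_intervals) for 10-minute sampling
print(xrd.attrs['interval'])   # '10min'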
tonik/xarray2hdf5.py
CHANGED

@@ -8,22 +8,36 @@ import h5netcdf
 import numpy as np


-def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
+def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
+                archive_starttime=datetime(2000, 1, 1), resolution=None):
     """
     Store an xarray dataset as an HDF5 file.

+    Parameters
+    ----------
+    xArray : xarray.Dataset
+        Data to store.
+    fdir : str
+        Directory to store data under.
+    rootGroupName : str
+        Hdf5 group name.
+    timedim : str
+        Name of time dimension.
+    archive_starttime : datetime
+        Start time of archive. If the start time of the data is before this
+        time, the data start time is used.
+    resolution : float
+        Time resolution of the data in hours. If None, the resolution is
+        determined from the data.
     """
     filterwarnings(action='ignore', category=DeprecationWarning,
                    message='`np.bool` is a deprecated alias')

+    starttime = xArray[timedim].values[0].astype('datetime64[us]').astype(datetime)
+    starttime = min(starttime, archive_starttime)
+    if resolution is None:
+        resolution = (np.diff(xArray[timedim])/np.timedelta64(1, 'h'))[0]
+
     for featureName in list(xArray.data_vars.keys()):
         h5file = os.path.join(fdir, featureName +'.nc')

@@ -31,7 +45,8 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):

         with h5netcdf.File(h5file, mode) as h5f:
             try:
-                rootGrp = _create_h5_Structure(rootGroupName, featureName, h5f, xArray)
+                rootGrp = _create_h5_Structure(rootGroupName, featureName,
+                                               h5f, xArray, starttime, timedim)
             except ValueError:  # group already exists, append
                 rootGrp = h5f[rootGroupName]

@@ -39,17 +54,17 @@
             new_time = date2num(xArray[timedim].values.astype('datetime64[us]').astype(datetime),
                                 units=rootGrp[timedim].attrs['units'],
                                 calendar=rootGrp[timedim].attrs['calendar'])
-
-            t0 = date2num(np.datetime64(rootGrp.attrs['starttime']).astype('datetime64[us]').astype(datetime),
+            t0 = date2num(starttime,
                           units=rootGrp[timedim].attrs['units'],
                           calendar=rootGrp[timedim].attrs['calendar'])
-            indices = np.rint((new_time - t0)/
+            indices = np.rint((new_time - t0)/resolution).astype(int)
+            if not np.all(indices >= 0):
+                raise ValueError("Data starts before the archive start time")
             times = rootGrp[timedim]
             newsize = indices[-1] + 1
             if newsize > times.shape[0]:
                 rootGrp.resize_dimension(timedim, newsize)
-                times[:] = t0 + np.arange(times.shape[0])*
+                times[:] = t0 + np.arange(times.shape[0]) * resolution
             data = rootGrp[featureName]
             if len(data.shape) > 1:
                 data[:, indices] = xArray[featureName].values

@@ -63,10 +78,18 @@
             logging.warning(f"Could not set all meta info for {featureName}: {e}")


-def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray):
+def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
     rootGrp = h5f.create_group(defaultGroupName)
+    rootGrp.dimensions[timedim] = None
+    coordinates = rootGrp.create_variable(timedim, (timedim,), float)
+    coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
+    coordinates.attrs['calendar'] = 'gregorian'
+    rootGrp.attrs['starttime'] = str(starttime)
     for label, size in xArray.dims.items():
-        _setAttributes(label, size, rootGrp, xArray)
+        if not np.issubdtype(xArray[label].dtype, np.datetime64):
+            rootGrp.dimensions[label] = size
+            coordinates = rootGrp.create_variable(label, (label,), float)
+            coordinates[:] = xArray[label].values
     # Note: xArray.dims returns a dictionary of dimensions that are not necesarily
     # in the right order; xArray[featureName].dims returns a tuple with dimension
     # names in the correct order

@@ -74,20 +97,6 @@
     return rootGrp


-def _setAttributes(label, size, rootGrp, xArray):
-    if np.issubdtype(xArray[label].dtype, np.datetime64):
-        starttime = str(xArray[label].values[0].astype('datetime64[us]').astype(datetime))
-        rootGrp.dimensions[label] = None
-        coordinates = rootGrp.create_variable(label, (label,), float)
-        coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
-        coordinates.attrs['calendar'] = 'gregorian'
-        rootGrp.attrs['starttime'] = starttime
-    else:
-        rootGrp.dimensions[label] = size
-        coordinates = rootGrp.create_variable(label, (label,), float)
-        coordinates[:] = xArray[label].values
-
-
 def _setMetaInfo(featureName, h5f, xArray):
     h5f.attrs['station'] = xArray.attrs['station']
     h5f.attrs['latitude'] = -42
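The rewrite pins every write to a fixed archive_starttime and a constant resolution in hours, so each timestamp lands in a deterministic slot: its offset from t0 divided by the resolution. The indices arithmetic in isolation, with illustrative 10-minute data:

import numpy as np
from datetime import datetime
from cftime import date2num

units = 'hours since 1970-01-01 00:00:00.0'
t0 = date2num(datetime(2000, 1, 1), units=units, calendar='gregorian')
new_time = date2num([datetime(2000, 1, 2, 0, 0), datetime(2000, 1, 2, 0, 10)],
                    units=units, calendar='gregorian')
resolution = 1 / 6  # 10-minute data expressed in hours
indices = np.rint((np.asarray(new_time) - t0) / resolution).astype(int)
print(indices)  # [144 145]: day two of the archive starts at slot 144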
{tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/METADATA
CHANGED

@@ -1,7 +1,7 @@
 Metadata-Version: 2.3
 Name: tonik
-Version: 0.0.5
-Summary:
+Version: 0.0.6
+Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
 Author-email: Yannik Behr <y.behr@gns.cri.nz>, Christof Mueller <c.mueller@gns.cri.nz>
tonik-0.0.6.dist-info/RECORD
ADDED

@@ -0,0 +1,11 @@
+tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
+tonik/api.py,sha256=PV41vA7FGDqt1LK0nYKc1SNF04-LtINfqkYHH_y3S4U,5645
+tonik/storage.py,sha256=JuDq4T-45kjOeH_gu1E3Z1WdT9x0lQ7DJfz8hRZNwzw,11032
+tonik/utils.py,sha256=nV0lK8Azasr8LUuQGXxfxef6nU3bn3dCTQnQTmWsKAY,1534
+tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
+tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
+tonik-0.0.6.dist-info/METADATA,sha256=HHDURyUqgrCa1-1wZeyQG4lYCa_ZdbzgtY5y5eVJ4lY,1918
+tonik-0.0.6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+tonik-0.0.6.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
+tonik-0.0.6.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+tonik-0.0.6.dist-info/RECORD,,
tonik-0.0.5.dist-info/RECORD
DELETED

@@ -1,11 +0,0 @@
-tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
-tonik/api.py,sha256=zTZZbpPsetoM5EH8ao8RlwR39lU1jEWpxwATC4S8Qq0,11012
-tonik/storage.py,sha256=F1NnRIZLq7uB7TTK1-5VHYix6_e_fl2J1FmiCFYqIJ0,10719
-tonik/utils.py,sha256=jEjvUNcU9nUhQZTgu8iwfPmF4U2odyOT8EJu0v1DaA0,1433
-tonik/xarray2hdf5.py,sha256=biQ3KVt0QrxJhOWm38FwglzYkLhPtO13G1B1vVF2c6o,4090
-tonik/package_data/index.html,sha256=IaPMU-A_FswjMAKA6PrHp9VFTuKJ6o_PGyjg_CTutrY,3082
-tonik-0.0.5.dist-info/METADATA,sha256=4VJUxgbE6FIPk9g4Uj5KlMN2qbW05us5zw3j4byWwjU,1940
-tonik-0.0.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-tonik-0.0.5.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
-tonik-0.0.5.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-tonik-0.0.5.dist-info/RECORD,,

{tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/WHEEL
File without changes

{tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/entry_points.txt
File without changes

{tonik-0.0.5.dist-info → tonik-0.0.6.dist-info}/licenses/LICENSE
File without changes