tonik 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tonik/__init__.py +2 -2
- tonik/api.py +59 -37
- tonik/storage.py +71 -62
- tonik/utils.py +14 -8
- tonik/xarray2zarr.py +23 -0
- {tonik-0.0.6.dist-info → tonik-0.0.8.dist-info}/METADATA +3 -2
- tonik-0.0.8.dist-info/RECORD +12 -0
- {tonik-0.0.6.dist-info → tonik-0.0.8.dist-info}/WHEEL +1 -1
- tonik-0.0.6.dist-info/RECORD +0 -11
- {tonik-0.0.6.dist-info → tonik-0.0.8.dist-info}/entry_points.txt +0 -0
- {tonik-0.0.6.dist-info → tonik-0.0.8.dist-info}/licenses/LICENSE +0 -0
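In short: 0.0.8 swaps the default storage backend from HDF5 to Zarr (a new `tonik/xarray2zarr.py` module, a `backend='zarr'` default on the storage classes, and a new `zarr` dependency), renames `StorageGroup` to `Storage`, and reworks the API so sub-stores are addressed by a generic `subdir` list instead of fixed site/sensor/channel levels.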
tonik/__init__.py CHANGED

```diff
@@ -2,7 +2,7 @@ import importlib
 from os import PathLike
 from typing import Optional
 
-from .storage import StorageGroup, Path
+from .storage import Storage, Path
 from .utils import generate_test_data
 
 
@@ -20,4 +20,4 @@ def get_data(filename: Optional[PathLike] = None) -> str:
 
     """
     f = importlib.resources.files(__package__)
-    return str(f) if filename is None else str(f / filename)
+    return str(f) if filename is None else str(f / filename)
```
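The package root re-exports the renamed class, so downstream code changes one import. A minimal sketch of the 0.0.8 spelling (the group name and root directory are illustrative):

```python
# 0.0.6 spelling (inferred from this diff): from tonik import StorageGroup
from tonik import Storage

# Storage is the renamed StorageGroup; by default it creates its
# directory tree under rootdir on instantiation.
g = Storage("volcanoes", rootdir="/tmp")
```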
tonik/api.py CHANGED

```diff
@@ -15,17 +15,17 @@ from fastapi.responses import HTMLResponse, StreamingResponse
 from pydantic import BaseModel
 from typing import Annotated
 
-from .storage import StorageGroup
+from .storage import Storage
 from . import get_data
 
 logger = logging.getLogger(__name__)
 
-
+
 class TonikAPI:
 
     def __init__(self, rootdir) -> None:
         self.rootdir = rootdir
-        self.app = FastAPI()
+        self.app = FastAPI()
 
         # -- allow any origin to query API
         self.app.add_middleware(CORSMiddleware,
@@ -55,20 +55,23 @@ class TonikAPI:
         return dt
 
     def feature(self,
-                group: str,
-                name: str,
-                starttime: str=None,
-                endtime: str=None,
-                resolution: str='full',
-                verticalres: int=10,
-                log: bool=False,
-                normalise: bool=False,
-                subdir: Annotated[list[str] | None, Query()]=None):
+                group: str,
+                name: str,
+                starttime: str = None,
+                endtime: str = None,
+                resolution: str = 'full',
+                verticalres: int = 10,
+                log: bool = False,
+                normalise: bool = False,
+                subdir: Annotated[list[str] | None, Query()] = None):
         _st = self.preprocess_datetime(starttime)
         _et = self.preprocess_datetime(endtime)
-        g = StorageGroup(group, rootdir=self.rootdir,
-                         starttime=_st, endtime=_et)
-        c = g.get_store(*subdir)
+        g = Storage(group, rootdir=self.rootdir,
+                    starttime=_st, endtime=_et)
+        if subdir is None:
+            c = g
+        else:
+            c = g.get_substore(*subdir)
         try:
             feat = c(name)
         except ValueError as e:
@@ -80,7 +83,8 @@ class TonikAPI:
             nfreqs = feat.shape[0]
             dates = feat.coords[feat.dims[1]].values
             if resolution != 'full':
-                freq, dates, spec = self.aggregate_feature(resolution, verticalres, feat, nfreqs, dates)
+                freq, dates, spec = self.aggregate_feature(
+                    resolution, verticalres, feat, nfreqs, dates)
             else:
                 spec = feat.values
                 freq = feat.coords[feat.dims[0]].values
@@ -88,50 +92,67 @@ class TonikAPI:
             if log and feat.name != 'sonogram':
                 vals = 10*np.log10(vals)
             if normalise:
-                vals = (vals - np.nanmin(vals))/(np.nanmax(vals) - np.nanmin(vals))
+                vals = (vals - np.nanmin(vals)) / \
+                    (np.nanmax(vals) - np.nanmin(vals))
             freqs = freq.repeat(dates.size)
             dates = np.tile(dates, freq.size)
-            df = pd.DataFrame({'dates': dates, 'freqs': freqs, 'feature': vals})
+            df = pd.DataFrame(
+                {'dates': dates, 'freqs': freqs, 'feature': vals})
             output = df.to_csv(index=False,
-                               columns=['dates', 'freqs', 'feature'])
+                               columns=['dates', 'freqs', 'feature'])
         else:
             df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
             df['dates'] = df.index
             try:
-                df = df.resample(pd.Timedelta(resolution)).mean()
+                current_resolution = pd.Timedelta(df['dates'].diff().mean())
+                if current_resolution < pd.Timedelta(resolution):
+                    df = df.resample(pd.Timedelta(resolution)).mean()
             except ValueError as e:
-                logger.warning(f"Cannot resample {feat.name} to {resolution}: e")
+                logger.warning(
+                    f"Cannot resample {feat.name} to {resolution}: e")
             df.rename(columns={feat.name: 'feature'}, inplace=True)
             output = df.to_csv(index=False, columns=['dates', 'feature'])
         return StreamingResponse(iter([output]),
-                                 media_type='text/csv',
-                                 headers={"Content-Disposition":
-                                          "attachment;filename=<tonik_feature>.csv",
-                                          'Content-Length':
-                                          str(len(output))})
+                                 media_type='text/csv',
+                                 headers={"Content-Disposition":
                                          "attachment;filename=<tonik_feature>.csv",
+                                          'Content-Length': str(len(output))})
 
     def aggregate_feature(self, resolution, verticalres, feat, nfreqs, dates):
-        resolution = np.timedelta64(pd.Timedelta(resolution), 'ms').astype(float)
+        resolution = np.timedelta64(
+            pd.Timedelta(resolution), 'ms').astype(float)
         ndays = np.timedelta64(dates[-1] - dates[0], 'ms').astype(float)
-        canvas_x = int(ndays/resolution)
+        canvas_x = int(ndays/resolution)
         canvas_y = min(nfreqs, verticalres)
         dates = date2num(dates.astype('datetime64[us]').astype(datetime),
-                         units='hours since 1970-01-01 00:00:00.0',
-                         calendar='gregorian')
+                         units='hours since 1970-01-01 00:00:00.0',
+                         calendar='gregorian')
         feat = feat.assign_coords({'datetime': dates})
         cvs = dsh.Canvas(plot_width=canvas_x,
-                         plot_height=canvas_y)
+                         plot_height=canvas_y)
         agg = cvs.raster(source=feat)
         freq_dim = feat.dims[0]
         freq, d, spec = agg.coords[freq_dim].values, agg.coords['datetime'].values, agg.data
-        dates = num2date(d, units='hours since 1970-01-01 00:00:00.0',
-                         calendar='gregorian')
+        dates = num2date(
+            d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
+        return freq, dates, spec
 
-    def inventory(self, group: str) -> dict:
-        sg = StorageGroup(group, rootdir=self.rootdir)
-        return sg.to_dict()
+    def inventory(self, group: str, subdir: Annotated[list[str] | None, Query()] = None, tree: bool = True) -> list | dict:
+        sg = Storage(group, rootdir=self.rootdir, create=False)
+        try:
+            c = sg.get_substore(*subdir)
+        except TypeError:
+            c = sg
+        except FileNotFoundError as e:
+            msg = "Directory {} not found.".format(
+                '/'.join([sg.path] + subdir))
+            raise HTTPException(status_code=404, detail=msg)
+        if tree and not subdir:
+            return sg.to_dict()
+        else:
+            dir_contents = os.listdir(c.path)
+            return [fn.replace('.nc', '').replace('.zarr', '') for fn in dir_contents]
 
-# ta = TonikAPI('/tmp').feature()
 
 def main(argv=None):
     parser = ArgumentParser()
@@ -140,5 +161,6 @@ def main(argv=None):
     ta = TonikAPI(args.rootdir)
     uvicorn.run(ta.app, host="0.0.0.0", port=8003)
 
+
 if __name__ == "__main__":
     main()
```
tonik/storage.py CHANGED

```diff
@@ -1,30 +1,25 @@
-from datetime import datetime, timedelta
-import json
-import glob
 import logging
 import logging.config
 import os
 import re
-import tempfile
 
 import pandas as pd
 import xarray as xr
 
 from .xarray2hdf5 import xarray2hdf5
-
-
-ERROR_LOG_FILENAME = "tonik.log"
+from .xarray2zarr import xarray2zarr
 
 LOGGING_CONFIG = {
     "version": 1,
     "disable_existing_loggers": False,
-    "formatters": {
+    "formatters": {
         "default": {  # The formatter name, it can be anything that I wish
-            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",  # What to add in the message
+            # What to add in the message
+            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",
             "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
         },
         "json": {  # The formatter name
-            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
+            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
             # Json is more complex, but easier to read, display all attributes!
             "format": """
                 asctime: %(asctime)s
@@ -48,23 +43,16 @@ LOGGING_CONFIG = {
     """,
             "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
         },
-    },
+    },
     "handlers": {
-        "logfile": {  # The handler name
-            "formatter": "json",  # Refer to the formatter defined above
-            "level": "ERROR",  # FILTER: Only ERROR and CRITICAL logs
-            "class": "logging.handlers.RotatingFileHandler",  # OUTPUT: Which class to use
-            "filename": ERROR_LOG_FILENAME,  # Param for class above. Defines filename to use, load it from constant
-            "backupCount": 2,  # Param for class above. Defines how many log files to keep as it grows
-        },
         "simple": {  # The handler name
             "formatter": "default",  # Refer to the formatter defined above
             "class": "logging.StreamHandler",  # OUTPUT: Same as above, stream to console
             "stream": "ext://sys.stdout",
         },
     },
-    "loggers": {
-        "…": {
+    "loggers": {
+        "storage": {  # The name of the logger, this SHOULD match your module!
             "level": "DEBUG",  # FILTER: only INFO logs onwards from "tryceratops" logger
             "handlers": [
                 "simple",  # Refer the handler defined above
@@ -72,9 +60,9 @@ LOGGING_CONFIG = {
         },
     },
     "root": {
-        "level": "…",
+        "level": "INFO",  # FILTER: only INFO logs onwards
         "handlers": [
-            "logfile",  # Refer the handler defined above
+            "simple",  # Refer the handler defined above
         ]
     },
 }
@@ -84,32 +72,44 @@ logger = logging.getLogger("__name__")
 
 
 class Path(object):
-    def __init__(self, name, parentdir):
+    def __init__(self, name, parentdir, create=True, backend='zarr'):
         self.name = name
+        self.create = create
+        self.backend = backend
         self.path = os.path.join(parentdir, name)
-        try:
-            os.makedirs(self.path)
-        except FileExistsError:
-            pass
+        if create:
+            try:
+                os.makedirs(self.path, exist_ok=True)
+            except FileExistsError:
+                pass
+        else:
+            if not os.path.exists(self.path):
+                raise FileNotFoundError(f"Path {self.path} not found")
         self.children = {}
-
+
     def __str__(self):
         return self.path
-
+
     def __getitem__(self, key):
         if key is None:
             raise ValueError("Key cannot be None")
         try:
             return self.children[key]
         except KeyError:
-            self.children[key] = Path(key, self.path)
+            self.children[key] = Path(
+                key, self.path, self.create, self.backend)
             return self.children[key]
 
     def feature_path(self, feature):
-        _feature_path = os.path.join(self.path, feature + '.nc')
+
+        if self.backend == 'h5netcdf':
+            file_ending = '.nc'
+        elif self.backend == 'zarr':
+            file_ending = '.zarr'
+        _feature_path = os.path.join(self.path, feature + file_ending)
         if not os.path.exists(_feature_path):
             raise FileNotFoundError(f"File {_feature_path} not found")
-        self.children[feature] = Path(feature + '.nc', self.path)
+        self.children[feature] = Path(feature + file_ending, self.path)
         return _feature_path
 
     def __call__(self, feature, stack_length=None, interval='10min'):
@@ -125,18 +125,17 @@ class Path(object):
         if self.endtime <= self.starttime:
             raise ValueError('Startime has to be smaller than endtime.')
 
-        feature = feature.lower()
         filename = self.feature_path(feature)
 
-        logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
+        logger.debug(
+            f"Reading feature {feature} between {self.starttime} and {self.endtime}")
         num_periods = None
         if stack_length is not None:
             valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
-            if re.match(r'\d*\s*(\w*)', stack_length).group(1) not \
-                    in valid_stack_units:
+            if re.match(r'\d*\s*(\w*)', stack_length).group(1) not in valid_stack_units:
                 raise ValueError(
                     'Stack length should be one of: {}'.
-                    format(', '.join(valid_stack_units))
+                    format(', '.join(valid_stack_units))
                 )
 
         if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
@@ -146,26 +145,29 @@ class Path(object):
             # Rewind starttime to account for stack length
             self.starttime -= pd.to_timedelta(stack_length)
 
-            num_periods = (pd.to_timedelta(stack_length)/
+            num_periods = (pd.to_timedelta(stack_length) /
                            pd.to_timedelta(interval))
             if not num_periods.is_integer():
                 raise ValueError(
                     'Stack length {} / interval {} = {}, but it needs'
                     ' to be a whole number'.
-                    format(stack_length, interval, num_periods))
+                    format(stack_length, interval, num_periods))
 
         xd_index = dict(datetime=slice(self.starttime, self.endtime))
-        with xr.open_dataset(filename, group='original',
-                             engine='h5netcdf') as ds:
-            rq = ds.loc[xd_index].load()
+        with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
+            try:
+                rq = ds.loc[xd_index].load()
+            except KeyError:
+                ds = ds.sortby("datetime")
+                rq = ds.loc[xd_index].load()
 
         # Stack features
         if stack_length is not None:
             logger.debug("Stacking feature...")
             try:
                 xdf = rq[feature].rolling(datetime=int(num_periods),
-                                          center=False,
-                                          min_periods=1).mean()
+                                          center=False,
+                                          min_periods=1).mean()
                 # Return requested timeframe to that defined in initialisation
                 self.starttime += pd.to_timedelta(stack_length)
                 xdf_new = xdf.loc[self.starttime:self.endtime]
@@ -189,10 +191,13 @@ class Path(object):
         """
         Save a feature to disk
         """
-        xarray2hdf5(data, self.path, **kwargs)
+        if self.backend == 'h5netcdf':
+            xarray2hdf5(data, self.path, **kwargs)
+        elif self.backend == 'zarr':
+            xarray2zarr(data, self.path, **kwargs)
 
 
-class StorageGroup(Path):
+class Storage(Path):
     """
     Query computed features
 
@@ -204,7 +209,7 @@ class StorageGroup(Path):
     :type endtime: :class:`datetime.datetime`
 
     >>> import datetime
-    >>> g = StorageGroup('Whakaari', '/tmp')
+    >>> g = Storage('Whakaari', /tmp)
     >>> start = datetime.datetime(2012,1,1,0,0,0)
     >>> end = datetime.datetime(2012,1,2,23,59,59)
     >>> g.starttime = start
@@ -212,12 +217,13 @@ class StorageGroup(Path):
     >>> c = g.channel(site='WIZ', sensor='00', channel='HHZ')
     >>> rsam = c("rsam")
     """
-
-    def __init__(self, name, rootdir, starttime=None, endtime=None):
+
+    def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='zarr'):
+        self.stores = set()
         self.starttime = starttime
         self.endtime = endtime
-        super().__init__(name, rootdir)
-
+        super().__init__(name, rootdir, create, backend)
+
     def print_tree(self, site, indent=0, output=''):
         output += ' ' * indent + site.path + '\n'
         for site in site.children.values():
@@ -229,7 +235,7 @@ class StorageGroup(Path):
         rstr = self.print_tree(self, 0, rstr)
         return rstr
 
-    def get_store(self, site=None, sensor=None, channel=None):
+    def get_substore(self, *args):
         # return the store for a given site, sensor, or channel
         # if one of them is None return the store for the level above
         # if all are None return the root store
@@ -243,7 +249,7 @@ class StorageGroup(Path):
             st.starttime = self.starttime
             st.endtime = self.endtime
             self.stores.add(st)
-            return st
+        return st
 
     def from_directory(self):
         """
@@ -254,30 +260,35 @@ class StorageGroup(Path):
             try:
                 subdirs = root.split(self.path)[1].split(os.sep)[1:]
             except IndexError:
-                st = self.get_store()
+                st = self.get_substore()
             else:
                 try:
-                    st = self.get_store(*subdirs)
+                    st = self.get_substore(*subdirs)
                 except TypeError as e:
                     raise e
             for _f in files:
                 if _f.endswith('.nc'):
-                    st.feature_path(_f.replace('.nc', ''))
+                    st.feature_path(_f.replace(
+                        '.nc', '').replace('.zarr', ''))
 
     @staticmethod
     def directory_tree_to_dict(path):
         name = os.path.basename(path)
-        if os.path.isdir(path):
-            return {name: [StorageGroup.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
+        if name.endswith('.zarr'):
+            return name.replace('.zarr', '')
+        elif os.path.isdir(path):
+            return {name: [Storage.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
         else:
-            if name.endswith('.nc'):
+            if name.endswith('.nc'):
                 return name.replace('.nc', '')
+            else:
+                return
 
     def to_dict(self):
         """
         Convert the storage group to json
         """
-        return StorageGroup.directory_tree_to_dict(self.path)
+        return Storage.directory_tree_to_dict(self.path)
 
     def get_starttime(self):
         return self.__starttime
@@ -311,7 +322,5 @@ class StorageGroup(Path):
         if s is not self:
             s.endtime = time
 
-
     starttime = property(get_starttime, set_starttime)
     endtime = property(get_endtime, set_endtime)
-
```
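Putting the storage changes together: a `Storage` built with defaults now writes Zarr stores through `save()` and reads them back with `engine='zarr'`, and sub-stores can be nested to any depth. A round-trip sketch under those assumptions (`save` is the method whose body appears in the hunk above; its `def` line falls outside the diff, so the name is inferred):

```python
import tempfile
from datetime import datetime

from tonik import Storage, generate_test_data

rootdir = tempfile.mkdtemp()
data = generate_test_data(dim=1, ndays=2)      # Dataset with an 'rsam' variable

g = Storage("experiment", rootdir=rootdir)     # backend='zarr' is the new default
store = g.get_substore("WIZ", "00", "HHZ")     # arbitrary sub-directory levels
store.save(data)                               # writes .../HHZ/rsam.zarr

# Reading needs a query window; the property setters push it to sub-stores.
g.starttime = datetime.fromisoformat(data.attrs["starttime"])
g.endtime = datetime.fromisoformat(data.attrs["endtime"])
rsam = store("rsam")                           # sliced to [starttime, endtime]
```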
tonik/utils.py CHANGED

```diff
@@ -6,8 +6,9 @@ import xarray as xr
 
 
 def generate_test_data(dim=1, ndays=30, nfreqs=10,
-                       tstart=datetime.utcnow(),
-                       freq='10min', feature_name=None, seed=42,
+                       tstart=datetime.now(),
+                       freq='10min', intervals=None,
+                       feature_name=None, seed=42,
                        freq_name=None, add_nans=True):
     """
     Generate a 1D or 2D feature for testing.
@@ -15,9 +16,12 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
     assert dim < 3
     assert dim > 0
 
-    nints = ndays * 6 * 24
-    dates = pd.date_range(tstart, freq=freq, periods=nints)
-    rs = np.random.default_rng(seed)
+    if intervals is None:
+        nints = ndays * 6 * 24
+    else:
+        nints = intervals
+    dates = pd.date_range(tstart, freq=freq, periods=nints)
+    rs = np.random.default_rng(seed)
     # Random walk as test signal
     data = np.abs(np.cumsum(rs.normal(0, 8., len(dates))))
     if dim == 2:
@@ -29,7 +33,8 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
         data[idx_nan] = np.nan
     if feature_name is None:
         feature_name = 'rsam'
-    xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
+    xrd = xr.Dataset({feature_name: xr.DataArray(
+        data, coords=[dates], dims=['datetime'])})
     if dim == 2:
         if add_nans:
             data[:, idx_nan] = np.nan
@@ -38,9 +43,10 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
             feature_name = 'ssam'
         if freq_name is None:
             freq_name = 'frequency'
-        xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
+        xrd = xr.Dataset({feature_name: xr.DataArray(
+            data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
     xrd.attrs['starttime'] = dates[0].isoformat()
     xrd.attrs['endtime'] = dates[-1].isoformat()
     xrd.attrs['station'] = 'MDR'
     xrd.attrs['interval'] = '10min'
-    return xrd
+    return xrd
```
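The new `intervals` argument bypasses the hard-coded `ndays * 6 * 24` period count, which is only correct for the default `freq='10min'`. A short sketch:

```python
from tonik import generate_test_data

# Default: 30 days of 10-minute samples, i.e. 30 * 24 * 6 = 4320 periods.
rsam = generate_test_data(dim=1)

# New in 0.0.8: pass the period count directly for other sampling rates.
fast = generate_test_data(dim=1, freq="1s", intervals=3600)  # one hour at 1 Hz
assert fast["rsam"].sizes["datetime"] == 3600
```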
tonik/xarray2zarr.py ADDED

```diff
@@ -0,0 +1,23 @@
+import os
+
+import xarray as xr
+
+
+def xarray2zarr(xds, path, mode='a'):
+    for feature in xds.data_vars.keys():
+        fout = os.path.join(path, feature + '.zarr')
+        if not os.path.exists(fout) or mode == 'w':
+            xds[feature].to_zarr(
+                fout, group='original', mode='w')
+        else:
+            xds_existing = xr.open_zarr(fout, group='original')
+            overlap = xds_existing.datetime.where(
+                xds_existing.datetime == xds.datetime)
+            if overlap.size > 0:
+                xds.loc[dict(datetime=overlap)].to_zarr(
+                    fout, group='original', mode='r+', region='auto')
+                xds.drop_sel(datetime=overlap).to_zarr(
+                    fout, group='original', mode='a', append_dim="datetime")
+            else:
+                xds[feature].to_zarr(
+                    fout, group='original', append_dim='datetime')
```
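`xarray2zarr` is the Zarr counterpart of `xarray2hdf5`: the first write creates one `<variable>.zarr` store per data variable under the group `'original'`; later calls overwrite timestamps that already exist in the store (via `region='auto'`) and append the rest along `datetime`. A sketch of the create-then-append path (paths and values are illustrative):

```python
import os

import numpy as np
import pandas as pd
import xarray as xr

from tonik.xarray2zarr import xarray2zarr

def block(start):
    """Six 10-minute samples of a fake 'rsam' feature starting at `start`."""
    dates = pd.date_range(start, periods=6, freq="10min")
    return xr.Dataset({"rsam": ("datetime", np.random.rand(6))},
                      coords={"datetime": dates})

os.makedirs("/tmp/demo", exist_ok=True)
xarray2zarr(block("2024-01-01 00:00"), "/tmp/demo")  # creates /tmp/demo/rsam.zarr
xarray2zarr(block("2024-01-01 01:00"), "/tmp/demo")  # disjoint times: append branch
```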
{tonik-0.0.6.dist-info → tonik-0.0.8.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.…
+Metadata-Version: 2.1
 Name: tonik
-Version: 0.0.6
+Version: 0.0.8
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -20,6 +20,7 @@ Requires-Dist: pandas>=2.0
 Requires-Dist: python-json-logger>=2.0
 Requires-Dist: uvicorn[standard]>=0.22
 Requires-Dist: xarray>=2023.4
+Requires-Dist: zarr
 Provides-Extra: dev
 Requires-Dist: mkdocs; extra == 'dev'
 Requires-Dist: mkdocs-jupyter; extra == 'dev'
```
tonik-0.0.8.dist-info/RECORD ADDED

```diff
@@ -0,0 +1,12 @@
+tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
+tonik/api.py,sha256=vdsWHNGGWo4sbqlDyZQj2tX5oe6hAWCzyL6ffsfpCB4,6437
+tonik/storage.py,sha256=sScIFA4KXURNPwTnV-rvDh6cWCy9sRrErr9BshZpw2I,11303
+tonik/utils.py,sha256=YD2zZx5nKGfTJKTYTsEZVV78uNRCSakvU_6X6Mgwx-s,1664
+tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
+tonik/xarray2zarr.py,sha256=d7FAOe7DESbKC9CZS41r62DjlNy0S8ik01lMGXBvJKw,901
+tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
+tonik-0.0.8.dist-info/METADATA,sha256=uqrHvBl01n05hqWN8lnNETuoYIGZ8vNq9QWnRmajcEY,1938
+tonik-0.0.8.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
+tonik-0.0.8.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
+tonik-0.0.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+tonik-0.0.8.dist-info/RECORD,,
```
tonik-0.0.6.dist-info/RECORD DELETED

```diff
@@ -1,11 +0,0 @@
-tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
-tonik/api.py,sha256=PV41vA7FGDqt1LK0nYKc1SNF04-LtINfqkYHH_y3S4U,5645
-tonik/storage.py,sha256=JuDq4T-45kjOeH_gu1E3Z1WdT9x0lQ7DJfz8hRZNwzw,11032
-tonik/utils.py,sha256=nV0lK8Azasr8LUuQGXxfxef6nU3bn3dCTQnQTmWsKAY,1534
-tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
-tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
-tonik-0.0.6.dist-info/METADATA,sha256=HHDURyUqgrCa1-1wZeyQG4lYCa_ZdbzgtY5y5eVJ4lY,1918
-tonik-0.0.6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-tonik-0.0.6.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
-tonik-0.0.6.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-tonik-0.0.6.dist-info/RECORD,,
```
{tonik-0.0.6.dist-info → tonik-0.0.8.dist-info}/entry_points.txt
File without changes

{tonik-0.0.6.dist-info → tonik-0.0.8.dist-info}/licenses/LICENSE
File without changes