tonik 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tonik/storage.py +3 -47
- tonik/xarray2netcdf.py +6 -6
- tonik/xarray2zarr.py +15 -8
- {tonik-0.1.3.dist-info → tonik-0.1.4.dist-info}/METADATA +1 -1
- tonik-0.1.4.dist-info/RECORD +12 -0
- tonik-0.1.3.dist-info/RECORD +0 -12
- {tonik-0.1.3.dist-info → tonik-0.1.4.dist-info}/WHEEL +0 -0
- {tonik-0.1.3.dist-info → tonik-0.1.4.dist-info}/entry_points.txt +0 -0
- {tonik-0.1.3.dist-info → tonik-0.1.4.dist-info}/licenses/LICENSE +0 -0
tonik/storage.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import logging.config
|
|
3
3
|
import os
|
|
4
|
-
import re
|
|
5
4
|
|
|
6
|
-
import pandas as pd
|
|
7
5
|
import xarray as xr
|
|
8
6
|
|
|
9
7
|
from .xarray2netcdf import xarray2netcdf
|
|
@@ -113,69 +111,27 @@ class Path(object):
|
|
|
113
111
|
self.children[feature] = Path(feature + file_ending, self.path)
|
|
114
112
|
return _feature_path
|
|
115
113
|
|
|
116
|
-
def __call__(self, feature,
|
|
114
|
+
def __call__(self, feature, group='original'):
|
|
117
115
|
"""
|
|
118
116
|
Request a particular feature
|
|
119
117
|
|
|
120
118
|
:param feature: Feature name
|
|
121
119
|
:type feature: str
|
|
122
|
-
:param stack_length: length of moving average in time
|
|
123
|
-
:type stack_length: str
|
|
124
120
|
|
|
125
121
|
"""
|
|
126
|
-
if self.endtime
|
|
122
|
+
if self.endtime < self.starttime:
|
|
127
123
|
raise ValueError('Startime has to be smaller than endtime.')
|
|
128
124
|
|
|
129
125
|
filename = self.feature_path(feature)
|
|
130
126
|
|
|
131
127
|
logger.debug(
|
|
132
128
|
f"Reading feature {feature} between {self.starttime} and {self.endtime}")
|
|
133
|
-
num_periods = None
|
|
134
|
-
if stack_length is not None:
|
|
135
|
-
valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
|
|
136
|
-
if re.match(r'\d*\s*(\w*)', stack_length).group(1) not in valid_stack_units:
|
|
137
|
-
raise ValueError(
|
|
138
|
-
'Stack length should be one of: {}'.
|
|
139
|
-
format(', '.join(valid_stack_units))
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
|
|
143
|
-
raise ValueError('Stack length {} is less than interval {}'.
|
|
144
|
-
format(stack_length, interval))
|
|
145
|
-
|
|
146
|
-
# Rewind starttime to account for stack length
|
|
147
|
-
self.starttime -= pd.to_timedelta(stack_length)
|
|
148
|
-
|
|
149
|
-
num_periods = (pd.to_timedelta(stack_length) /
|
|
150
|
-
pd.to_timedelta(interval))
|
|
151
|
-
if not num_periods.is_integer():
|
|
152
|
-
raise ValueError(
|
|
153
|
-
'Stack length {} / interval {} = {}, but it needs'
|
|
154
|
-
' to be a whole number'.
|
|
155
|
-
format(stack_length, interval, num_periods))
|
|
156
129
|
|
|
157
130
|
xd_index = dict(datetime=slice(self.starttime, self.endtime))
|
|
158
|
-
with xr.open_dataset(filename, group=
|
|
131
|
+
with xr.open_dataset(filename, group=group, engine=self.engine) as ds:
|
|
159
132
|
rq = ds[feature].loc[xd_index].load()
|
|
160
133
|
rq.attrs = ds.attrs
|
|
161
134
|
|
|
162
|
-
# Stack features
|
|
163
|
-
if stack_length is not None:
|
|
164
|
-
logger.debug("Stacking feature...")
|
|
165
|
-
try:
|
|
166
|
-
xdf = rq.rolling(datetime=int(num_periods),
|
|
167
|
-
center=False,
|
|
168
|
-
min_periods=1).mean()
|
|
169
|
-
# Return requested timeframe to that defined in initialisation
|
|
170
|
-
self.starttime += pd.to_timedelta(stack_length)
|
|
171
|
-
xdf_new = xdf.loc[self.starttime:self.endtime]
|
|
172
|
-
xdf_new = xdf_new.rename(feature)
|
|
173
|
-
except ValueError as e:
|
|
174
|
-
logger.error(e)
|
|
175
|
-
logger.error('Stack length {} is not valid for feature {}'.
|
|
176
|
-
format(stack_length, feature))
|
|
177
|
-
else:
|
|
178
|
-
return xdf_new
|
|
179
135
|
return rq
|
|
180
136
|
|
|
181
137
|
def load(self, *args, **kwargs):
|
tonik/xarray2netcdf.py
CHANGED
|
@@ -11,7 +11,7 @@ from cftime import date2num, num2date
|
|
|
11
11
|
from .utils import merge_arrays
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
|
|
14
|
+
def xarray2netcdf(xArray, fdir, group="original", timedim="datetime",
|
|
15
15
|
archive_starttime=datetime(2000, 1, 1), resolution=None,
|
|
16
16
|
mode='a'):
|
|
17
17
|
"""
|
|
@@ -23,7 +23,7 @@ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
|
|
|
23
23
|
Data to store.
|
|
24
24
|
fdir : str
|
|
25
25
|
Directory to store data under.
|
|
26
|
-
|
|
26
|
+
group : str
|
|
27
27
|
Hdf5 group name.
|
|
28
28
|
timedim : str
|
|
29
29
|
Name of time dimension.
|
|
@@ -49,22 +49,22 @@ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
|
|
|
49
49
|
if os.path.isfile(h5file) and mode == 'a':
|
|
50
50
|
if archive_starttime > data_starttime:
|
|
51
51
|
xds_existing = xr.open_dataset(
|
|
52
|
-
h5file, group=
|
|
52
|
+
h5file, group=group, engine='h5netcdf')
|
|
53
53
|
xda_new = merge_arrays(
|
|
54
54
|
xds_existing[featureName], xArray[featureName],
|
|
55
55
|
resolution=resolution)
|
|
56
56
|
xds_existing.close()
|
|
57
|
-
xda_new.to_netcdf(h5file, group=
|
|
57
|
+
xda_new.to_netcdf(h5file, group=group,
|
|
58
58
|
mode='w', engine='h5netcdf')
|
|
59
59
|
continue
|
|
60
60
|
_mode = 'a'
|
|
61
61
|
|
|
62
62
|
with h5netcdf.File(h5file, _mode) as h5f:
|
|
63
63
|
try:
|
|
64
|
-
rootGrp = _create_h5_Structure(
|
|
64
|
+
rootGrp = _create_h5_Structure(group, featureName,
|
|
65
65
|
h5f, xArray, starttime, timedim)
|
|
66
66
|
except ValueError: # group already exists, append
|
|
67
|
-
rootGrp = h5f[
|
|
67
|
+
rootGrp = h5f[group]
|
|
68
68
|
|
|
69
69
|
# determine indices
|
|
70
70
|
new_time = date2num(xArray[timedim].values.astype('datetime64[us]').astype(datetime),
|
tonik/xarray2zarr.py
CHANGED
|
@@ -2,13 +2,14 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
|
|
4
4
|
import xarray as xr
|
|
5
|
+
from zarr.errors import PathNotFoundError
|
|
5
6
|
|
|
6
7
|
from .utils import merge_arrays
|
|
7
8
|
|
|
8
9
|
logger = logging.getLogger(__name__)
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
|
|
12
|
+
def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original'):
|
|
12
13
|
"""
|
|
13
14
|
Write xarray dataset to zarr files.
|
|
14
15
|
|
|
@@ -20,6 +21,8 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
|
|
|
20
21
|
Path to write the dataset.
|
|
21
22
|
mode : str, optional
|
|
22
23
|
Write mode, by default 'a'.
|
|
24
|
+
group : str, optional
|
|
25
|
+
Group name, by default 'original'
|
|
23
26
|
|
|
24
27
|
Returns
|
|
25
28
|
-------
|
|
@@ -29,28 +32,32 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
|
|
|
29
32
|
fout = os.path.join(path, feature + '.zarr')
|
|
30
33
|
if not os.path.exists(fout) or mode == 'w':
|
|
31
34
|
xds[feature].to_zarr(
|
|
32
|
-
fout, group=
|
|
35
|
+
fout, group=group, mode='w')
|
|
33
36
|
else:
|
|
34
|
-
|
|
37
|
+
try:
|
|
38
|
+
xds_existing = xr.open_zarr(fout, group=group)
|
|
39
|
+
except PathNotFoundError:
|
|
40
|
+
xds[feature].to_zarr(fout, group=group, mode='a')
|
|
41
|
+
continue
|
|
35
42
|
if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
|
|
36
43
|
xda_new = merge_arrays(xds_existing[feature], xds[feature])
|
|
37
|
-
xda_new.to_zarr(fout, group=
|
|
44
|
+
xda_new.to_zarr(fout, group=group, mode='w')
|
|
38
45
|
else:
|
|
39
46
|
try:
|
|
40
47
|
overlap = xds_existing.datetime.where(
|
|
41
48
|
xds_existing.datetime == xds.datetime)
|
|
42
49
|
if overlap.size > 0:
|
|
43
50
|
xds[feature].loc[dict(datetime=overlap)].to_zarr(
|
|
44
|
-
fout, group=
|
|
51
|
+
fout, group=group, mode='r+', region='auto')
|
|
45
52
|
xds[feature].drop_sel(datetime=overlap).to_zarr(
|
|
46
|
-
fout, group=
|
|
53
|
+
fout, group=group, mode='a', append_dim="datetime")
|
|
47
54
|
else:
|
|
48
55
|
xds[feature].to_zarr(
|
|
49
|
-
fout, group=
|
|
56
|
+
fout, group=group, append_dim='datetime')
|
|
50
57
|
except Exception as e:
|
|
51
58
|
msg = f"Appending {feature} to {fout} failed: {e}\n"
|
|
52
59
|
msg += "Attempting to merge the two datasets."
|
|
53
60
|
logger.error(msg)
|
|
54
61
|
# remove duplicate datetime entries
|
|
55
62
|
xda_new = merge_arrays(xds_existing[feature], xds[feature])
|
|
56
|
-
xda_new.to_zarr(fout, group=
|
|
63
|
+
xda_new.to_zarr(fout, group=group, mode='w')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: tonik
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: Store time series data as HDF5 files and access them through an API.
|
|
5
5
|
Project-URL: Homepage, https://tsc-tools.github.io/tonik
|
|
6
6
|
Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
|
|
2
|
+
tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
|
|
3
|
+
tonik/storage.py,sha256=GNJ6w9VHOeTR_ZJMZ-Ipqe3nFK2I91fkHYwg1k9bEuo,9470
|
|
4
|
+
tonik/utils.py,sha256=3nSRU_GnV6arP4e63YHn4oEV8XbqzVAW8FCvQVIwGdg,2757
|
|
5
|
+
tonik/xarray2netcdf.py,sha256=Bjt7kytnrlBgZrVNmRGir9TfNqyvW_ZNrwYBNvxiWio,5199
|
|
6
|
+
tonik/xarray2zarr.py,sha256=EhEo5kqzrKyXR37RX2zYtgOAviZdRqstZi_4ZtUmSDc,2342
|
|
7
|
+
tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
|
|
8
|
+
tonik-0.1.4.dist-info/METADATA,sha256=GkFoQugVoozfs2jTWNchQujP5RDATzeo1s7rqkx63GQ,1938
|
|
9
|
+
tonik-0.1.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
10
|
+
tonik-0.1.4.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
|
|
11
|
+
tonik-0.1.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
12
|
+
tonik-0.1.4.dist-info/RECORD,,
|
tonik-0.1.3.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
|
|
2
|
-
tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
|
|
3
|
-
tonik/storage.py,sha256=DNIfNb3oCgICcUI_MADQjQdUqkmZJIu42zCYGRs9ers,11512
|
|
4
|
-
tonik/utils.py,sha256=3nSRU_GnV6arP4e63YHn4oEV8XbqzVAW8FCvQVIwGdg,2757
|
|
5
|
-
tonik/xarray2netcdf.py,sha256=w8ubAWa2yCnk9-JQ_e7CHdfEjvjcK1pXol8Qy_PT2hY,5241
|
|
6
|
-
tonik/xarray2zarr.py,sha256=jTrHFpjN3lEDIJEI6RyPvvbW2jZsfYwGE_LQaZ6dwx8,2099
|
|
7
|
-
tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
|
|
8
|
-
tonik-0.1.3.dist-info/METADATA,sha256=DaGP_RFy6luf75DQMr9UA3cfJXlOVJbdHFPU09RCE_4,1938
|
|
9
|
-
tonik-0.1.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
10
|
-
tonik-0.1.3.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
|
|
11
|
-
tonik-0.1.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
12
|
-
tonik-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|