tonik 0.0.11__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- tonik/storage.py +7 -6
- tonik/utils.py +33 -0
- tonik/{xarray2hdf5.py → xarray2netcdf.py} +32 -16
- tonik/xarray2zarr.py +22 -8
- {tonik-0.0.11.dist-info → tonik-0.1.0.dist-info}/METADATA +1 -1
- tonik-0.1.0.dist-info/RECORD +12 -0
- tonik-0.0.11.dist-info/RECORD +0 -12
- {tonik-0.0.11.dist-info → tonik-0.1.0.dist-info}/WHEEL +0 -0
- {tonik-0.0.11.dist-info → tonik-0.1.0.dist-info}/entry_points.txt +0 -0
- {tonik-0.0.11.dist-info → tonik-0.1.0.dist-info}/licenses/LICENSE +0 -0
tonik/storage.py
CHANGED
|
@@ -6,7 +6,7 @@ import re
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
import xarray as xr
|
|
8
8
|
|
|
9
|
-
from .
|
|
9
|
+
from .xarray2netcdf import xarray2netcdf
|
|
10
10
|
from .xarray2zarr import xarray2zarr
|
|
11
11
|
|
|
12
12
|
LOGGING_CONFIG = {
|
|
@@ -102,7 +102,7 @@ class Path(object):
|
|
|
102
102
|
|
|
103
103
|
def feature_path(self, feature):
|
|
104
104
|
|
|
105
|
-
if self.backend == '
|
|
105
|
+
if self.backend == 'netcdf':
|
|
106
106
|
file_ending = '.nc'
|
|
107
107
|
elif self.backend == 'zarr':
|
|
108
108
|
file_ending = '.zarr'
|
|
@@ -154,7 +154,8 @@ class Path(object):
|
|
|
154
154
|
format(stack_length, interval, num_periods))
|
|
155
155
|
|
|
156
156
|
xd_index = dict(datetime=slice(self.starttime, self.endtime))
|
|
157
|
-
|
|
157
|
+
engine = 'h5netcdf' if self.backend == 'netcdf' else self.backend
|
|
158
|
+
with xr.open_dataset(filename, group='original', engine=engine) as ds:
|
|
158
159
|
rq = ds.loc[xd_index].load()
|
|
159
160
|
|
|
160
161
|
# Stack features
|
|
@@ -187,8 +188,8 @@ class Path(object):
|
|
|
187
188
|
"""
|
|
188
189
|
Save a feature to disk
|
|
189
190
|
"""
|
|
190
|
-
if self.backend == '
|
|
191
|
-
|
|
191
|
+
if self.backend == 'netcdf':
|
|
192
|
+
xarray2netcdf(data, self.path, **kwargs)
|
|
192
193
|
elif self.backend == 'zarr':
|
|
193
194
|
xarray2zarr(data, self.path, **kwargs)
|
|
194
195
|
|
|
@@ -214,7 +215,7 @@ class Storage(Path):
|
|
|
214
215
|
>>> rsam = c("rsam")
|
|
215
216
|
"""
|
|
216
217
|
|
|
217
|
-
def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='
|
|
218
|
+
def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='netcdf'):
|
|
218
219
|
self.stores = set()
|
|
219
220
|
self.starttime = starttime
|
|
220
221
|
self.endtime = endtime
|
tonik/utils.py
CHANGED
|
@@ -56,3 +56,36 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
|
|
|
56
56
|
xds.attrs['station'] = 'MDR'
|
|
57
57
|
xds.attrs['interval'] = '10min'
|
|
58
58
|
return xds
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def merge_arrays(xds_old: xr.DataArray, xds_new: xr.DataArray,
|
|
62
|
+
resolution: float = None) -> xr.DataArray:
|
|
63
|
+
"""
|
|
64
|
+
Merge two xarray datasets with the same datetime index.
|
|
65
|
+
|
|
66
|
+
Parameters
|
|
67
|
+
----------
|
|
68
|
+
xds_old : xr.DataArray
|
|
69
|
+
Old array.
|
|
70
|
+
xds_new : xr.DataArray
|
|
71
|
+
New array.
|
|
72
|
+
resolution : float
|
|
73
|
+
Time resolution in hours.
|
|
74
|
+
|
|
75
|
+
Returns
|
|
76
|
+
-------
|
|
77
|
+
xr.DataArray
|
|
78
|
+
Merged array.
|
|
79
|
+
"""
|
|
80
|
+
xda_old = xds_old.drop_duplicates(
|
|
81
|
+
'datetime', keep='last')
|
|
82
|
+
xda_new = xds_new.drop_duplicates(
|
|
83
|
+
'datetime', keep='last')
|
|
84
|
+
xda_new = xda_new.combine_first(xda_old)
|
|
85
|
+
if resolution is not None:
|
|
86
|
+
new_dates = pd.date_range(
|
|
87
|
+
xda_new.datetime.values[0],
|
|
88
|
+
xda_new.datetime.values[-1],
|
|
89
|
+
freq=f'{resolution}h')
|
|
90
|
+
xda_new = xda_new.reindex(datetime=new_dates)
|
|
91
|
+
return xda_new
|
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
1
|
import logging
|
|
3
2
|
import os
|
|
3
|
+
from datetime import datetime
|
|
4
4
|
from warnings import filterwarnings
|
|
5
5
|
|
|
6
|
-
from cftime import num2date, date2num
|
|
7
6
|
import h5netcdf
|
|
8
7
|
import numpy as np
|
|
8
|
+
import xarray as xr
|
|
9
|
+
from cftime import date2num, num2date
|
|
10
|
+
|
|
11
|
+
from .utils import merge_arrays
|
|
9
12
|
|
|
10
13
|
|
|
11
|
-
def
|
|
12
|
-
|
|
14
|
+
def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
|
|
15
|
+
archive_starttime=datetime(2000, 1, 1), resolution=None):
|
|
13
16
|
"""
|
|
14
17
|
Store an xarray dataset as an HDF5 file.
|
|
15
18
|
|
|
@@ -31,23 +34,35 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
|
|
|
31
34
|
determined from the data.
|
|
32
35
|
"""
|
|
33
36
|
filterwarnings(action='ignore', category=DeprecationWarning,
|
|
34
|
-
|
|
37
|
+
message='`np.bool` is a deprecated alias')
|
|
35
38
|
|
|
36
|
-
|
|
37
|
-
|
|
39
|
+
data_starttime = xArray[timedim].values[0].astype(
|
|
40
|
+
'datetime64[us]').astype(datetime)
|
|
41
|
+
starttime = min(data_starttime, archive_starttime)
|
|
38
42
|
if resolution is None:
|
|
39
43
|
resolution = (np.diff(xArray[timedim])/np.timedelta64(1, 'h'))[0]
|
|
40
44
|
|
|
41
45
|
for featureName in list(xArray.data_vars.keys()):
|
|
42
|
-
h5file = os.path.join(fdir, featureName +'.nc')
|
|
46
|
+
h5file = os.path.join(fdir, featureName + '.nc')
|
|
47
|
+
mode = 'w'
|
|
48
|
+
if os.path.isfile(h5file):
|
|
49
|
+
if archive_starttime > data_starttime:
|
|
50
|
+
xds_existing = xr.open_dataset(
|
|
51
|
+
h5file, group='original', engine='h5netcdf')
|
|
52
|
+
xda_new = merge_arrays(
|
|
53
|
+
xds_existing[featureName], xArray[featureName],
|
|
54
|
+
resolution=resolution)
|
|
55
|
+
xds_existing.close()
|
|
56
|
+
xda_new.to_netcdf(h5file, group='original',
|
|
57
|
+
mode='w', engine='h5netcdf')
|
|
58
|
+
continue
|
|
59
|
+
mode = 'a'
|
|
43
60
|
|
|
44
|
-
mode = 'a' if os.path.isfile(h5file) else 'w'
|
|
45
|
-
|
|
46
61
|
with h5netcdf.File(h5file, mode) as h5f:
|
|
47
62
|
try:
|
|
48
63
|
rootGrp = _create_h5_Structure(rootGroupName, featureName,
|
|
49
64
|
h5f, xArray, starttime, timedim)
|
|
50
|
-
except ValueError:
|
|
65
|
+
except ValueError: # group already exists, append
|
|
51
66
|
rootGrp = h5f[rootGroupName]
|
|
52
67
|
|
|
53
68
|
# determine indices
|
|
@@ -75,7 +90,8 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
|
|
|
75
90
|
try:
|
|
76
91
|
_setMetaInfo(featureName, h5f, xArray)
|
|
77
92
|
except KeyError as e:
|
|
78
|
-
logging.warning(
|
|
93
|
+
logging.warning(
|
|
94
|
+
f"Could not set all meta info for {featureName}: {e}")
|
|
79
95
|
|
|
80
96
|
|
|
81
97
|
def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
|
|
@@ -85,15 +101,16 @@ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime,
|
|
|
85
101
|
coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
|
|
86
102
|
coordinates.attrs['calendar'] = 'gregorian'
|
|
87
103
|
rootGrp.attrs['starttime'] = str(starttime)
|
|
88
|
-
for label, size in xArray.dims.items():
|
|
104
|
+
for label, size in xArray.dims.items():
|
|
89
105
|
if not np.issubdtype(xArray[label].dtype, np.datetime64):
|
|
90
|
-
rootGrp.dimensions[label] = size
|
|
106
|
+
rootGrp.dimensions[label] = size
|
|
91
107
|
coordinates = rootGrp.create_variable(label, (label,), float)
|
|
92
108
|
coordinates[:] = xArray[label].values
|
|
93
109
|
# Note: xArray.dims returns a dictionary of dimensions that are not necessarily
|
|
94
110
|
# in the right order; xArray[featureName].dims returns a tuple with dimension
|
|
95
111
|
# names in the correct order
|
|
96
|
-
rootGrp.create_variable(featureName, tuple(
|
|
112
|
+
rootGrp.create_variable(featureName, tuple(
|
|
113
|
+
xArray[featureName].dims), dtype=float, fillvalue=0.)
|
|
97
114
|
return rootGrp
|
|
98
115
|
|
|
99
116
|
|
|
@@ -102,4 +119,3 @@ def _setMetaInfo(featureName, h5f, xArray):
|
|
|
102
119
|
h5f.attrs['latitude'] = -42
|
|
103
120
|
h5f.attrs['longitude'] = 168
|
|
104
121
|
h5f.attrs['datatype'] = featureName
|
|
105
|
-
|
tonik/xarray2zarr.py
CHANGED
|
@@ -3,10 +3,28 @@ import os
|
|
|
3
3
|
|
|
4
4
|
import xarray as xr
|
|
5
5
|
|
|
6
|
+
from .utils import merge_arrays
|
|
7
|
+
|
|
6
8
|
logger = logging.getLogger(__name__)
|
|
7
9
|
|
|
8
10
|
|
|
9
|
-
def xarray2zarr(xds, path, mode='a'):
|
|
11
|
+
def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
|
|
12
|
+
"""
|
|
13
|
+
Write xarray dataset to zarr files.
|
|
14
|
+
|
|
15
|
+
Parameters
|
|
16
|
+
----------
|
|
17
|
+
xds : xr.Dataset
|
|
18
|
+
Dataset to write.
|
|
19
|
+
path : str
|
|
20
|
+
Path to write the dataset.
|
|
21
|
+
mode : str, optional
|
|
22
|
+
Write mode, by default 'a'.
|
|
23
|
+
|
|
24
|
+
Returns
|
|
25
|
+
-------
|
|
26
|
+
None
|
|
27
|
+
"""
|
|
10
28
|
for feature in xds.data_vars.keys():
|
|
11
29
|
fout = os.path.join(path, feature + '.zarr')
|
|
12
30
|
if not os.path.exists(fout) or mode == 'w':
|
|
@@ -15,8 +33,8 @@ def xarray2zarr(xds, path, mode='a'):
|
|
|
15
33
|
else:
|
|
16
34
|
xds_existing = xr.open_zarr(fout, group='original')
|
|
17
35
|
if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
|
|
18
|
-
|
|
19
|
-
|
|
36
|
+
xda_new = merge_arrays(xds_existing[feature], xds[feature])
|
|
37
|
+
xda_new.to_zarr(fout, group='original', mode='w')
|
|
20
38
|
else:
|
|
21
39
|
try:
|
|
22
40
|
overlap = xds_existing.datetime.where(
|
|
@@ -34,9 +52,5 @@ def xarray2zarr(xds, path, mode='a'):
|
|
|
34
52
|
msg += "Attempting to merge the two datasets."
|
|
35
53
|
logger.error(msg)
|
|
36
54
|
# remove duplicate datetime entries
|
|
37
|
-
|
|
38
|
-
'datetime', keep='last')
|
|
39
|
-
xda_new = xds[feature].drop_duplicates(
|
|
40
|
-
'datetime', keep='last')
|
|
41
|
-
xda_new = xda_new.combine_first(xda_existing)
|
|
55
|
+
xda_new = merge_arrays(xds_existing[feature], xds[feature])
|
|
42
56
|
xda_new.to_zarr(fout, group='original', mode='w')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: tonik
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: Store time series data as HDF5 files and access them through an API.
|
|
5
5
|
Project-URL: Homepage, https://tsc-tools.github.io/tonik
|
|
6
6
|
Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
|
|
2
|
+
tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
|
|
3
|
+
tonik/storage.py,sha256=Oh3BBQL13yLYHlgk8-a-wstmlUC2vNJ0yi_fnyETK_g,11237
|
|
4
|
+
tonik/utils.py,sha256=3nSRU_GnV6arP4e63YHn4oEV8XbqzVAW8FCvQVIwGdg,2757
|
|
5
|
+
tonik/xarray2netcdf.py,sha256=ey7lY4czlMREUt56SzE3quXNzeqXxV0Ru_EatsJpynA,5088
|
|
6
|
+
tonik/xarray2zarr.py,sha256=jTrHFpjN3lEDIJEI6RyPvvbW2jZsfYwGE_LQaZ6dwx8,2099
|
|
7
|
+
tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
|
|
8
|
+
tonik-0.1.0.dist-info/METADATA,sha256=VtdNFXyUplO5s_TmR8xEgW0NdMtKgI4Zl2rWLhxpnK0,1938
|
|
9
|
+
tonik-0.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
10
|
+
tonik-0.1.0.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
|
|
11
|
+
tonik-0.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
12
|
+
tonik-0.1.0.dist-info/RECORD,,
|
tonik-0.0.11.dist-info/RECORD
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
|
|
2
|
-
tonik/api.py,sha256=8YS0WCMlm5xs2N6V8n9BiP87ywJC_BUzYsPvWR9KWJk,6499
|
|
3
|
-
tonik/storage.py,sha256=Ts6jKLqwcDQvPIea-swBCpnEjQr_xnAjOl-hkUelTn4,11165
|
|
4
|
-
tonik/utils.py,sha256=_TxXf9o9fOvtuOvGO6-ww9F5m0QelHyfQzQw8RGjTV4,1868
|
|
5
|
-
tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
|
|
6
|
-
tonik/xarray2zarr.py,sha256=0bWMP_PZ0FLr9AlxKrC5M4aJRh_VGkUfJ-A-BGgKqM8,1979
|
|
7
|
-
tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
|
|
8
|
-
tonik-0.0.11.dist-info/METADATA,sha256=cXj6-wI7M6kWVvcffHe3W9BJQVdJfkNigzcrd0s5XlQ,1939
|
|
9
|
-
tonik-0.0.11.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
10
|
-
tonik-0.0.11.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
|
|
11
|
-
tonik-0.0.11.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
12
|
-
tonik-0.0.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|