tonik-0.1.16-py3-none-any.whl → tonik-0.1.18-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
tonik/api.py CHANGED
@@ -26,8 +26,9 @@ InventoryReturnType = Union[list, dict]
 
 class TonikAPI:
 
-    def __init__(self, rootdir) -> None:
+    def __init__(self, rootdir, backend='netcdf') -> None:
         self.rootdir = rootdir
+        self.backend = backend
         self.app = FastAPI()
 
         # -- allow any origin to query API
@@ -72,7 +73,7 @@ class TonikAPI:
         _et = self.preprocess_datetime(endtime)
         g = Storage(group, rootdir=self.rootdir,
                     starttime=_st, endtime=_et,
-                    create=False)
+                    create=False, backend=self.backend)
         c = g
         if subdir:
             c = g.get_substore(*subdir)
@@ -147,7 +148,8 @@ class TonikAPI:
         return freq, dates, spec
 
     async def inventory(self, group: str, subdir: SubdirType = None, tree: bool = True) -> InventoryReturnType:
-        sg = Storage(group, rootdir=self.rootdir, create=False)
+        sg = Storage(group, rootdir=self.rootdir,
+                     create=False, backend=self.backend)
         try:
             c = sg.get_substore(*subdir)
         except TypeError:
@@ -168,7 +170,8 @@ class TonikAPI:
         _st = self.preprocess_datetime(starttime)
         _et = self.preprocess_datetime(endtime)
         sg = Storage(group, rootdir=self.rootdir,
-                     starttime=_st, endtime=_et, create=False)
+                     starttime=_st, endtime=_et, create=False,
+                     backend=self.backend)
         try:
             c = sg.get_substore(*subdir)
         except TypeError:
@@ -183,10 +186,11 @@
 def main(argv=None):
     parser = ArgumentParser()
     parser.add_argument("--rootdir", default='/tmp')
+    parser.add_argument("--backend", default='netcdf')
     parser.add_argument("-p", "--port", default=8003, type=int)
     parser.add_argument("--host", default='0.0.0.0')
     args = parser.parse_args(argv)
-    ta = TonikAPI(args.rootdir)
+    ta = TonikAPI(args.rootdir, backend=args.backend)
     uvicorn.run(ta.app, host=args.host, port=args.port)
tonik/storage.py CHANGED
@@ -112,7 +112,7 @@ class Path(object):
             self.children[feature] = Path(feature + file_ending, self.path)
         return _feature_path
 
-    def __call__(self, feature, group='original', attributes_only=False):
+    def __call__(self, feature, group='original', metadata=False):
         """
         Request a particular feature
 
@@ -122,9 +122,9 @@ class Path(object):
         """
         filename = self.feature_path(feature)
 
-        if attributes_only:
-            with xr.open_dataset(filename, group=group, engine=self.engine) as ds:
-                return ds.attrs
+        if metadata:
+            with xr.open_dataset(filename, group='meta', engine=self.engine) as ds:
+                return ds
 
         if self.endtime < self.starttime:
             raise ValueError('Startime has to be smaller than endtime.')
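With this rename, a feature lookup with metadata=True returns the whole 'meta' group as a dataset rather than the root group's attributes, so callers of the old attributes_only flag need to migrate. A rough sketch of reading the history back; the group and feature names are invented, and it assumes Storage objects expose Path.__call__ as in earlier versions:

    # Hypothetical usage sketch, not part of the package.
    from tonik import Storage  # import path assumed

    g = Storage('volcano', rootdir='/data/archive', create=False)
    meta = g('rsam', metadata=True)           # 'meta' group as a dataset
    print(meta['update_log'].values)          # time of each archive update
    print(meta['last_datapoint'].values)      # newest data time per update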
tonik/xarray2netcdf.py CHANGED
@@ -1,6 +1,6 @@
 import logging
 import os
-from datetime import datetime
+from datetime import datetime, timezone
 from warnings import filterwarnings
 
 import h5netcdf
@@ -40,6 +40,7 @@ def xarray2netcdf(xArray, fdir, group="original", timedim="datetime",
     data_starttime = xArray[timedim].values[0].astype(
         'datetime64[us]').astype(datetime)
     starttime = min(data_starttime, archive_starttime)
+    now = datetime.now(tz=timezone.utc)
     if resolution is None:
         resolution = (np.diff(xArray[timedim])/np.timedelta64(1, 'h'))[0]
 
@@ -61,11 +62,16 @@ def xarray2netcdf(xArray, fdir, group="original", timedim="datetime",
 
     with h5netcdf.File(h5file, _mode) as h5f:
         try:
-            rootGrp = _create_h5_Structure(group, featureName,
-                                           h5f, xArray, starttime, timedim)
+            rootGrp = _create_root_group(group, featureName,
+                                         h5f, xArray, starttime, timedim)
         except ValueError:  # group already exists, append
             rootGrp = h5f[group]
 
+        try:
+            metaGrp = _create_meta_group(h5f, resolution)
+        except ValueError:  # group already exists, append
+            metaGrp = h5f['meta']
+
         # determine indices
         new_time = date2num(xArray[timedim].values.astype('datetime64[us]').astype(datetime),
                             units=rootGrp[timedim].attrs['units'],
@@ -73,6 +79,7 @@ def xarray2netcdf(xArray, fdir, group="original", timedim="datetime",
         t0 = date2num(starttime,
                       units=rootGrp[timedim].attrs['units'],
                       calendar=rootGrp[timedim].attrs['calendar'])
+
         indices = np.rint((new_time - t0)/resolution).astype(int)
         if not np.all(indices >= 0):
             raise ValueError("Data starts before the archive start time")
@@ -86,18 +93,21 @@ def xarray2netcdf(xArray, fdir, group="original", timedim="datetime",
             data[:, indices] = xArray[featureName].values
         else:
             data[indices] = xArray[featureName].values
-        rootGrp.attrs['endtime'] = str(num2date(times[-1], units=rootGrp[timedim].attrs['units'],
-                                                calendar=rootGrp[timedim].attrs['calendar']))
-        rootGrp.attrs['resolution'] = resolution
-        rootGrp.attrs['resolution_units'] = 'h'
-        try:
-            _setMetaInfo(featureName, rootGrp, xArray)
-        except KeyError as e:
-            logging.warning(
-                f"Could not set all meta info for {featureName}: {e}")
+        now_time = date2num(now, units=metaGrp['update_log'].attrs['units'],
+                            calendar=metaGrp['update_log'].attrs['calendar'])
+        ulog = metaGrp['update_log']
+        ldata = metaGrp['last_datapoint']
+        metaGrp.resize_dimension('update', ulog.shape[0] + 1)
+        ulog[-1] = now_time
+        metaGrp.resize_dimension('endtime', ldata.shape[0] + 1)
+        ldata[-1] = times[-1]
+        old_resolution = metaGrp['resolution'][()]
+        if old_resolution != resolution:
+            raise ValueError(f"Resolution mismatch for {featureName}: "
+                             f"{old_resolution} != {resolution}")
 
 
-def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
+def _create_root_group(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
     rootGrp = h5f.create_group(defaultGroupName)
     rootGrp.dimensions[timedim] = None
     coordinates = rootGrp.create_variable(timedim, (timedim,), float)
@@ -114,10 +124,33 @@ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime,
     # names in the correct order
     rootGrp.create_variable(featureName, tuple(
         xArray[featureName].dims), dtype=float, fillvalue=0.)
+    _set_attributes(featureName, rootGrp, xArray)
     return rootGrp
 
 
-def _setMetaInfo(featureName, rootGrp, xArray):
+def _set_attributes(featureName, rootGrp, xArray):
+    """
+    Set attributes for the root group. Attributes are assumed to not change
+    over time. If they do, they should be stored in the 'meta' group.
+    """
     for key, value in xArray.attrs.items():
         rootGrp.attrs[key] = value
     rootGrp.attrs['feature'] = featureName
+
+
+def _create_meta_group(h5f, resolution):
+    """
+    Create meta group to track processing history.
+    """
+    metaGrp = h5f.create_group('meta')
+    metaGrp.dimensions['update'] = None
+    ulog = metaGrp.create_variable('update_log', ('update',), float)
+    ulog.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
+    ulog.attrs['calendar'] = 'gregorian'
+    metaGrp.dimensions['endtime'] = None
+    ldata = metaGrp.create_variable('last_datapoint', ('endtime',), float)
+    ldata.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
+    ldata.attrs['calendar'] = 'gregorian'
+    res = metaGrp.create_variable("resolution", (), dtype=float)
+    res[()] = resolution
+    return metaGrp
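Every write now appends one row to the unlimited 'update' and 'endtime' dimensions via resize_dimension and checks the incoming resolution against the stored scalar. A sketch of reading that history back with h5netcdf; the file path is invented, and cftime.num2date is assumed to be the converter matching the date2num calls above:

    # Hypothetical usage sketch, not part of the package.
    import h5netcdf
    from cftime import num2date

    with h5netcdf.File('/data/archive/volcano/rsam.nc', 'r') as h5f:
        ulog = h5f['meta']['update_log']
        updates = num2date(ulog[...], units=ulog.attrs['units'],
                           calendar=ulog.attrs['calendar'])
        print('updates:', list(updates))
        print('resolution [h]:', h5f['meta']['resolution'][()])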
tonik/xarray2zarr.py CHANGED
@@ -1,3 +1,4 @@
+from datetime import datetime, timezone
 import logging
 import os
 
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
 def get_chunks(xda: xr.DataArray, chunks: int = 1,
                timedim: str = 'datetime') -> dict:
     """
-    Determine the chunk size for the datetime dimension. Other dimensions are assumed to be 
+    Determine the chunk size for the datetime dimension. Other dimensions are assumed to be
     small enough to not require chunking.
 
     Parameters
@@ -45,7 +46,7 @@ def fill_time_gaps_between_datasets(xds_existing: xr.DataArray, xds_new: xr.Data
     ----------
     xds_existing : xr.Dataset
         Existing dataset on disk
-    xds_new : xr.Dataset 
+    xds_new : xr.Dataset
        New dataset to append
     timedim : str
        Name of the time dimension
@@ -106,6 +107,22 @@ def _build_append_payload_full_chunks(payload: xr.DataArray, mode: str,
                                       chunklen: int, timedim: str = "datetime") -> xr.DataArray:
     """
     Construct the sequence to append so that the final total length is a multiple of `chunklen`
+
+    Parameters
+    ----------
+    payload : xr.DataArray
+        DataArray to append
+    mode : str
+        'a' for append, 'p' for prepend
+    chunklen : int
+        Chunk length in number of time steps
+    timedim : str
+        Name of the time dimension
+
+    Returns
+    -------
+    xr.DataArray
+        Padded DataArray with length a multiple of chunklen
     """
     if mode not in ['a', 'p']:
         raise ValueError(
@@ -141,7 +158,58 @@ def _build_append_payload_full_chunks(payload: xr.DataArray, mode: str,
         payload = xr.concat([pad_da, payload], dim=timedim)
     payload = payload.chunk({timedim: chunklen})
     return payload
-    #
+
+
+def _update_meta_data(fout: str,
+                      last_datapoint: np.datetime64,
+                      resolution: float | None = None,
+                      meta_group: str = "meta") -> None:
+    """
+    Append current update time (and last_datapoint) to meta group.
+
+    Parameters
+    ----------
+    fout : str
+        Base zarr store path (per-variable .zarr directory).
+    last_datapoint : np.datetime64
+        Latest data time in the feature.
+    resolution : float | None
+        Optional time resolution (hours) to store once.
+    meta_group : str
+        Group name for metadata.
+    """
+
+    now = np.datetime64(datetime.now(tz=timezone.utc), 'ns')
+    new_update = xr.DataArray([now],
+                              coords={'update': [now]},
+                              dims=['update'],
+                              name='update_log')
+    new_last = xr.DataArray([last_datapoint],
+                            coords={'endtime': [now]},
+                            dims=['endtime'],
+                            name='last_datapoint')
+
+    try:
+        meta = xr.open_zarr(fout, group=meta_group, chunks=None)
+        # Existing vars -> concatenate
+        update_old = meta.get('update_log')
+        last_old = meta.get('last_datapoint')
+        res_da_old = meta.get('resolution').values[()]
+        new_update = xr.concat([update_old, new_update], dim='update')
+        new_last = xr.concat([last_old, new_last], dim='endtime')
+        if resolution != res_da_old:
+            raise ValueError(f"Resolution mismatch for {fout}: "
+                             f"{res_da_old} != {resolution}")
+        res_da = xr.DataArray(resolution, name='resolution')
+    except Exception:
+        # First creation
+        res_da = xr.DataArray(
+            resolution, name='resolution') if resolution is not None else None
+
+    vars = {'update_log': new_update, 'last_datapoint': new_last}
+    if res_da is not None:
+        vars['resolution'] = res_da
+    xr.Dataset(vars).to_zarr(fout, group=meta_group, mode='w')
 
 
 def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
@@ -182,6 +250,9 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
         fout = os.path.join(path, feature + '.zarr')
         # nchunks = get_chunks(xds[feature], chunks)
         nchunks = chunks
+        last_dp = xds[feature][timedim].values[-1]
+        _update_meta_data(fout, last_dp, resolution=float(
+            get_dt(xds[timedim]) / pd.Timedelta(1, 'h')))
         try:
             xds_existing = xr.open_zarr(fout, group=group)
             has_store = True
@@ -204,6 +275,7 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
             combined = xda_new.combine_first(xds_existing[feature]).compute()
             combined.chunk({timedim: nchunks}).to_zarr(fout, group=group, mode='w',
                                                        write_empty_chunks=True)
+
         elif xds_existing[timedim][-1] < xds[timedim][0]:
             # append
             xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: -1}),
@@ -212,12 +284,14 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
                                                       xda_new, 'a', nchunks)
             xda_new.to_zarr(fout, group=group, mode='a',
                             append_dim=timedim)
+
         elif xds_existing[timedim][0] > xds[timedim][0] and xds_existing[timedim][-1] < xds[timedim][-1]:
             # existing datetimes are contained in new array
             xda_new = _build_append_payload_full_chunks(
                 xds[feature], 'a', nchunks)
             xda_new.to_zarr(fout, group=group, mode='w',
                             write_empty_chunks=True)
+
         else:
             overlap = xds_existing[timedim].where(
                 xds_existing[timedim] == xds[timedim])
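The zarr backend records the same bookkeeping in a per-feature 'meta' group, rewritten in full (mode='w') on every update rather than resized in place. Reading it back is plain xarray; the store path below is invented:

    # Hypothetical usage sketch, not part of the package.
    import xarray as xr

    meta = xr.open_zarr('/data/archive/volcano/rsam.zarr',
                        group='meta', chunks=None)
    print(meta['update_log'].values)       # UTC time of every update
    print(meta['last_datapoint'].values)   # newest data time per update
    print(float(meta['resolution']))       # sampling interval in hours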
{tonik-0.1.16.dist-info → tonik-0.1.18.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tonik
-Version: 0.1.16
+Version: 0.1.18
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -21,15 +21,17 @@ Requires-Dist: python-json-logger>=2.0
 Requires-Dist: s3fs
 Requires-Dist: uvicorn[standard]>=0.22
 Requires-Dist: xarray[accel,io,parallel]
-Requires-Dist: zarr[remote-tests]<3; python_version < '3.11'
-Requires-Dist: zarr[remote-tests]>=3.0.3; python_version >= '3.11'
+Requires-Dist: zarr
 Provides-Extra: dev
+Requires-Dist: build; extra == 'dev'
 Requires-Dist: httpx; extra == 'dev'
 Requires-Dist: ipykernel; extra == 'dev'
 Requires-Dist: mkdocs; extra == 'dev'
 Requires-Dist: mkdocs-jupyter; extra == 'dev'
 Requires-Dist: mkdocstrings[python]; extra == 'dev'
 Requires-Dist: pytest; extra == 'dev'
+Requires-Dist: twine; extra == 'dev'
+Requires-Dist: zarr[remote-tests]; extra == 'dev'
 Description-Content-Type: text/markdown
 
 # Tonik
tonik-0.1.18.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+tonik/__init__.py,sha256=dov-nMeGFBzLspmj4rWKjC4r736vmaPDgMEkHSUfP98,523
+tonik/api.py,sha256=vW0ykOo5iGAV0_WuOepdrnUyFp83F7KyJTd43ksLmUk,7985
+tonik/grafana_annotations.py,sha256=ZU9Cy-HT4vvMfYIQzD9WboaDVOCBDv__NmXbk1qKWJo,5838
+tonik/storage.py,sha256=jcCVx2N8J1ZBKM73k-OaxB0uxukn4VAM_-CCaCeAKwk,10589
+tonik/utils.py,sha256=vRFMoCU7dbfnnm5RALBR-XrpPGDFtQoeTDzxFiYf3bo,7522
+tonik/xarray2netcdf.py,sha256=hO3adraANvSuvmMJj0moDRAKXK01uTpAuxHPKv5xMwY,6486
+tonik/xarray2zarr.py,sha256=aPaN-gSI0YFAOeD30ex6fq6vtzrNO7YSKraitrUn6Fc,11222
+tonik/package_data/index.html,sha256=ZCZ-BtGRERsL-6c_dfY43qd2WAaggH7xereennGL6ww,4372
+tonik/package_data/whakaari_labels.json,sha256=96UZSq41yXgAJxuKivLBKlRTw-33jkjh7AGKTsDQ9Yg,3993
+tonik-0.1.18.dist-info/METADATA,sha256=bRaWxrTariy4xjpcKTrqDk0_GqtHOReVYIjbWz1f1hQ,2207
+tonik-0.1.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+tonik-0.1.18.dist-info/entry_points.txt,sha256=y82XyTeQddM87gCTzgSQaTlKF3VFicO4hhClHUv6j1A,127
+tonik-0.1.18.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+tonik-0.1.18.dist-info/RECORD,,
tonik-0.1.16.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
-tonik/__init__.py,sha256=dov-nMeGFBzLspmj4rWKjC4r736vmaPDgMEkHSUfP98,523
-tonik/api.py,sha256=XDKiz1AzYNBOwYfaRxpMgqGRDAPJEE6wWJyBxuYPRLc,7751
-tonik/grafana_annotations.py,sha256=ZU9Cy-HT4vvMfYIQzD9WboaDVOCBDv__NmXbk1qKWJo,5838
-tonik/storage.py,sha256=vFxIrY92cSYOYOpNXHxCAjdXgkrRytaRDpudtK0glmg,10608
-tonik/utils.py,sha256=vRFMoCU7dbfnnm5RALBR-XrpPGDFtQoeTDzxFiYf3bo,7522
-tonik/xarray2netcdf.py,sha256=gDNT6nxnRbXPeRqZ3URW5oXY3Nfh3TCrfueE-eUrIoY,5181
-tonik/xarray2zarr.py,sha256=SSchDqy5oyYrIG4smV8fslsUg2UPSyyQjUA5ZlP1P4I,8630
-tonik/package_data/index.html,sha256=ZCZ-BtGRERsL-6c_dfY43qd2WAaggH7xereennGL6ww,4372
-tonik/package_data/whakaari_labels.json,sha256=96UZSq41yXgAJxuKivLBKlRTw-33jkjh7AGKTsDQ9Yg,3993
-tonik-0.1.16.dist-info/METADATA,sha256=EOwmXNC5b6IJsnTLMelBZ3vL1ljkfZwhM8Hoz6iHiZQ,2191
-tonik-0.1.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-tonik-0.1.16.dist-info/entry_points.txt,sha256=y82XyTeQddM87gCTzgSQaTlKF3VFicO4hhClHUv6j1A,127
-tonik-0.1.16.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-tonik-0.1.16.dist-info/RECORD,,