tonik 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {tonik-0.1.19 → tonik-0.1.21}/PKG-INFO +1 -1
  2. {tonik-0.1.19 → tonik-0.1.21}/pixi.lock +2 -2
  3. {tonik-0.1.19 → tonik-0.1.21}/pyproject.toml +2 -1
  4. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/storage.py +1 -1
  5. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/utils.py +140 -2
  6. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/xarray2zarr.py +9 -3
  7. {tonik-0.1.19 → tonik-0.1.21}/tests/test_save.py +18 -0
  8. tonik-0.1.21/tests/test_utils.py +92 -0
  9. tonik-0.1.19/tests/test_utils.py +0 -11
  10. {tonik-0.1.19 → tonik-0.1.21}/.devcontainer/devcontainer.json +0 -0
  11. {tonik-0.1.19 → tonik-0.1.21}/.gitattributes +0 -0
  12. {tonik-0.1.19 → tonik-0.1.21}/.gitignore +0 -0
  13. {tonik-0.1.19 → tonik-0.1.21}/HOW_TO_RELEASE.md +0 -0
  14. {tonik-0.1.19 → tonik-0.1.21}/LICENSE +0 -0
  15. {tonik-0.1.19 → tonik-0.1.21}/README.md +0 -0
  16. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/Dockerfile_api +0 -0
  17. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/Dockerfile_grafana +0 -0
  18. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/dashboards/demo_dashboard.json +0 -0
  19. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/docker-compose.yml +0 -0
  20. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/grafana.ini +0 -0
  21. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/provisioning/dashboards/default.yaml +0 -0
  22. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/provisioning/datasources/default.yaml +0 -0
  23. {tonik-0.1.19 → tonik-0.1.21}/mkdocs.yml +0 -0
  24. {tonik-0.1.19 → tonik-0.1.21}/pyproject.toml~ +0 -0
  25. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/__init__.py +0 -0
  26. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/api.py +0 -0
  27. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/grafana_annotations.py +0 -0
  28. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/package_data/index.html +0 -0
  29. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/package_data/whakaari_labels.json +0 -0
  30. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/xarray2netcdf.py +0 -0
  31. {tonik-0.1.19 → tonik-0.1.21}/tests/backend_speed_test.py +0 -0
  32. {tonik-0.1.19 → tonik-0.1.21}/tests/conftest.py +0 -0
  33. {tonik-0.1.19 → tonik-0.1.21}/tests/test_api.py +0 -0
  34. {tonik-0.1.19 → tonik-0.1.21}/tests/test_storage.py +0 -0
--- tonik-0.1.19/PKG-INFO
+++ tonik-0.1.21/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tonik
-Version: 0.1.19
+Version: 0.1.21
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
--- tonik-0.1.19/pixi.lock
+++ tonik-0.1.21/pixi.lock
@@ -3681,8 +3681,8 @@ packages:
   timestamp: 1763054914403
 - pypi: ./
   name: tonik
-  version: 0.1.19
-  sha256: 03b2f4ca6b94c1b2cf29416606b0ac3bc812c1217ce395773496dfad962da544
+  version: 0.1.21
+  sha256: 8df82fc10e56aba3de54b9483f427f92a415574614dd09716f8cadc9c33a2510
   requires_dist:
   - h5py>=3.8
   - datashader>=0.14
--- tonik-0.1.19/pyproject.toml
+++ tonik-0.1.21/pyproject.toml
@@ -12,7 +12,7 @@ exclude = [
 
 [project]
 name = "tonik"
-version = "0.1.19"
+version = "0.1.21"
 authors = [
   { name="Yannik Behr", email="y.behr@gns.cri.nz" },
   { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -64,6 +64,7 @@ grafana_annotations = "tonik.grafana_annotations:main"
 
 [tool.pytest.ini_options]
 log_cli = true
+addopts = "-s"
 
 [tool.hatch.envs.test]
 dependencies = [
--- tonik-0.1.19/src/tonik/storage.py
+++ tonik-0.1.21/src/tonik/storage.py
@@ -18,7 +18,7 @@ LOGGING_CONFIG = {
             "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
         },
         "json": {  # The formatter name
-            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
+            "()": "pythonjsonlogger.json.JsonFormatter",  # The class to instantiate!
             # Json is more complex, but easier to read, display all attributes!
             "format": """
                 asctime: %(asctime)s
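
Note: this one-line change tracks python-json-logger 3.x, which moved JsonFormatter to the pythonjsonlogger.json module and deprecated the old pythonjsonlogger.jsonlogger import path. A minimal, self-contained sketch (not tonik's full LOGGING_CONFIG) of how the "()" key in a dictConfig selects the formatter class, assuming python-json-logger >= 3 is installed:

import logging
import logging.config

LOGGING = {
    "version": 1,
    "formatters": {
        "json": {
            # "()" names the class dictConfig should instantiate.
            "()": "pythonjsonlogger.json.JsonFormatter",
            "format": "%(asctime)s %(levelname)s %(message)s",
        },
    },
    "handlers": {
        "console": {"class": "logging.StreamHandler", "formatter": "json"},
    },
    "root": {"handlers": ["console"], "level": "INFO"},
}

logging.config.dictConfig(LOGGING)
logging.getLogger(__name__).info("hello")  # emitted as a JSON record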
--- tonik-0.1.19/src/tonik/utils.py
+++ tonik-0.1.21/src/tonik/utils.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Union
 from datetime import datetime, timezone, timedelta
 
 import numpy as np
@@ -13,12 +13,40 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
                        freq_names=None, add_nans=True):
     """
     Generate a 1D or 2D feature for testing.
+
+    Parameters
+    ----------
+    dim : int
+        Dimension of the data (1 or 2).
+    ndays : int
+        Number of days to generate data for.
+    nfreqs : int
+        Number of frequencies (only for dim=2).
+    tstart : datetime
+        Start time of the data.
+    freq : str
+        Frequency of the data (e.g., '10min').
+    intervals : int
+        Number of intervals to generate. If None, calculated from ndays and freq.
+    feature_names : list
+        Names of the features to generate.
+    seed : int
+        Random seed for reproducibility.
+    freq_names : list
+        Names of the frequency dimensions (only for dim=2).
+    add_nans : bool
+        Whether to add NaN values to the data.
+
+    Returns
+    -------
+    xr.Dataset
+        Generated test dataset.
     """
     assert dim < 3
     assert dim > 0
 
     if intervals is None:
-        nints = ndays * 6 * 24
+        nints = ndays * int(pd.Timedelta('1h')/pd.Timedelta(freq)) * 24
     else:
         nints = intervals
     dates = pd.date_range(tstart, freq=freq, periods=nints)
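
Note: the old expression ndays * 6 * 24 hardcoded six samples per hour, which is only correct for 10-minute data; the new expression derives the per-hour sample count from the freq string. A small sketch of the arithmetic (the helper name n_intervals is illustrative, not part of tonik):

import pandas as pd

def n_intervals(ndays: int, freq: str) -> int:
    # Samples per hour, e.g. 6 for '10min' or 60 for '1min'.
    per_hour = int(pd.Timedelta('1h') / pd.Timedelta(freq))
    return ndays * per_hour * 24

assert n_intervals(30, '10min') == 30 * 6 * 24  # matches the old hardcoded value
assert n_intervals(3, '1min') == 3 * 24 * 60    # the case exercised in test_generate_test_data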
@@ -59,6 +87,116 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
     return xds
 
 
+def round_datetime(dt: datetime, interval: Union[int, float, timedelta]) -> datetime:
+    """
+    Find closest multiple of interval to given time.
+
+    Parameters:
+    -----------
+    dt : datetime
+        The datetime to round.
+    interval : Union[int, float, timedelta]
+        The interval to which to round the datetime.
+
+    Returns:
+    --------
+    datetime
+        The rounded datetime.
+    """
+    # Normalize interval to whole seconds (supports float/timedelta inputs)
+    if isinstance(interval, timedelta):
+        interval_sec = int(interval.total_seconds())
+    else:
+        interval_sec = int(interval)
+
+    if interval_sec <= 0:
+        raise ValueError("interval must be positive (seconds)")
+
+    # Accept ObsPy UTCDateTime transparently (preserve type on return)
+    _is_obspy = False
+    try:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        if isinstance(dt, _UTCDateTime):
+            _is_obspy = True
+            dt_py = dt.datetime  # Python datetime in UTC
+        else:
+            dt_py = dt
+    except Exception:
+        dt_py = dt
+
+    epoch = (
+        datetime(1970, 1, 1)
+        if dt_py.tzinfo is None
+        else datetime(1970, 1, 1, tzinfo=dt_py.tzinfo)
+    )
+
+    # Compute integer seconds since epoch to avoid float precision issues
+    seconds = int((dt_py - epoch).total_seconds())
+    floored = (seconds + 0.5 * interval_sec) % interval_sec
+    rounded = epoch + timedelta(seconds=seconds + 0.5 * interval_sec - floored)
+
+    if _is_obspy:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        return _UTCDateTime(rounded)
+
+    return rounded
+
+
+def floor_datetime(dt: datetime, interval: Union[int, float, timedelta]) -> datetime:
+    """
+    Floor a datetime to the latest multiple of a given interval.
+
+    Assumes ``dt`` represents a UTC time (naive or tz-aware is fine) and
+    aligns against the Unix epoch 1970-01-01T00:00:00Z. The interval is in
+    seconds (int/float) or a timedelta. Returns a datetime with the same
+    "naive vs aware" form as ``dt``.
+
+    Examples
+    --------
+    >>> from datetime import datetime
+    >>> floor_datetime(datetime.fromisoformat('2025-11-27T10:12:43'), 600)
+    datetime.datetime(2025, 11, 27, 10, 10)
+    """
+
+    # Normalize interval to whole seconds (supports float/timedelta inputs)
+    if isinstance(interval, timedelta):
+        interval_sec = int(interval.total_seconds())
+    else:
+        interval_sec = int(interval)
+
+    if interval_sec <= 0:
+        raise ValueError("interval must be positive (seconds)")
+
+    # Accept ObsPy UTCDateTime transparently (preserve type on return)
+    _is_obspy = False
+    try:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        if isinstance(dt, _UTCDateTime):
+            _is_obspy = True
+            dt_py = dt.datetime  # Python datetime in UTC
+        else:
+            dt_py = dt
+    except Exception:
+        dt_py = dt
+
+    epoch = (
+        datetime(1970, 1, 1)
+        if dt_py.tzinfo is None
+        else datetime(1970, 1, 1, tzinfo=dt_py.tzinfo)
+    )
+
+    # Compute integer seconds since epoch to avoid float precision issues
+    seconds = int((dt_py - epoch).total_seconds())
+    floored = seconds - (seconds % interval_sec)
+    rounded = epoch + timedelta(seconds=floored)
+
+    if _is_obspy:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        return _UTCDateTime(rounded)
+
+    return rounded
+
+
 def get_dt(times):
     """
     Infer the sampling of the time dimension.
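
Note: hypothetical usage of the two new helpers; the expected values follow the unit tests added in tests/test_utils.py further down. Both align against the Unix epoch, accept an interval in seconds or as a timedelta, and pass ObsPy UTCDateTime through with its type preserved:

from datetime import datetime, timedelta
from tonik.utils import floor_datetime, round_datetime

t = datetime(2025, 11, 27, 10, 12, 43)
print(floor_datetime(t, 600))  # 2025-11-27 10:10:00 (latest 10-min boundary)
print(round_datetime(t, 600))  # 2025-11-27 10:10:00 (nearest 10-min boundary)
print(round_datetime(datetime(2025, 11, 27, 10, 17), timedelta(minutes=10)))
# 2025-11-27 10:20:00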
--- tonik-0.1.19/src/tonik/xarray2zarr.py
+++ tonik-0.1.21/src/tonik/xarray2zarr.py
@@ -267,7 +267,7 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
                 continue
 
             if xds_existing[timedim][0] > xds[timedim][-1]:
-                # prepend
+                logger.debug("Prepending data to existing zarr store.")
                 xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: 0}),
                                                           xds[feature], mode='p')
                 xda_new = _build_append_payload_full_chunks(
@@ -277,7 +277,7 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
                                 write_empty_chunks=True)
 
             elif xds_existing[timedim][-1] < xds[timedim][0]:
-                # append
+                logger.debug("Appending data to existing zarr store.")
                 xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: -1}),
                                                           xds[feature], mode='a')
                 xda_new = _build_append_payload_full_chunks(
@@ -286,13 +286,19 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
                                 append_dim=timedim)
 
             elif xds_existing[timedim][0] > xds[timedim][0] and xds_existing[timedim][-1] < xds[timedim][-1]:
-                # existing datetimes are contained in new array
+                logger.debug(
+                    "Data in zarr store contained in new data. Rewriting zarr store.")
                 xda_new = _build_append_payload_full_chunks(
                     xds[feature], 'a', nchunks)
                 xda_new.to_zarr(fout, group=group, mode='w',
                                 write_empty_chunks=True)
 
             else:
+                logger.debug("Data in zarr store overlaps with new data.")
+                logger.debug(
+                    f"Endtime of existing data: {xds_existing[timedim][-1].values}")
+                logger.debug(f"Starttime of new data: {xds[timedim][0].values}")
+                xds_existing = xds_existing.drop_duplicates(timedim, keep='last')
                 overlap = xds_existing[timedim].where(
                     xds_existing[timedim] == xds[timedim])
                 xds[feature].loc[{timedim: overlap}].to_zarr(
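
Note: the overlap branch now deduplicates the existing store's time axis before aligning, keeping the most recently written value wherever a timestamp repeats. A standalone sketch (toy data, not tonik's) of what xarray's drop_duplicates does here:

import pandas as pd
import xarray as xr

times = pd.to_datetime(['2024-01-01 00:00', '2024-01-01 00:10',
                        '2024-01-01 00:10', '2024-01-01 00:20'])
ds = xr.Dataset({'rsam': ('datetime', [1.0, 2.0, 3.0, 4.0])},
                coords={'datetime': times})
deduped = ds.drop_duplicates('datetime', keep='last')
print(deduped.rsam.values)  # [1. 3. 4.] -- the later duplicate wins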
--- tonik-0.1.19/tests/test_save.py
+++ tonik-0.1.21/tests/test_save.py
@@ -169,6 +169,24 @@ def test_xarray2netcdf_multi_access(tmp_path_factory):
                     group='original', engine='h5netcdf')
     xarray2netcdf(xdf2, temp_dir)
 
+@pytest.mark.slow
+def test_netcdf_attribute_bug(tmp_path_factory):
+    """
+    Test to replicate behaviour when an attribute is updated more than
+    2^16 times.
+    """
+    temp_dir = tmp_path_factory.mktemp('test_netcdf_attribute_bug')
+    g = Storage('test_experiment', rootdir=temp_dir, backend='netcdf')
+    c = g.get_substore('MDR', '00', 'HHZ')
+    tstart = datetime(2022, 7, 18, 8, 0, 0)
+    for i in range(70000):
+        if i % 1000 == 0:
+            print(f'Iteration {i}')
+        xdf = generate_test_data(tstart=tstart, dim=1, intervals=3, freq='1h')
+        xdf.attrs['last_update'] = str(tstart + timedelta(hours=3))
+        tstart += timedelta(days=1)
+        c.save(xdf)
+
 
 def test_xarray2zarr(tmp_path_factory):
     xdf = generate_test_data(
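
Note: the new 70,000-iteration test prints its progress, which is what the addopts = "-s" change to pyproject.toml above (disabling pytest's output capture) makes visible. Since the test is tagged @pytest.mark.slow, it can be deselected in routine runs; the slow marker should be registered under [tool.pytest.ini_options] to avoid a PytestUnknownMarkWarning, though whether tonik does so is not shown in this diff. A minimal sketch:

import pytest

# Hypothetical: run the save tests but skip anything marked slow.
pytest.main(["-m", "not slow", "tests/test_save.py"])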
--- /dev/null
+++ tonik-0.1.21/tests/test_utils.py
@@ -0,0 +1,92 @@
+from datetime import datetime, timedelta, timezone
+import numpy as np
+import pytest
+
+from tonik.utils import (extract_consecutive_integers,
+                         generate_test_data,
+                         round_datetime,
+                         floor_datetime)
+
+
+def test_extract_consecutive_integers():
+    nums = [1, 2, 3, 5, 6, 7, 8, 10]
+    assert extract_consecutive_integers(
+        nums) == [[1, 2, 3], [5, 6, 7, 8], [10]]
+    assert extract_consecutive_integers([1]) == [[1]]
+    assert extract_consecutive_integers(np.array([1, 2, 4])) == [[1, 2], [4]]
+
+
+def test_generate_test_data():
+    """
+    Test data generation function.
+    """
+    tstart = datetime.now(timezone.utc) - timedelta(days=30)
+    tstart = floor_datetime(tstart, timedelta(days=1))
+    tstart = tstart.replace(tzinfo=None)
+    data = generate_test_data(tstart='2023-01-01', freq='1min', seed=42,
+                              ndays=3)
+    assert 'datetime' in data.coords
+    assert data.rsam.shape[0] == 3*24*60  # 3 days of 1-minute samples
+    assert 'rsam' in data.data_vars
+    assert 'dsar' in data.data_vars
+    # Check for NaNs
+    n_nans = np.isnan(data.dsar.values).sum()
+    assert n_nans == 408
+
+
+def test_floor_datetime_basic_10min():
+    dt = datetime.fromisoformat("2025-11-27T10:12:43")
+    out = floor_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10, 0)
+
+
+def test_floor_datetime_on_boundary():
+    dt = datetime.fromisoformat("2025-11-27T10:20:00")
+    out = floor_datetime(dt, 600)
+    assert out == dt
+
+
+def test_floor_datetime_timedelta_interval():
+    dt = datetime.fromisoformat("2025-11-27T10:29:59")
+    out = floor_datetime(dt, timedelta(minutes=10))
+    assert out == datetime(2025, 11, 27, 10, 20, 0)
+
+
+def test_floor_datetime_invalid_interval():
+    dt = datetime.fromisoformat("2025-11-27T10:12:43")
+    with pytest.raises(ValueError):
+        floor_datetime(dt, 0)
+    with pytest.raises(ValueError):
+        floor_datetime(dt, -15)
+
+
+def test_floor_datetime_preserves_timezone_utc():
+    dt = datetime(2025, 11, 27, 10, 12, 43, tzinfo=timezone.utc)
+    out = floor_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10, 0, tzinfo=timezone.utc)
+
+
+def test_floor_datetime_with_obspy_UTCDateTime():
+    try:
+        from obspy import UTCDateTime
+    except Exception:
+        pytest.skip("obspy not available")
+
+    t = UTCDateTime(2025, 11, 27, 10, 12, 43)
+    out = floor_datetime(t, 600)
+    assert isinstance(out, UTCDateTime)
+    assert out == UTCDateTime(2025, 11, 27, 10, 10, 0)
+
+
+def test_round_datetime_basic_10min():
+    dt = datetime.fromisoformat("2025-11-27T10:12:43")
+    out = round_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10)
+
+    dt = datetime.fromisoformat("2025-11-27T10:10:00")
+    out = round_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10)
+
+    dt = datetime.fromisoformat("2025-11-27T10:17:00")
+    out = round_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 20)
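
Note: one midpoint subtlety the tests above do not pin down. round_datetime computes floor(seconds + interval/2), so a time exactly halfway between two boundaries rounds up; this is an observation about the implementation shown earlier, not documented behaviour:

from datetime import datetime
from tonik.utils import round_datetime

# 10:15 is equidistant from 10:10 and 10:20 on a 600 s grid; half-up wins.
print(round_datetime(datetime(2025, 11, 27, 10, 15), 600))  # 2025-11-27 10:20:00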
--- tonik-0.1.19/tests/test_utils.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import numpy as np
-
-from tonik.utils import extract_consecutive_integers
-
-
-def test_extract_consecutive_integers():
-    nums = [1, 2, 3, 5, 6, 7, 8, 10]
-    assert extract_consecutive_integers(
-        nums) == [[1, 2, 3], [5, 6, 7, 8], [10]]
-    assert extract_consecutive_integers([1]) == [[1]]
-    assert extract_consecutive_integers(np.array([1, 2, 4])) == [[1, 2], [4]]