tonik 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {tonik-0.1.19 → tonik-0.1.21}/PKG-INFO +1 -1
  2. {tonik-0.1.19 → tonik-0.1.21}/pixi.lock +2 -2
  3. {tonik-0.1.19 → tonik-0.1.21}/pyproject.toml +2 -1
  4. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/storage.py +1 -1
  5. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/utils.py +140 -2
  6. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/xarray2zarr.py +9 -3
  7. {tonik-0.1.19 → tonik-0.1.21}/tests/test_save.py +18 -0
  8. tonik-0.1.21/tests/test_utils.py +92 -0
  9. tonik-0.1.19/tests/test_utils.py +0 -11
  10. {tonik-0.1.19 → tonik-0.1.21}/.devcontainer/devcontainer.json +0 -0
  11. {tonik-0.1.19 → tonik-0.1.21}/.gitattributes +0 -0
  12. {tonik-0.1.19 → tonik-0.1.21}/.gitignore +0 -0
  13. {tonik-0.1.19 → tonik-0.1.21}/HOW_TO_RELEASE.md +0 -0
  14. {tonik-0.1.19 → tonik-0.1.21}/LICENSE +0 -0
  15. {tonik-0.1.19 → tonik-0.1.21}/README.md +0 -0
  16. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/Dockerfile_api +0 -0
  17. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/Dockerfile_grafana +0 -0
  18. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/dashboards/demo_dashboard.json +0 -0
  19. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/docker-compose.yml +0 -0
  20. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/grafana.ini +0 -0
  21. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/provisioning/dashboards/default.yaml +0 -0
  22. {tonik-0.1.19 → tonik-0.1.21}/grafana_example/provisioning/datasources/default.yaml +0 -0
  23. {tonik-0.1.19 → tonik-0.1.21}/mkdocs.yml +0 -0
  24. {tonik-0.1.19 → tonik-0.1.21}/pyproject.toml~ +0 -0
  25. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/__init__.py +0 -0
  26. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/api.py +0 -0
  27. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/grafana_annotations.py +0 -0
  28. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/package_data/index.html +0 -0
  29. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/package_data/whakaari_labels.json +0 -0
  30. {tonik-0.1.19 → tonik-0.1.21}/src/tonik/xarray2netcdf.py +0 -0
  31. {tonik-0.1.19 → tonik-0.1.21}/tests/backend_speed_test.py +0 -0
  32. {tonik-0.1.19 → tonik-0.1.21}/tests/conftest.py +0 -0
  33. {tonik-0.1.19 → tonik-0.1.21}/tests/test_api.py +0 -0
  34. {tonik-0.1.19 → tonik-0.1.21}/tests/test_storage.py +0 -0
--- tonik-0.1.19/PKG-INFO
+++ tonik-0.1.21/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tonik
-Version: 0.1.19
+Version: 0.1.21
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
--- tonik-0.1.19/pixi.lock
+++ tonik-0.1.21/pixi.lock
@@ -3681,8 +3681,8 @@ packages:
   timestamp: 1763054914403
 - pypi: ./
   name: tonik
-  version: 0.1.19
-  sha256: 03b2f4ca6b94c1b2cf29416606b0ac3bc812c1217ce395773496dfad962da544
+  version: 0.1.21
+  sha256: 8df82fc10e56aba3de54b9483f427f92a415574614dd09716f8cadc9c33a2510
   requires_dist:
   - h5py>=3.8
   - datashader>=0.14
--- tonik-0.1.19/pyproject.toml
+++ tonik-0.1.21/pyproject.toml
@@ -12,7 +12,7 @@ exclude = [
 
 [project]
 name = "tonik"
-version = "0.1.19"
+version = "0.1.21"
 authors = [
   { name="Yannik Behr", email="y.behr@gns.cri.nz" },
   { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -64,6 +64,7 @@ grafana_annotations = "tonik.grafana_annotations:main"
 
 [tool.pytest.ini_options]
 log_cli = true
+addopts = "-s"
 
 [tool.hatch.envs.test]
 dependencies = [
--- tonik-0.1.19/src/tonik/storage.py
+++ tonik-0.1.21/src/tonik/storage.py
@@ -18,7 +18,7 @@ LOGGING_CONFIG = {
             "datefmt": "%Y-%m-%d %H:%M:%S",  # How to display dates
         },
         "json": {  # The formatter name
-            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
+            "()": "pythonjsonlogger.json.JsonFormatter",  # The class to instantiate!
             # Json is more complex, but easier to read, display all attributes!
             "format": """
                 asctime: %(asctime)s
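
Note: this one-line change tracks python-json-logger 3.x, which moved JsonFormatter to the pythonjsonlogger.json module and deprecated the old pythonjsonlogger.jsonlogger import path. A minimal, self-contained sketch (not tonik's full LOGGING_CONFIG) of how the "()" key in a dictConfig selects the formatter class, assuming python-json-logger >= 3 is installed:

import logging
import logging.config

LOGGING = {
    "version": 1,
    "formatters": {
        "json": {
            # "()" names the class dictConfig should instantiate.
            "()": "pythonjsonlogger.json.JsonFormatter",
            "format": "%(asctime)s %(levelname)s %(message)s",
        },
    },
    "handlers": {
        "console": {"class": "logging.StreamHandler", "formatter": "json"},
    },
    "root": {"handlers": ["console"], "level": "INFO"},
}

logging.config.dictConfig(LOGGING)
logging.getLogger(__name__).info("hello")  # emitted as a JSON record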
--- tonik-0.1.19/src/tonik/utils.py
+++ tonik-0.1.21/src/tonik/utils.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Union
 from datetime import datetime, timezone, timedelta
 
 import numpy as np
@@ -13,12 +13,40 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
                        freq_names=None, add_nans=True):
     """
     Generate a 1D or 2D feature for testing.
+
+    Parameters
+    ----------
+    dim : int
+        Dimension of the data (1 or 2).
+    ndays : int
+        Number of days to generate data for.
+    nfreqs : int
+        Number of frequencies (only for dim=2).
+    tstart : datetime
+        Start time of the data.
+    freq : str
+        Frequency of the data (e.g., '10min').
+    intervals : int
+        Number of intervals to generate. If None, calculated from ndays and freq.
+    feature_names : list
+        Names of the features to generate.
+    seed : int
+        Random seed for reproducibility.
+    freq_names : list
+        Names of the frequency dimensions (only for dim=2).
+    add_nans : bool
+        Whether to add NaN values to the data.
+
+    Returns
+    -------
+    xr.Dataset
+        Generated test dataset.
     """
     assert dim < 3
     assert dim > 0
 
     if intervals is None:
-        nints = ndays * 6 * 24
+        nints = ndays * int(pd.Timedelta('1h')/pd.Timedelta(freq)) * 24
     else:
         nints = intervals
     dates = pd.date_range(tstart, freq=freq, periods=nints)
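
Note: the old expression ndays * 6 * 24 hardcoded six samples per hour, which is only correct for 10-minute data; the new expression derives the per-hour sample count from the freq string. A small sketch of the arithmetic (the helper name n_intervals is illustrative, not part of tonik):

import pandas as pd

def n_intervals(ndays: int, freq: str) -> int:
    # Samples per hour, e.g. 6 for '10min' or 60 for '1min'.
    per_hour = int(pd.Timedelta('1h') / pd.Timedelta(freq))
    return ndays * per_hour * 24

assert n_intervals(30, '10min') == 30 * 6 * 24  # matches the old hardcoded value
assert n_intervals(3, '1min') == 3 * 24 * 60    # the case exercised in test_generate_test_data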
@@ -59,6 +87,116 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
     return xds
 
 
+def round_datetime(dt: datetime, interval: Union[int, float, timedelta]) -> datetime:
+    """
+    Find closest multiple of interval to given time.
+
+    Parameters:
+    -----------
+    dt : datetime
+        The datetime to round.
+    interval : Union[int, float, timedelta]
+        The interval to which to round the datetime.
+
+    Returns:
+    --------
+    datetime
+        The rounded datetime.
+    """
+    # Normalize interval to whole seconds (supports float/timedelta inputs)
+    if isinstance(interval, timedelta):
+        interval_sec = int(interval.total_seconds())
+    else:
+        interval_sec = int(interval)
+
+    if interval_sec <= 0:
+        raise ValueError("interval must be positive (seconds)")
+
+    # Accept ObsPy UTCDateTime transparently (preserve type on return)
+    _is_obspy = False
+    try:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        if isinstance(dt, _UTCDateTime):
+            _is_obspy = True
+            dt_py = dt.datetime  # Python datetime in UTC
+        else:
+            dt_py = dt
+    except Exception:
+        dt_py = dt
+
+    epoch = (
+        datetime(1970, 1, 1)
+        if dt_py.tzinfo is None
+        else datetime(1970, 1, 1, tzinfo=dt_py.tzinfo)
+    )
+
+    # Compute integer seconds since epoch to avoid float precision issues
+    seconds = int((dt_py - epoch).total_seconds())
+    floored = (seconds + 0.5 * interval_sec) % interval_sec
+    rounded = epoch + timedelta(seconds=seconds + 0.5 * interval_sec - floored)
+
+    if _is_obspy:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        return _UTCDateTime(rounded)
+
+    return rounded
+
+
+def floor_datetime(dt: datetime, interval: Union[int, float, timedelta]) -> datetime:
+    """
+    Floor a datetime to the latest multiple of a given interval.
+
+    Assumes ``dt`` represents a UTC time (naive or tz-aware is fine) and
+    aligns against the Unix epoch 1970-01-01T00:00:00Z. The interval is in
+    seconds (int/float) or a timedelta. Returns a datetime with the same
+    "naive vs aware" form as ``dt``.
+
+    Examples
+    --------
+    >>> from datetime import datetime
+    >>> floor_datetime(datetime.fromisoformat('2025-11-27T10:12:43'), 600)
+    datetime.datetime(2025, 11, 27, 10, 10)
+    """
+
+    # Normalize interval to whole seconds (supports float/timedelta inputs)
+    if isinstance(interval, timedelta):
+        interval_sec = int(interval.total_seconds())
+    else:
+        interval_sec = int(interval)
+
+    if interval_sec <= 0:
+        raise ValueError("interval must be positive (seconds)")
+
+    # Accept ObsPy UTCDateTime transparently (preserve type on return)
+    _is_obspy = False
+    try:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        if isinstance(dt, _UTCDateTime):
+            _is_obspy = True
+            dt_py = dt.datetime  # Python datetime in UTC
+        else:
+            dt_py = dt
+    except Exception:
+        dt_py = dt
+
+    epoch = (
+        datetime(1970, 1, 1)
+        if dt_py.tzinfo is None
+        else datetime(1970, 1, 1, tzinfo=dt_py.tzinfo)
+    )
+
+    # Compute integer seconds since epoch to avoid float precision issues
+    seconds = int((dt_py - epoch).total_seconds())
+    floored = seconds - (seconds % interval_sec)
+    rounded = epoch + timedelta(seconds=floored)
+
+    if _is_obspy:
+        from obspy import UTCDateTime as _UTCDateTime  # type: ignore
+        return _UTCDateTime(rounded)
+
+    return rounded
+
+
 def get_dt(times):
     """
     Infer the sampling of the time dimension.
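
Note: hypothetical usage of the two new helpers; the expected values follow the unit tests added in tests/test_utils.py further down. Both align against the Unix epoch, accept an interval in seconds or as a timedelta, and pass ObsPy UTCDateTime through with its type preserved:

from datetime import datetime, timedelta
from tonik.utils import floor_datetime, round_datetime

t = datetime(2025, 11, 27, 10, 12, 43)
print(floor_datetime(t, 600))  # 2025-11-27 10:10:00 (latest 10-min boundary)
print(round_datetime(t, 600))  # 2025-11-27 10:10:00 (nearest 10-min boundary)
print(round_datetime(datetime(2025, 11, 27, 10, 17), timedelta(minutes=10)))
# 2025-11-27 10:20:00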
--- tonik-0.1.19/src/tonik/xarray2zarr.py
+++ tonik-0.1.21/src/tonik/xarray2zarr.py
@@ -267,7 +267,7 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
                 continue
 
             if xds_existing[timedim][0] > xds[timedim][-1]:
-                # prepend
+                logger.debug("Prepending data to existing zarr store.")
                 xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: 0}),
                                                           xds[feature], mode='p')
                 xda_new = _build_append_payload_full_chunks(
@@ -277,7 +277,7 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
                                 write_empty_chunks=True)
 
             elif xds_existing[timedim][-1] < xds[timedim][0]:
-                # append
+                logger.debug("Appending data to existing zarr store.")
                 xda_new = fill_time_gaps_between_datasets(xds_existing[feature].isel({timedim: -1}),
                                                           xds[feature], mode='a')
                 xda_new = _build_append_payload_full_chunks(
@@ -286,13 +286,19 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original',
                                 append_dim=timedim)
 
             elif xds_existing[timedim][0] > xds[timedim][0] and xds_existing[timedim][-1] < xds[timedim][-1]:
-                # existing datetimes are contained in new array
+                logger.debug(
+                    "Data in zarr store contained in new data. Rewriting zarr store.")
                 xda_new = _build_append_payload_full_chunks(
                     xds[feature], 'a', nchunks)
                 xda_new.to_zarr(fout, group=group, mode='w',
                                 write_empty_chunks=True)
 
             else:
+                logger.debug("Data in zarr store overlaps with new data.")
+                logger.debug(
+                    f"Endtime of existing data: {xds_existing[timedim][-1].values}")
+                logger.debug(f"Starttime of new data: {xds[timedim][0].values}")
+                xds_existing = xds_existing.drop_duplicates(timedim, keep='last')
                 overlap = xds_existing[timedim].where(
                     xds_existing[timedim] == xds[timedim])
                 xds[feature].loc[{timedim: overlap}].to_zarr(
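
Note: the overlap branch now deduplicates the existing store's time axis before aligning, keeping the most recently written value wherever a timestamp repeats. A standalone sketch (toy data, not tonik's) of what xarray's drop_duplicates does here:

import pandas as pd
import xarray as xr

times = pd.to_datetime(['2024-01-01 00:00', '2024-01-01 00:10',
                        '2024-01-01 00:10', '2024-01-01 00:20'])
ds = xr.Dataset({'rsam': ('datetime', [1.0, 2.0, 3.0, 4.0])},
                coords={'datetime': times})
deduped = ds.drop_duplicates('datetime', keep='last')
print(deduped.rsam.values)  # [1. 3. 4.] -- the later duplicate wins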
--- tonik-0.1.19/tests/test_save.py
+++ tonik-0.1.21/tests/test_save.py
@@ -169,6 +169,24 @@ def test_xarray2netcdf_multi_access(tmp_path_factory):
                     group='original', engine='h5netcdf')
     xarray2netcdf(xdf2, temp_dir)
 
+@pytest.mark.slow
+def test_netcdf_attribute_bug(tmp_path_factory):
+    """
+    Test to replicate behaviour when an attribute is updated more than
+    2^16 times.
+    """
+    temp_dir = tmp_path_factory.mktemp('test_netcdf_attribute_bug')
+    g = Storage('test_experiment', rootdir=temp_dir, backend='netcdf')
+    c = g.get_substore('MDR', '00', 'HHZ')
+    tstart = datetime(2022, 7, 18, 8, 0, 0)
+    for i in range(70000):
+        if i % 1000 == 0:
+            print(f'Iteration {i}')
+        xdf = generate_test_data(tstart=tstart, dim=1, intervals=3, freq='1h')
+        xdf.attrs['last_update'] = str(tstart + timedelta(hours=3))
+        tstart += timedelta(days=1)
+        c.save(xdf)
+
 
 def test_xarray2zarr(tmp_path_factory):
     xdf = generate_test_data(
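
Note: the new 70,000-iteration test prints its progress, which is what the addopts = "-s" change to pyproject.toml above (disabling pytest's output capture) makes visible. Since the test is tagged @pytest.mark.slow, it can be deselected in routine runs; the slow marker should be registered under [tool.pytest.ini_options] to avoid a PytestUnknownMarkWarning, though whether tonik does so is not shown in this diff. A minimal sketch:

import pytest

# Hypothetical: run the save tests but skip anything marked slow.
pytest.main(["-m", "not slow", "tests/test_save.py"])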
--- /dev/null
+++ tonik-0.1.21/tests/test_utils.py
@@ -0,0 +1,92 @@
+from datetime import datetime, timedelta, timezone
+import numpy as np
+import pytest
+
+from tonik.utils import (extract_consecutive_integers,
+                         generate_test_data,
+                         round_datetime,
+                         floor_datetime)
+
+
+def test_extract_consecutive_integers():
+    nums = [1, 2, 3, 5, 6, 7, 8, 10]
+    assert extract_consecutive_integers(
+        nums) == [[1, 2, 3], [5, 6, 7, 8], [10]]
+    assert extract_consecutive_integers([1]) == [[1]]
+    assert extract_consecutive_integers(np.array([1, 2, 4])) == [[1, 2], [4]]
+
+
+def test_generate_test_data():
+    """
+    Test data generation function.
+    """
+    tstart = datetime.now(timezone.utc) - timedelta(days=30)
+    tstart = floor_datetime(tstart, timedelta(days=1))
+    tstart = tstart.replace(tzinfo=None)
+    data = generate_test_data(tstart='2023-01-01', freq='1min', seed=42,
+                              ndays=3)
+    assert 'datetime' in data.coords
+    assert data.rsam.shape[0] == 3*24*60  # 3 days of 1-minute samples
+    assert 'rsam' in data.data_vars
+    assert 'dsar' in data.data_vars
+    # Check for NaNs
+    n_nans = np.isnan(data.dsar.values).sum()
+    assert n_nans == 408
+
+
+def test_floor_datetime_basic_10min():
+    dt = datetime.fromisoformat("2025-11-27T10:12:43")
+    out = floor_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10, 0)
+
+
+def test_floor_datetime_on_boundary():
+    dt = datetime.fromisoformat("2025-11-27T10:20:00")
+    out = floor_datetime(dt, 600)
+    assert out == dt
+
+
+def test_floor_datetime_timedelta_interval():
+    dt = datetime.fromisoformat("2025-11-27T10:29:59")
+    out = floor_datetime(dt, timedelta(minutes=10))
+    assert out == datetime(2025, 11, 27, 10, 20, 0)
+
+
+def test_floor_datetime_invalid_interval():
+    dt = datetime.fromisoformat("2025-11-27T10:12:43")
+    with pytest.raises(ValueError):
+        floor_datetime(dt, 0)
+    with pytest.raises(ValueError):
+        floor_datetime(dt, -15)
+
+
+def test_floor_datetime_preserves_timezone_utc():
+    dt = datetime(2025, 11, 27, 10, 12, 43, tzinfo=timezone.utc)
+    out = floor_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10, 0, tzinfo=timezone.utc)
+
+
+def test_floor_datetime_with_obspy_UTCDateTime():
+    try:
+        from obspy import UTCDateTime
+    except Exception:
+        pytest.skip("obspy not available")
+
+    t = UTCDateTime(2025, 11, 27, 10, 12, 43)
+    out = floor_datetime(t, 600)
+    assert isinstance(out, UTCDateTime)
+    assert out == UTCDateTime(2025, 11, 27, 10, 10, 0)
+
+
+def test_round_datetime_basic_10min():
+    dt = datetime.fromisoformat("2025-11-27T10:12:43")
+    out = round_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10)
+
+    dt = datetime.fromisoformat("2025-11-27T10:10:00")
+    out = round_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 10)
+
+    dt = datetime.fromisoformat("2025-11-27T10:17:00")
+    out = round_datetime(dt, 600)
+    assert out == datetime(2025, 11, 27, 10, 20)
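
Note: one midpoint subtlety the tests above do not pin down. round_datetime computes floor(seconds + interval/2), so a time exactly halfway between two boundaries rounds up; this is an observation about the implementation shown earlier, not documented behaviour:

from datetime import datetime
from tonik.utils import round_datetime

# 10:15 is equidistant from 10:10 and 10:20 on a 600 s grid; half-up wins.
print(round_datetime(datetime(2025, 11, 27, 10, 15), 600))  # 2025-11-27 10:20:00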
--- tonik-0.1.19/tests/test_utils.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import numpy as np
-
-from tonik.utils import extract_consecutive_integers
-
-
-def test_extract_consecutive_integers():
-    nums = [1, 2, 3, 5, 6, 7, 8, 10]
-    assert extract_consecutive_integers(
-        nums) == [[1, 2, 3], [5, 6, 7, 8], [10]]
-    assert extract_consecutive_integers([1]) == [[1]]
-    assert extract_consecutive_integers(np.array([1, 2, 4])) == [[1, 2], [4]]