tonik 0.1.2__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tonik-0.1.2 → tonik-0.1.4}/PKG-INFO +1 -1
- {tonik-0.1.2 → tonik-0.1.4}/pyproject.toml +1 -1
- {tonik-0.1.2 → tonik-0.1.4}/src/tonik/storage.py +6 -50
- {tonik-0.1.2 → tonik-0.1.4}/src/tonik/xarray2netcdf.py +9 -7
- {tonik-0.1.2 → tonik-0.1.4}/src/tonik/xarray2zarr.py +15 -8
- {tonik-0.1.2 → tonik-0.1.4}/tests/backend_speed_test.py +4 -1
- {tonik-0.1.2 → tonik-0.1.4}/tests/test_group.py +36 -23
- {tonik-0.1.2 → tonik-0.1.4}/tests/test_save.py +2 -0
- {tonik-0.1.2 → tonik-0.1.4}/.devcontainer/devcontainer.json +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/.gitignore +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/HOW_TO_RELEASE.md +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/LICENSE +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/README.md +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/docs/index.md +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/docs/tonik_example.ipynb +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/mkdocs.yml +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/src/tonik/__init__.py +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/src/tonik/api.py +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/src/tonik/package_data/index.html +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/src/tonik/utils.py +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/tests/conftest.py +0 -0
- {tonik-0.1.2 → tonik-0.1.4}/tests/test_api.py +0 -0
{tonik-0.1.2 → tonik-0.1.4}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tonik
-Version: 0.1.2
+Version: 0.1.4
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
```
{tonik-0.1.2 → tonik-0.1.4}/src/tonik/storage.py

```diff
@@ -1,9 +1,7 @@
 import logging
 import logging.config
 import os
-import re
 
-import pandas as pd
 import xarray as xr
 
 from .xarray2netcdf import xarray2netcdf
```
```diff
@@ -113,70 +111,28 @@ class Path(object):
         self.children[feature] = Path(feature + file_ending, self.path)
         return _feature_path
 
-    def __call__(self, feature,
+    def __call__(self, feature, group='original'):
         """
         Request a particular feature
 
         :param feature: Feature name
         :type feature: str
-        :param stack_length: length of moving average in time
-        :type stack_length: str
 
         """
-        if self.endtime
+        if self.endtime < self.starttime:
             raise ValueError('Startime has to be smaller than endtime.')
 
         filename = self.feature_path(feature)
 
         logger.debug(
             f"Reading feature {feature} between {self.starttime} and {self.endtime}")
-        num_periods = None
-        if stack_length is not None:
-            valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
-            if re.match(r'\d*\s*(\w*)', stack_length).group(1) not in valid_stack_units:
-                raise ValueError(
-                    'Stack length should be one of: {}'.
-                    format(', '.join(valid_stack_units))
-                )
-
-            if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
-                raise ValueError('Stack length {} is less than interval {}'.
-                                 format(stack_length, interval))
-
-            # Rewind starttime to account for stack length
-            self.starttime -= pd.to_timedelta(stack_length)
-
-            num_periods = (pd.to_timedelta(stack_length) /
-                           pd.to_timedelta(interval))
-            if not num_periods.is_integer():
-                raise ValueError(
-                    'Stack length {} / interval {} = {}, but it needs'
-                    ' to be a whole number'.
-                    format(stack_length, interval, num_periods))
 
         xd_index = dict(datetime=slice(self.starttime, self.endtime))
-        with xr.open_dataset(filename, group=
-            rq = ds.loc[xd_index].load()
-
-        # Stack features
-        if stack_length is not None:
-            logger.debug("Stacking feature...")
-            try:
-                xdf = rq[feature].rolling(datetime=int(num_periods),
-                                          center=False,
-                                          min_periods=1).mean()
-                # Return requested timeframe to that defined in initialisation
-                self.starttime += pd.to_timedelta(stack_length)
-                xdf_new = xdf.loc[self.starttime:self.endtime]
-                xdf_new = xdf_new.rename(feature)
-            except ValueError as e:
-                logger.error(e)
-                logger.error('Stack length {} is not valid for feature {}'.
-                             format(stack_length, feature))
-            else:
-                return xdf_new
+        with xr.open_dataset(filename, group=group, engine=self.engine) as ds:
+            rq = ds[feature].loc[xd_index].load()
+            rq.attrs = ds.attrs
 
-        return rq
+        return rq
 
     def load(self, *args, **kwargs):
         """
```
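The upshot of this hunk: the rolling-average ("stacking") path is removed along with the `re` and `pandas` imports above, and a feature request now simply opens the requested group and passes it straight to `xr.open_dataset`. A minimal sketch of the new read path, reusing the `Storage`, `'volcanoes'`, and `'rsam'` names from tests/test_group.py further down; the import path and rootdir are assumptions for illustration:

```python
from datetime import datetime

from tonik import Storage  # assumed import path, mirroring the tests

store = Storage('volcanoes', rootdir='/tmp/data',
                starttime=datetime(2016, 1, 1),
                endtime=datetime(2016, 1, 2, 12))
rsam = store('rsam')                        # reads the default 'original' group
rsam_mod = store('rsam', group='modified')  # reads an alternative group
```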
{tonik-0.1.2 → tonik-0.1.4}/src/tonik/xarray2netcdf.py

```diff
@@ -11,7 +11,7 @@ from cftime import date2num, num2date
 from .utils import merge_arrays
 
 
-def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
+def xarray2netcdf(xArray, fdir, group="original", timedim="datetime",
                   archive_starttime=datetime(2000, 1, 1), resolution=None,
                   mode='a'):
     """
```
```diff
@@ -23,7 +23,7 @@ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
         Data to store.
     fdir : str
         Directory to store data under.
-    rootGroupName : str
+    group : str
         Hdf5 group name.
     timedim : str
         Name of time dimension.
```
```diff
@@ -49,22 +49,22 @@ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
         if os.path.isfile(h5file) and mode == 'a':
             if archive_starttime > data_starttime:
                 xds_existing = xr.open_dataset(
-                    h5file, group=rootGroupName, engine='h5netcdf')
+                    h5file, group=group, engine='h5netcdf')
                 xda_new = merge_arrays(
                     xds_existing[featureName], xArray[featureName],
                     resolution=resolution)
                 xds_existing.close()
-                xda_new.to_netcdf(h5file, group=rootGroupName,
+                xda_new.to_netcdf(h5file, group=group,
                                   mode='w', engine='h5netcdf')
                 continue
         _mode = 'a'
 
         with h5netcdf.File(h5file, _mode) as h5f:
             try:
-                rootGrp = _create_h5_Structure(rootGroupName, featureName,
+                rootGrp = _create_h5_Structure(group, featureName,
                                                h5f, xArray, starttime, timedim)
             except ValueError:  # group already exists, append
-                rootGrp = h5f[rootGroupName]
+                rootGrp = h5f[group]
 
             # determine indices
             new_time = date2num(xArray[timedim].values.astype('datetime64[us]').astype(datetime),
```
```diff
@@ -88,6 +88,8 @@ def xarray2netcdf(xArray, fdir, rootGroupName="original", timedim="datetime",
             data[indices] = xArray[featureName].values
             rootGrp.attrs['endtime'] = str(num2date(times[-1], units=rootGrp[timedim].attrs['units'],
                                                     calendar=rootGrp[timedim].attrs['calendar']))
+            rootGrp.attrs['resolution'] = resolution
+            rootGrp.attrs['resolution_units'] = 'h'
             try:
                 _setMetaInfo(featureName, h5f, xArray)
             except KeyError as e:
```
```diff
@@ -102,7 +104,7 @@ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime,
     coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
     coordinates.attrs['calendar'] = 'gregorian'
     rootGrp.attrs['starttime'] = str(starttime)
-    for label, size in xArray.dims.items():
+    for label, size in xArray.sizes.items():
         if not np.issubdtype(xArray[label].dtype, np.datetime64):
             rootGrp.dimensions[label] = size
             coordinates = rootGrp.create_variable(label, (label,), float)
```
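Taken together, the xarray2netcdf changes rename `rootGroupName` to `group`, record the write resolution (in hours) on the group's attributes, and iterate dimension sizes via `xArray.sizes`. A hedged sketch of a call against the new signature; the dataset construction and paths are illustrative, with the 0.1-hour (6-minute) resolution taken from tests/test_save.py below:

```python
import numpy as np
import pandas as pd
import xarray as xr

from tonik.xarray2netcdf import xarray2netcdf

# Illustrative 6-minute dataset; variable and dimension names mirror the tests.
times = pd.date_range('2022-07-18', periods=24, freq='6min')
xds = xr.Dataset({'rsam': ('datetime', np.random.rand(times.size))},
                 coords={'datetime': times})
# 'group' replaces the old 'rootGroupName'; the group now also carries
# attrs['resolution'] = 0.1 and attrs['resolution_units'] = 'h'.
xarray2netcdf(xds, '/tmp/archive', group='original', resolution=0.1)
```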
{tonik-0.1.2 → tonik-0.1.4}/src/tonik/xarray2zarr.py

```diff
@@ -2,13 +2,14 @@ import logging
 import os
 
 import xarray as xr
+from zarr.errors import PathNotFoundError
 
 from .utils import merge_arrays
 
 logger = logging.getLogger(__name__)
 
 
-def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
+def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a', group='original'):
     """
     Write xarray dataset to zarr files.
 
```
```diff
@@ -20,6 +21,8 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
         Path to write the dataset.
     mode : str, optional
         Write mode, by default 'a'.
+    group : str, optional
+        Group name, by default 'original'
 
     Returns
     -------
```
```diff
@@ -29,28 +32,32 @@ def xarray2zarr(xds: xr.Dataset, path: str, mode: str = 'a'):
         fout = os.path.join(path, feature + '.zarr')
         if not os.path.exists(fout) or mode == 'w':
             xds[feature].to_zarr(
-                fout, group='original', mode='w')
+                fout, group=group, mode='w')
         else:
-            xds_existing = xr.open_zarr(fout, group='original')
+            try:
+                xds_existing = xr.open_zarr(fout, group=group)
+            except PathNotFoundError:
+                xds[feature].to_zarr(fout, group=group, mode='a')
+                continue
             if xds_existing.datetime[0] > xds.datetime[0] or xds_existing.datetime[-1] > xds.datetime[-1]:
                 xda_new = merge_arrays(xds_existing[feature], xds[feature])
-                xda_new.to_zarr(fout, group='original', mode='w')
+                xda_new.to_zarr(fout, group=group, mode='w')
             else:
                 try:
                     overlap = xds_existing.datetime.where(
                         xds_existing.datetime == xds.datetime)
                     if overlap.size > 0:
                         xds[feature].loc[dict(datetime=overlap)].to_zarr(
-                            fout, group='original', mode='r+', region='auto')
+                            fout, group=group, mode='r+', region='auto')
                         xds[feature].drop_sel(datetime=overlap).to_zarr(
-                            fout, group='original', mode='a', append_dim="datetime")
+                            fout, group=group, mode='a', append_dim="datetime")
                     else:
                         xds[feature].to_zarr(
-                            fout, group='original', append_dim='datetime')
+                            fout, group=group, append_dim='datetime')
                 except Exception as e:
                     msg = f"Appending {feature} to {fout} failed: {e}\n"
                     msg += "Attempting to merge the two datasets."
                     logger.error(msg)
                     # remove duplicate datetime entries
                     xda_new = merge_arrays(xds_existing[feature], xds[feature])
-                    xda_new.to_zarr(fout, group='original', mode='w')
+                    xda_new.to_zarr(fout, group=group, mode='w')
```
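The zarr writer gains the same `group` parameter, plus a fallback: writing a feature into an existing `.zarr` store under a group that does not yet exist previously failed in `xr.open_zarr`; the new `except PathNotFoundError` branch now appends the missing group with `mode='a'` instead. A rough sketch under assumed illustrative paths and data:

```python
import numpy as np
import pandas as pd
import xarray as xr

from tonik.xarray2zarr import xarray2zarr

times = pd.date_range('2016-01-01', periods=48, freq='h')
xds = xr.Dataset({'rsam': ('datetime', np.random.rand(times.size))},
                 coords={'datetime': times})
xarray2zarr(xds, '/tmp/store')  # default group 'original'
# Same rsam.zarr store, new group: open_zarr raises PathNotFoundError for
# the missing group, and the new branch appends it with mode='a'.
xarray2zarr(xds + 100., '/tmp/store', group='modified')
```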
{tonik-0.1.2 → tonik-0.1.4}/tests/backend_speed_test.py

```diff
@@ -18,7 +18,10 @@ def write_read(backend):
     test_dir = tempfile.mkdtemp()
     sg = Storage('speed_test', test_dir, starttime=tstart, endtime=tend,
                  backend=backend)
-    sg.save(spec)
+    kwargs = {}
+    if backend == 'netcdf':
+        kwargs['archive_starttime'] = tstart
+    sg.save(spec, **kwargs)
     spec_test = sg('ssam')
 
 
```
{tonik-0.1.2 → tonik-0.1.4}/tests/test_group.py

```diff
@@ -27,6 +27,35 @@ def test_group(tmp_path_factory):
     c = g.get_substore('MDR1', '00', 'HHZ')
 
 
+def test_subgroup(tmp_path_factory):
+    """
+    Test storing data in different subgroups in netcdf and zarr.
+    """
+
+    startdate = datetime(2016, 1, 1)
+    enddate = datetime(2016, 1, 2, 12)
+    rootdir = tmp_path_factory.mktemp('data')
+    g = Storage('volcanoes', rootdir=rootdir,
+                starttime=startdate, endtime=enddate)
+    xdf = generate_test_data(dim=1, ndays=20, tstart=startdate)
+    g.save(xdf)
+    xdf.rsam.values += 100.
+    g.save(xdf, group='modified')
+    rsam_original = g('rsam')
+    rsam_modified = g('rsam', group='modified')
+    assert int(rsam_modified.mean()) == (int(rsam_original.mean()) + 100)
+    g = Storage('volcanoes', rootdir=rootdir,
+                starttime=startdate, endtime=enddate,
+                backend='zarr')
+    xdf = generate_test_data(dim=1, ndays=20, tstart=startdate)
+    g.save(xdf)
+    xdf.rsam.values += 100.
+    g.save(xdf, group='modified')
+    rsam_original = g('rsam')
+    rsam_modified = g('rsam', group='modified')
+    assert int(rsam_modified.mean()) == (int(rsam_original.mean()) + 100)
+
+
 def test_non_existant_feature(tmp_path_factory):
     rootdir = tmp_path_factory.mktemp('data')
     g = Storage('test_experiment', rootdir)
```
```diff
@@ -108,34 +137,18 @@ def test_call_single_day(tmp_path_factory):
     assert pd.to_datetime(enddate) == last_time
 
 
-def
+def test_call_single_datapoint(tmp_path_factory):
     rootdir = tmp_path_factory.mktemp('data')
-    startdate = datetime(2016, 1, 1)
-    enddate = datetime(2016, 1, 2, 12)
-    xdf = generate_test_data(dim=1, ndays=20, tstart=startdate)
     g = Storage('volcanoes', rootdir=rootdir)
-
-
-    stack_len_seconds = 3600
-    stack_len_string = '1h'
-
-    num_windows = int(stack_len_seconds / pd.Timedelta(xdf.interval).seconds)
+    startdate = datetime(2016, 1, 2, 1)
+    enddate = startdate
     g.starttime = startdate
     g.endtime = enddate
+    xdf = generate_test_data(dim=1, tstart=startdate)
+    g.save(xdf)
     rsam = g('rsam')
-
-
-    # Check correct datetime array
-    np.testing.assert_array_equal(rsam.datetime.values,
-                                  rsam_rolling.datetime.values)
-    # Check correct values
-    rolling_mean = [
-        np.nanmean(rsam.data[(ind-num_windows+1):ind+1])
-        for ind in np.arange(num_windows, len(rsam_rolling.data))
-    ]
-    np.testing.assert_array_almost_equal(
-        np.array(rolling_mean), rsam_rolling.values[num_windows:], 6
-    )
+    assert float(
+        xdf.rsam.loc[dict(datetime='2016-01-02T01:00:00')]) == float(rsam)
 
 
 def test_shape(tmp_path_factory):
```
{tonik-0.1.2 → tonik-0.1.4}/tests/test_save.py

```diff
@@ -85,6 +85,8 @@ def test_xarray2netcdf_resolution(tmp_path_factory):
     xdf_test = c('rsam')
     assert xdf_test.loc['2022-07-18T00:12:00'] == xdf['rsam'].loc['2022-07-18T00:10:00']
     assert np.isnan(xdf_test.loc['2022-07-18T00:06:00'].data)
+    assert xdf_test.attrs['resolution'] == 0.1
+    assert xdf_test.attrs['resolution_units'] == 'h'
 
 
 def test_xarray2netcdf_with_gaps(tmp_path_factory):
```