tonik 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/__init__.py CHANGED
@@ -2,7 +2,7 @@ import importlib
  from os import PathLike
  from typing import Optional

- from .storage import StorageGroup, Path
+ from .storage import Storage, Path
  from .utils import generate_test_data


@@ -20,4 +20,4 @@ def get_data(filename: Optional[PathLike] = None) -> str:

      """
      f = importlib.resources.files(__package__)
-     return str(f) if filename is None else str(f / filename)
+     return str(f) if filename is None else str(f / filename)
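Apart from the StorageGroup → Storage rename, the public import surface is unchanged. A minimal sketch of the updated top-level API (the printed paths depend on where the package is installed):

from tonik import Storage, get_data

print(get_data())                           # package installation directory
print(get_data('package_data/index.html'))  # path to a bundled data file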
tonik/api.py CHANGED
@@ -15,7 +15,7 @@ from fastapi.responses import HTMLResponse, StreamingResponse
  from pydantic import BaseModel
  from typing import Annotated

- from .storage import StorageGroup
+ from .storage import Storage
  from . import get_data

  logger = logging.getLogger(__name__)
@@ -66,12 +66,12 @@ class TonikAPI:
                  subdir: Annotated[list[str] | None, Query()] = None):
          _st = self.preprocess_datetime(starttime)
          _et = self.preprocess_datetime(endtime)
-         g = StorageGroup(group, rootdir=self.rootdir,
-                          starttime=_st, endtime=_et)
+         g = Storage(group, rootdir=self.rootdir,
+                     starttime=_st, endtime=_et)
          if subdir is None:
              c = g
          else:
-             c = g.get_store(*subdir)
+             c = g.get_substore(*subdir)
          try:
              feat = c(name)
          except ValueError as e:
@@ -137,11 +137,21 @@ class TonikAPI:
              d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
          return freq, dates, spec

-     def inventory(self, group: str) -> dict:
-         sg = StorageGroup(group, rootdir=self.rootdir)
-         return sg.to_dict()
-
-     # ta = TonikAPI('/tmp').feature()
+     def inventory(self, group: str, subdir: Annotated[list[str] | None, Query()] = None, tree: bool = True) -> list | dict:
+         sg = Storage(group, rootdir=self.rootdir, create=False)
+         try:
+             c = sg.get_substore(*subdir)
+         except TypeError:
+             c = sg
+         except FileNotFoundError as e:
+             msg = "Directory {} not found.".format(
+                 '/'.join([sg.path] + subdir))
+             raise HTTPException(status_code=404, detail=msg)
+         if tree and not subdir:
+             return sg.to_dict()
+         else:
+             dir_contents = os.listdir(c.path)
+             return [fn.replace('.nc', '').replace('.zarr', '') for fn in dir_contents]


  def main(argv=None):
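The reworked inventory endpoint returns the full directory tree when tree is true and no subdir is given, otherwise a flat list of feature names with the '.nc'/'.zarr' endings stripped, and answers 404 if the requested sub-store directory does not exist. A hedged client sketch; the /inventory route path and host are assumptions, since the diff only shows the handler:

import requests

base = "http://localhost:8000"  # assumed address of a running TonikAPI app

# full tree for a group (tree defaults to True)
tree = requests.get(f"{base}/inventory", params={"group": "volcanoes"}).json()

# flat listing of one sub-store; file endings are stripped by the handler
names = requests.get(f"{base}/inventory",
                     params={"group": "volcanoes", "subdir": ["WIZ", "HHZ"],
                             "tree": False}).json()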
tonik/storage.py CHANGED
@@ -1,19 +1,13 @@
- from datetime import datetime, timedelta
- import json
- import glob
  import logging
  import logging.config
  import os
  import re
- import tempfile

  import pandas as pd
  import xarray as xr

  from .xarray2hdf5 import xarray2hdf5
-
-
- ERROR_LOG_FILENAME = "tonik.log"
+ from .xarray2zarr import xarray2zarr

  LOGGING_CONFIG = {
      "version": 1,
@@ -51,14 +45,6 @@ LOGGING_CONFIG = {
          },
      },
      "handlers": {
-         "logfile": {  # The handler name
-             "formatter": "json",  # Refer to the formatter defined above
-             "level": "ERROR",  # FILTER: Only ERROR and CRITICAL logs
-             "class": "logging.handlers.RotatingFileHandler",  # OUTPUT: Which class to use
-             # Param for class above. Defines filename to use, load it from constant
-             "filename": ERROR_LOG_FILENAME,
-             "backupCount": 2,  # Param for class above. Defines how many log files to keep as it grows
-         },
          "simple": {  # The handler name
              "formatter": "default",  # Refer to the formatter defined above
              "class": "logging.StreamHandler",  # OUTPUT: Same as above, stream to console
@@ -66,7 +52,7 @@ LOGGING_CONFIG = {
          },
      },
      "loggers": {
-         "zizou": {  # The name of the logger, this SHOULD match your module!
+         "storage": {  # The name of the logger, this SHOULD match your module!
              "level": "DEBUG",  # FILTER: only INFO logs onwards from "tryceratops" logger
              "handlers": [
                  "simple",  # Refer the handler defined above
@@ -74,9 +60,9 @@ LOGGING_CONFIG = {
          },
      },
      "root": {
-         "level": "ERROR",  # FILTER: only INFO logs onwards
+         "level": "INFO",  # FILTER: only INFO logs onwards
          "handlers": [
-             "logfile",  # Refer the handler defined above
+             "simple",  # Refer the handler defined above
          ]
      },
  }
@@ -86,13 +72,19 @@ logger = logging.getLogger("__name__")


  class Path(object):
-     def __init__(self, name, parentdir):
+     def __init__(self, name, parentdir, create=True, backend='zarr'):
          self.name = name
+         self.create = create
+         self.backend = backend
          self.path = os.path.join(parentdir, name)
-         try:
-             os.makedirs(self.path, exist_ok=True)
-         except FileExistsError:
-             pass
+         if create:
+             try:
+                 os.makedirs(self.path, exist_ok=True)
+             except FileExistsError:
+                 pass
+         else:
+             if not os.path.exists(self.path):
+                 raise FileNotFoundError(f"Path {self.path} not found")
          self.children = {}

      def __str__(self):
@@ -104,14 +96,20 @@ class Path(object):
          try:
              return self.children[key]
          except KeyError:
-             self.children[key] = Path(key, self.path)
+             self.children[key] = Path(
+                 key, self.path, self.create, self.backend)
              return self.children[key]

      def feature_path(self, feature):
-         _feature_path = os.path.join(self.path, feature + ".nc")
+
+         if self.backend == 'h5netcdf':
+             file_ending = '.nc'
+         elif self.backend == 'zarr':
+             file_ending = '.zarr'
+         _feature_path = os.path.join(self.path, feature + file_ending)
          if not os.path.exists(_feature_path):
              raise FileNotFoundError(f"File {_feature_path} not found")
-         self.children[feature] = Path(feature + ".nc", self.path)
+         self.children[feature] = Path(feature + file_ending, self.path)
          return _feature_path

      def __call__(self, feature, stack_length=None, interval='10min'):
@@ -134,8 +132,7 @@ class Path(object):
          num_periods = None
          if stack_length is not None:
              valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
-             if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
-                     in valid_stack_units:
+             if re.match(r'\d*\s*(\w*)', stack_length).group(1) not in valid_stack_units:
                  raise ValueError(
                      'Stack length should be one of: {}'.
                      format(', '.join(valid_stack_units))
@@ -157,9 +154,12 @@ class Path(object):
                      format(stack_length, interval, num_periods))

          xd_index = dict(datetime=slice(self.starttime, self.endtime))
-         with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
-             ds.sortby("datetime")
-             rq = ds.loc[xd_index].load()
+         with xr.open_dataset(filename, group='original', engine=self.backend) as ds:
+             try:
+                 rq = ds.loc[xd_index].load()
+             except KeyError:
+                 ds = ds.sortby("datetime")
+                 rq = ds.loc[xd_index].load()

          # Stack features
          if stack_length is not None:
@@ -191,10 +191,13 @@ class Path(object):
          """
          Save a feature to disk
          """
-         xarray2hdf5(data, self.path, **kwargs)
+         if self.backend == 'h5netcdf':
+             xarray2hdf5(data, self.path, **kwargs)
+         elif self.backend == 'zarr':
+             xarray2zarr(data, self.path, **kwargs)


- class StorageGroup(Path):
+ class Storage(Path):
      """
      Query computed features

@@ -206,7 +209,7 @@ class StorageGroup(Path):
      :type endtime: :class:`datetime.datetime`

      >>> import datetime
-     >>> g = Group('Whakaari')
+     >>> g = Storage('Whakaari', /tmp)
      >>> start = datetime.datetime(2012,1,1,0,0,0)
      >>> end = datetime.datetime(2012,1,2,23,59,59)
      >>> g.starttime = start
@@ -215,11 +218,11 @@ class StorageGroup(Path):
      >>> rsam = c("rsam")
      """

-     def __init__(self, name, rootdir=None, starttime=None, endtime=None):
+     def __init__(self, name, rootdir, starttime=None, endtime=None, create=True, backend='zarr'):
          self.stores = set()
          self.starttime = starttime
          self.endtime = endtime
-         super().__init__(name, rootdir)
+         super().__init__(name, rootdir, create, backend)

      def print_tree(self, site, indent=0, output=''):
          output += ' ' * indent + site.path + '\n'
@@ -232,7 +235,7 @@ class StorageGroup(Path):
          rstr = self.print_tree(self, 0, rstr)
          return rstr

-     def get_store(self, *args):
+     def get_substore(self, *args):
          # return the store for a given site, sensor, or channel
          # if one of them is None return the store for the level above
          # if all are None return the root store
@@ -257,30 +260,35 @@ class StorageGroup(Path):
              try:
                  subdirs = root.split(self.path)[1].split(os.sep)[1:]
              except IndexError:
-                 st = self.get_store()
+                 st = self.get_substore()
              else:
                  try:
-                     st = self.get_store(*subdirs)
+                     st = self.get_substore(*subdirs)
                  except TypeError as e:
                      raise e
              for _f in files:
                  if _f.endswith('.nc'):
-                     st.feature_path(_f.replace('.nc', ''))
+                     st.feature_path(_f.replace(
+                         '.nc', '').replace('.zarr', ''))

      @staticmethod
      def directory_tree_to_dict(path):
          name = os.path.basename(path)
-         if os.path.isdir(path):
-             return {name: [StorageGroup.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
+         if name.endswith('.zarr'):
+             return name.replace('.zarr', '')
+         elif os.path.isdir(path):
+             return {name: [Storage.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
          else:
-             if path.endswith('.nc'):
+             if name.endswith('.nc'):
                  return name.replace('.nc', '')
+             else:
+                 return

      def to_dict(self):
          """
          Convert the storage group to json
          """
-         return StorageGroup.directory_tree_to_dict(self.path)
+         return Storage.directory_tree_to_dict(self.path)

      def get_starttime(self):
          return self.__starttime
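In sum, storage.py renames StorageGroup to Storage and get_store to get_substore, makes rootdir a required argument, and threads new create and backend flags through Path, with zarr now the default backend. A usage sketch following the class docstring above (group and sub-store names are illustrative):

import datetime
from tonik import Storage

g = Storage('Whakaari', rootdir='/tmp', backend='zarr')  # create=True by default
g.starttime = datetime.datetime(2012, 1, 1)
g.endtime = datetime.datetime(2012, 1, 2, 23, 59, 59)
c = g.get_substore('WIZ', 'HHZ')  # formerly g.get_store(...)
rsam = c("rsam")                  # reads rsam.zarr via the zarr engine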
tonik/utils.py CHANGED
@@ -6,8 +6,9 @@ import xarray as xr


  def generate_test_data(dim=1, ndays=30, nfreqs=10,
-                        tstart=datetime.utcnow(),
-                        feature_name=None,
+                        tstart=datetime.now(),
+                        freq='10min', intervals=None,
+                        feature_name=None, seed=42,
                         freq_name=None, add_nans=True):
      """
      Generate a 1D or 2D feature for testing.
@@ -15,9 +16,12 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
      assert dim < 3
      assert dim > 0

-     nints = ndays * 6 * 24
-     dates = pd.date_range(tstart.strftime('%Y-%m-%d'), freq='10min', periods=nints)
-     rs = np.random.default_rng(42)
+     if intervals is None:
+         nints = ndays * 6 * 24
+     else:
+         nints = intervals
+     dates = pd.date_range(tstart, freq=freq, periods=nints)
+     rs = np.random.default_rng(seed)
      # Random walk as test signal
      data = np.abs(np.cumsum(rs.normal(0, 8., len(dates))))
      if dim == 2:
@@ -29,7 +33,8 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
          data[idx_nan] = np.nan
      if feature_name is None:
          feature_name = 'rsam'
-     xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
+     xrd = xr.Dataset({feature_name: xr.DataArray(
+         data, coords=[dates], dims=['datetime'])})
      if dim == 2:
          if add_nans:
              data[:, idx_nan] = np.nan
@@ -38,9 +43,10 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
          feature_name = 'ssam'
      if freq_name is None:
          freq_name = 'frequency'
-     xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
+     xrd = xr.Dataset({feature_name: xr.DataArray(
+         data, coords=[freqs, dates], dims=[freq_name, 'datetime'])})
      xrd.attrs['starttime'] = dates[0].isoformat()
      xrd.attrs['endtime'] = dates[-1].isoformat()
      xrd.attrs['station'] = 'MDR'
      xrd.attrs['interval'] = '10min'
-     return xrd
+     return xrd
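generate_test_data now exposes the sampling frequency, the number of intervals, and the RNG seed as parameters, and passes tstart straight to pd.date_range instead of flooring it to midnight. For example:

from datetime import datetime
from tonik.utils import generate_test_data

# 100 one-minute samples of the 1D 'rsam' test feature, reproducible via seed
xds = generate_test_data(dim=1, intervals=100, freq='1min',
                         tstart=datetime(2024, 1, 1), seed=7)
print(xds['rsam'].sizes)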
tonik/xarray2zarr.py ADDED
@@ -0,0 +1,23 @@
+ import os
+
+ import xarray as xr
+
+
+ def xarray2zarr(xds, path, mode='a'):
+     for feature in xds.data_vars.keys():
+         fout = os.path.join(path, feature + '.zarr')
+         if not os.path.exists(fout) or mode == 'w':
+             xds[feature].to_zarr(
+                 fout, group='original', mode='w')
+         else:
+             xds_existing = xr.open_zarr(fout, group='original')
+             overlap = xds_existing.datetime.where(
+                 xds_existing.datetime == xds.datetime)
+             if overlap.size > 0:
+                 xds.loc[dict(datetime=overlap)].to_zarr(
+                     fout, group='original', mode='r+', region='auto')
+                 xds.drop_sel(datetime=overlap).to_zarr(
+                     fout, group='original', mode='a', append_dim="datetime")
+             else:
+                 xds[feature].to_zarr(
+                     fout, group='original', append_dim='datetime')
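The new xarray2zarr writer puts each data variable in its own <feature>.zarr store under group 'original': first writes (or mode='w') create the store, timestamps already present are updated in place via region='auto', and new timestamps are appended along the datetime dimension. A hedged round-trip sketch using the package's own test-data helper (the path is a placeholder):

import os
from datetime import datetime

import xarray as xr
from tonik.utils import generate_test_data
from tonik.xarray2zarr import xarray2zarr

path = '/tmp/tonik_zarr_demo'  # placeholder directory
os.makedirs(path, exist_ok=True)

# two disjoint hour-long chunks: the second gets appended along 'datetime'
xarray2zarr(generate_test_data(tstart=datetime(2024, 1, 1), intervals=6), path)
xarray2zarr(generate_test_data(tstart=datetime(2024, 1, 2), intervals=6), path)

ds = xr.open_zarr(os.path.join(path, 'rsam.zarr'), group='original')
print(ds.datetime.size)  # 12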
{tonik-0.0.7.dist-info → tonik-0.0.8.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.1
  Name: tonik
- Version: 0.0.7
+ Version: 0.0.8
  Summary: Store time series data as HDF5 files and access them through an API.
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -20,6 +20,7 @@ Requires-Dist: pandas>=2.0
  Requires-Dist: python-json-logger>=2.0
  Requires-Dist: uvicorn[standard]>=0.22
  Requires-Dist: xarray>=2023.4
+ Requires-Dist: zarr
  Provides-Extra: dev
  Requires-Dist: mkdocs; extra == 'dev'
  Requires-Dist: mkdocs-jupyter; extra == 'dev'
tonik-0.0.8.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ tonik/__init__.py,sha256=ZBVGh4dm_l9xwiBGb33O5QV9MfZeNiEd3DBDAm6DiHk,511
+ tonik/api.py,sha256=vdsWHNGGWo4sbqlDyZQj2tX5oe6hAWCzyL6ffsfpCB4,6437
+ tonik/storage.py,sha256=sScIFA4KXURNPwTnV-rvDh6cWCy9sRrErr9BshZpw2I,11303
+ tonik/utils.py,sha256=YD2zZx5nKGfTJKTYTsEZVV78uNRCSakvU_6X6Mgwx-s,1664
+ tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
+ tonik/xarray2zarr.py,sha256=d7FAOe7DESbKC9CZS41r62DjlNy0S8ik01lMGXBvJKw,901
+ tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
+ tonik-0.0.8.dist-info/METADATA,sha256=uqrHvBl01n05hqWN8lnNETuoYIGZ8vNq9QWnRmajcEY,1938
+ tonik-0.0.8.dist-info/WHEEL,sha256=KGYbc1zXlYddvwxnNty23BeaKzh7YuoSIvIMO4jEhvw,87
+ tonik-0.0.8.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
+ tonik-0.0.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ tonik-0.0.8.dist-info/RECORD,,
{tonik-0.0.7.dist-info → tonik-0.0.8.dist-info}/WHEEL RENAMED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.25.0
+ Generator: hatchling 1.17.1
  Root-Is-Purelib: true
  Tag: py3-none-any
tonik-0.0.7.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
- tonik/api.py,sha256=gnwoss7UV8FaY92xzumhcoVPjkzB695qgByHUYcLSw4,5916
- tonik/storage.py,sha256=pJnvoGFb8uZqnpkjOsgnntW-a7dhKVlvevs725nAS54,11009
- tonik/utils.py,sha256=nV0lK8Azasr8LUuQGXxfxef6nU3bn3dCTQnQTmWsKAY,1534
- tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
- tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
- tonik-0.0.7.dist-info/METADATA,sha256=6DhYEfnEAWSKLEZJQQRiRF_cZAGAQFK6mLmHQEYJbuE,1918
- tonik-0.0.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
- tonik-0.0.7.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
- tonik-0.0.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- tonik-0.0.7.dist-info/RECORD,,