tonik 0.0.6__tar.gz → 0.0.7__tar.gz

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
@@ -4,4 +4,5 @@ dist/
 tonik.log
 .vscode
 *.nfs*
-.gitignore
+.gitignore
+site/
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tonik
-Version: 0.0.6
+Version: 0.0.7
 Summary: Store time series data as HDF5 files and access them through an API.
 Project-URL: Homepage, https://tsc-tools.github.io/tonik
 Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "tonik"
-version = "0.0.6"
+version = "0.0.7"
 authors = [
   { name="Yannik Behr", email="y.behr@gns.cri.nz" },
   { name="Christof Mueller", email="c.mueller@gns.cri.nz" }
@@ -20,12 +20,12 @@ from . import get_data
 
 logger = logging.getLogger(__name__)
 
-
+
 class TonikAPI:
 
     def __init__(self, rootdir) -> None:
         self.rootdir = rootdir
-        self.app = FastAPI()
+        self.app = FastAPI()
 
         # -- allow any origin to query API
         self.app.add_middleware(CORSMiddleware,
@@ -55,20 +55,23 @@ class TonikAPI:
         return dt
 
     def feature(self,
-                group: str='Ruapehu',
-                name: str='rsam',
-                starttime: str=None,
-                endtime: str=None,
-                resolution: str='full',
-                verticalres: int=10,
-                log: bool=False,
-                normalise: bool=False,
-                subdir: Annotated[list[str] | None, Query()]=None):
+                group: str,
+                name: str,
+                starttime: str = None,
+                endtime: str = None,
+                resolution: str = 'full',
+                verticalres: int = 10,
+                log: bool = False,
+                normalise: bool = False,
+                subdir: Annotated[list[str] | None, Query()] = None):
         _st = self.preprocess_datetime(starttime)
         _et = self.preprocess_datetime(endtime)
         g = StorageGroup(group, rootdir=self.rootdir,
-                         starttime=_st, endtime=_et)
-        c = g.get_store(*subdir)
+                         starttime=_st, endtime=_et)
+        if subdir is None:
+            c = g
+        else:
+            c = g.get_store(*subdir)
         try:
             feat = c(name)
         except ValueError as e:
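For context (not part of the diff): group and name lose their defaults here and become required query parameters of the /feature endpoint, while subdir stays optional and, when omitted, the store lookup falls back to the group itself. A minimal client sketch, assuming a locally running instance on port 8003 (the port used in main()) and reusing example values from the test suite; httpx is only an assumption, any HTTP client works:

    import httpx

    params = {
        "group": "volcanoes",                # now required
        "name": "rsam",                      # now required
        "subdir": ["MDR", "00", "BHZ"],      # optional, sent as a repeated query parameter
        "starttime": "2023-01-01T00:00:00",  # hypothetical time window
        "endtime": "2023-01-02T00:00:00",
    }
    r = httpx.get("http://localhost:8003/feature", params=params)
    r.raise_for_status()
    print(r.text.splitlines()[0])            # CSV header: dates,feature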
@@ -80,7 +83,8 @@ class TonikAPI:
             nfreqs = feat.shape[0]
             dates = feat.coords[feat.dims[1]].values
             if resolution != 'full':
-                freq, dates, spec = self.aggregate_feature(resolution, verticalres, feat, nfreqs, dates)
+                freq, dates, spec = self.aggregate_feature(
+                    resolution, verticalres, feat, nfreqs, dates)
             else:
                 spec = feat.values
                 freq = feat.coords[feat.dims[0]].values
@@ -88,44 +92,50 @@ class TonikAPI:
             if log and feat.name != 'sonogram':
                 vals = 10*np.log10(vals)
             if normalise:
-                vals = (vals - np.nanmin(vals))/(np.nanmax(vals) - np.nanmin(vals))
+                vals = (vals - np.nanmin(vals)) / \
+                    (np.nanmax(vals) - np.nanmin(vals))
             freqs = freq.repeat(dates.size)
             dates = np.tile(dates, freq.size)
-            df = pd.DataFrame({'dates': dates, 'freqs': freqs, 'feature': vals})
+            df = pd.DataFrame(
+                {'dates': dates, 'freqs': freqs, 'feature': vals})
             output = df.to_csv(index=False,
-                               columns=['dates', 'freqs', 'feature'])
+                               columns=['dates', 'freqs', 'feature'])
         else:
             df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
             df['dates'] = df.index
             try:
-                df = df.resample(str(float(resolution)/60000.0)+'T').mean()
+                current_resolution = pd.Timedelta(df['dates'].diff().mean())
+                if current_resolution < pd.Timedelta(resolution):
+                    df = df.resample(pd.Timedelta(resolution)).mean()
             except ValueError as e:
-                logger.warning(f"Cannot resample {feat.name} to {resolution}: e")
+                logger.warning(
+                    f"Cannot resample {feat.name} to {resolution}: e")
             df.rename(columns={feat.name: 'feature'}, inplace=True)
             output = df.to_csv(index=False, columns=['dates', 'feature'])
         return StreamingResponse(iter([output]),
-                                 media_type='text/csv',
-                                 headers={"Content-Disposition":
-                                          "attachment;filename=<VUMT_feature>.csv",
-                                          'Content-Length': str(len(output))})
-
+                                 media_type='text/csv',
+                                 headers={"Content-Disposition":
+                                          "attachment;filename=<tonik_feature>.csv",
+                                          'Content-Length': str(len(output))})
 
     def aggregate_feature(self, resolution, verticalres, feat, nfreqs, dates):
-        resolution = np.timedelta64(pd.Timedelta(resolution), 'ms').astype(float)
+        resolution = np.timedelta64(
+            pd.Timedelta(resolution), 'ms').astype(float)
         ndays = np.timedelta64(dates[-1] - dates[0], 'ms').astype(float)
-        canvas_x = int(ndays/resolution)
+        canvas_x = int(ndays/resolution)
         canvas_y = min(nfreqs, verticalres)
         dates = date2num(dates.astype('datetime64[us]').astype(datetime),
-                         units='hours since 1970-01-01 00:00:00.0',
-                         calendar='gregorian')
+                         units='hours since 1970-01-01 00:00:00.0',
+                         calendar='gregorian')
         feat = feat.assign_coords({'datetime': dates})
         cvs = dsh.Canvas(plot_width=canvas_x,
-                         plot_height=canvas_y)
+                         plot_height=canvas_y)
         agg = cvs.raster(source=feat)
         freq_dim = feat.dims[0]
         freq, d, spec = agg.coords[freq_dim].values, agg.coords['datetime'].values, agg.data
-        dates = num2date(d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
-        return freq,dates,spec
+        dates = num2date(
+            d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
+        return freq, dates, spec
 
     def inventory(self, group: str) -> dict:
         sg = StorageGroup(group, rootdir=self.rootdir)
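For context (not part of the diff): the 1-D branch no longer derives a resample rule from Grafana-style milliseconds (str(float(resolution)/60000.0)+'T'); resolution is now parsed with pd.Timedelta, and the series is only resampled when the requested resolution is coarser than its native spacing. A toy illustration of that downsample-only logic, not package code:

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2023-01-01", periods=144, freq="10min")
    df = pd.DataFrame({"feature": np.random.rand(len(idx))}, index=idx)

    resolution = "1h"                                      # e.g. '10min', '1h', '1D'
    current = pd.Timedelta(pd.Series(idx).diff().mean())   # native spacing, ~10 minutes
    if current < pd.Timedelta(resolution):                 # only ever coarsen the data
        df = df.resample(pd.Timedelta(resolution)).mean()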
@@ -133,6 +143,7 @@ class TonikAPI:
 
 # ta = TonikAPI('/tmp').feature()
 
+
 def main(argv=None):
     parser = ArgumentParser()
     parser.add_argument("--rootdir", default='/tmp')
@@ -140,5 +151,6 @@ def main(argv=None):
     ta = TonikAPI(args.rootdir)
     uvicorn.run(ta.app, host="0.0.0.0", port=8003)
 
+
 if __name__ == "__main__":
     main()
@@ -18,13 +18,14 @@ ERROR_LOG_FILENAME = "tonik.log"
 LOGGING_CONFIG = {
     "version": 1,
     "disable_existing_loggers": False,
-    "formatters": {
+    "formatters": {
         "default": { # The formatter name, it can be anything that I wish
-            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s", # What to add in the message
+            # What to add in the message
+            "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",
             "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
         },
         "json": { # The formatter name
-            "()": "pythonjsonlogger.jsonlogger.JsonFormatter", # The class to instantiate!
+            "()": "pythonjsonlogger.jsonlogger.JsonFormatter",  # The class to instantiate!
             # Json is more complex, but easier to read, display all attributes!
             "format": """
                 asctime: %(asctime)s
@@ -48,22 +49,23 @@ LOGGING_CONFIG = {
             """,
             "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
         },
-    },
+    },
    "handlers": {
        "logfile": { # The handler name
            "formatter": "json", # Refer to the formatter defined above
            "level": "ERROR", # FILTER: Only ERROR and CRITICAL logs
            "class": "logging.handlers.RotatingFileHandler", # OUTPUT: Which class to use
-            "filename": ERROR_LOG_FILENAME, # Param for class above. Defines filename to use, load it from constant
+            # Param for class above. Defines filename to use, load it from constant
+            "filename": ERROR_LOG_FILENAME,
            "backupCount": 2, # Param for class above. Defines how many log files to keep as it grows
-        },
+        },
        "simple": { # The handler name
            "formatter": "default", # Refer to the formatter defined above
            "class": "logging.StreamHandler", # OUTPUT: Same as above, stream to console
            "stream": "ext://sys.stdout",
        },
    },
-    "loggers": {
+    "loggers": {
        "zizou": { # The name of the logger, this SHOULD match your module!
            "level": "DEBUG", # FILTER: only INFO logs onwards from "tryceratops" logger
            "handlers": [
@@ -92,10 +94,10 @@ class Path(object):
         except FileExistsError:
             pass
         self.children = {}
-
+
     def __str__(self):
         return self.path
-
+
     def __getitem__(self, key):
         if key is None:
             raise ValueError("Key cannot be None")
@@ -125,18 +127,18 @@ class Path(object):
         if self.endtime <= self.starttime:
             raise ValueError('Startime has to be smaller than endtime.')
 
-        feature = feature.lower()
         filename = self.feature_path(feature)
 
-        logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
+        logger.debug(
+            f"Reading feature {feature} between {self.starttime} and {self.endtime}")
         num_periods = None
         if stack_length is not None:
             valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
             if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
-                in valid_stack_units:
+                    in valid_stack_units:
                 raise ValueError(
                     'Stack length should be one of: {}'.
-                    format(', '.join(valid_stack_units))
+                    format(', '.join(valid_stack_units))
                 )
 
             if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
@@ -146,13 +148,13 @@ class Path(object):
             # Rewind starttime to account for stack length
             self.starttime -= pd.to_timedelta(stack_length)
 
-            num_periods = (pd.to_timedelta(stack_length)/
+            num_periods = (pd.to_timedelta(stack_length) /
                            pd.to_timedelta(interval))
             if not num_periods.is_integer():
                 raise ValueError(
                     'Stack length {} / interval {} = {}, but it needs'
                     ' to be a whole number'.
-                    format(stack_length, interval, num_periods))
+                    format(stack_length, interval, num_periods))
 
         xd_index = dict(datetime=slice(self.starttime, self.endtime))
         with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
@@ -164,8 +166,8 @@ class Path(object):
             logger.debug("Stacking feature...")
             try:
                 xdf = rq[feature].rolling(datetime=int(num_periods),
-                                          center=False,
-                                          min_periods=1).mean()
+                                          center=False,
+                                          min_periods=1).mean()
                 # Return requested timeframe to that defined in initialisation
                 self.starttime += pd.to_timedelta(stack_length)
                 xdf_new = xdf.loc[self.starttime:self.endtime]
@@ -212,12 +214,13 @@ class StorageGroup(Path):
     >>> c = g.channel(site='WIZ', sensor='00', channel='HHZ')
     >>> rsam = c("rsam")
     """
+
     def __init__(self, name, rootdir=None, starttime=None, endtime=None):
-        self.stores = set()
+        self.stores = set()
         self.starttime = starttime
         self.endtime = endtime
         super().__init__(name, rootdir)
-
+
     def print_tree(self, site, indent=0, output=''):
         output += ' ' * indent + site.path + '\n'
         for site in site.children.values():
@@ -243,7 +246,7 @@ class StorageGroup(Path):
         st.starttime = self.starttime
         st.endtime = self.endtime
         self.stores.add(st)
-        return st
+        return st
 
     def from_directory(self):
         """
@@ -311,7 +314,5 @@ class StorageGroup(Path):
             if s is not self:
                 s.endtime = time
 
-
    starttime = property(get_starttime, set_starttime)
    endtime = property(get_endtime, set_endtime)
-
@@ -8,6 +8,27 @@ import pandas as pd
 import pytest
 
 
+def test_errors(setup_api):
+    client, l = setup_api
+    params = dict(name='rsam',
+                  subdir=['MDR', '00', 'BHZ'],
+                  starttime=str(l.starttime),
+                  endtime=str(l.endtime))
+    with client.stream("GET", "/feature", params=params) as r:
+        r.read()
+        txt = r.text
+    assert r.status_code == 422
+
+    params = dict(group='volcanoes',
+                  subdir=['MDR', '00', 'BHZ'],
+                  starttime=str(l.starttime),
+                  endtime=str(l.endtime))
+    with client.stream("GET", "/feature", params=params) as r:
+        r.read()
+        txt = r.text
+    assert r.status_code == 422
+
+
 def test_read_1Dfeature(setup_api):
     client, l = setup_api
     params = dict(name='rsam',
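For context (not part of the diff): test_errors relies on FastAPI's request validation; once group and name are required, a request that omits either one is rejected with HTTP 422 before the handler runs. The same check outside pytest, assuming a running instance on port 8003 (httpx is again only an assumption):

    import httpx

    # group is omitted on purpose, so validation fails with 422 Unprocessable Entity.
    r = httpx.get("http://localhost:8003/feature", params={"name": "rsam"})
    assert r.status_code == 422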
@@ -21,7 +42,7 @@ def test_read_1Dfeature(setup_api):
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('rsam').values)
-
+
 
 def test_html_tags(setup_api):
     client, l = setup_api
@@ -36,7 +57,8 @@ def test_html_tags(setup_api):
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('rsam').values)
-
+
+
 def test_read_ssam(setup_api):
     client, l = setup_api
     params = dict(name='ssam',
@@ -63,8 +85,9 @@ def test_read_ssam(setup_api):
         r.read()
         txt = r.text
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
-    assert len(np.unique(df.index)) == 5
-    assert len(np.unique(df['freqs'])) == 8
+    assert len(np.unique(df.index)) == 5
+    assert len(np.unique(df['freqs'])) == 8
+
 
 def test_read_filterbank(setup_api):
     client, l = setup_api
@@ -82,6 +105,7 @@ def test_read_filterbank(setup_api):
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('filterbank').values.ravel(order='C'))
 
+
 def test_log(setup_api):
     client, l = setup_api
     params = dict(name='filterbank',
@@ -115,6 +139,7 @@ def test_autoencoder(setup_api):
     np.testing.assert_array_almost_equal(df['feature'].values,
                                          l('autoencoder').values.ravel(order='C'))
 
+
 def test_normalise(setup_api):
     client, l = setup_api
     params = dict(name='sonogram',
@@ -129,46 +154,48 @@ def test_normalise(setup_api):
         r.read()
         txt = r.text
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
-    assert np.nanmax(df['feature'].values) == 1.
+    assert np.nanmax(df['feature'].values) == 1.
     assert np.nanmin(df['feature'].values) == 0.
 
-@pytest.mark.xfail
+
 def test_aggregate1DFeature(setup_api):
     client, fq = setup_api
     params = dict(name='rsam',
-                  volcano='Mt Doom',
+                  group='volcanoes',
                   subdir=['MDR', '00', 'BHZ'],
                   starttime=str(fq.starttime),
                   endtime=str(fq.endtime),
-                  resolution=3600000, # given in ms seconds by Grafana (here 1 hr)
+                  # given in ms seconds by Grafana (here 1 hr)
+                  resolution='1D',
                   log=False)
     with client.stream("GET", "/feature", params=params) as r:
         r.read()
         txt = r.text
 
     df = pd.read_csv(StringIO(txt), parse_dates=True, index_col=0)
-    assert df.index[1].value == 1448933100000000000
-    assert df.index[2].value == 1448936700000000000
+    assert pd.Timedelta(df.index.diff().mean()) > pd.Timedelta('10min')
+    assert pd.Timedelta(df.index.diff().mean()) <= pd.Timedelta('1D')
+
 
 def test_inventory(setup_api):
     client, fq = setup_api
     params = dict(group='volcanoes')
     with client.stream("GET", "/inventory", params=params) as r:
         r.read()
-        txt = r.text
+        txt = r.text
     features = sorted(["sonogram", "predom_freq", "ssam", "bandwidth",
                        "filterbank", "central_freq", "rsam", "dsar",
                        "rsam_energy_prop", "autoencoder"])
     result_expected = {"volcanoes": [
-        {"MDR":[
-            {"00":[
-                {"BHZ": features}
-            ]
-            }
-        ]
-        }
-    ]
-    }
+        {"MDR": [
+            {"00": [
+                {"BHZ": features}
+            ]
+            }
+        ]
+        }
+    ]
+    }
     result_test = json.loads(txt)
     assert result_test['volcanoes'][1] == result_expected['volcanoes'][0]
 
@@ -177,5 +204,4 @@ def test_inventory(setup_api):
         txt = r.text
     result_test = json.loads(txt)
     test_features = result_test['volcanoes'][1]['MDR'][0]['00'][0]['BHZ']
-    assert sorted(test_features) == features
-
+    assert sorted(test_features) == features