tonik 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tonik/api.py CHANGED
@@ -1,25 +1,26 @@
1
1
  from argparse import ArgumentParser
2
- from datetime import timedelta, datetime
2
+ from datetime import timedelta, datetime, timezone
3
3
  import logging
4
4
  import os
5
+ from urllib.parse import unquote
5
6
 
6
7
  from cftime import num2date, date2num
7
8
  import datashader as dsh
8
9
  import numpy as np
9
10
  import pandas as pd
10
11
  import uvicorn
11
- from fastapi import FastAPI, HTTPException
12
+ from fastapi import FastAPI, HTTPException, Query
12
13
  from fastapi.middleware.cors import CORSMiddleware
13
14
  from fastapi.responses import HTMLResponse, StreamingResponse
14
15
  from pydantic import BaseModel
15
- from typing import List
16
+ from typing import Annotated
16
17
 
17
18
  from .storage import StorageGroup
18
19
  from . import get_data
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
22
-
23
+
23
24
  class TonikAPI:
24
25
 
25
26
  def __init__(self, rootdir) -> None:
@@ -32,37 +33,49 @@ class TonikAPI:
32
33
 
33
34
  self.app.get("/", response_class=HTMLResponse)(self.root)
34
35
  self.app.get("/feature")(self.feature)
36
+ self.app.get("/inventory")(self.inventory)
35
37
 
36
- async def root(self):
38
+ def root(self):
37
39
  with open(get_data("package_data/index.html"), "r", encoding="utf-8") as file:
38
40
  html_content = file.read()
39
41
  return HTMLResponse(content=html_content, status_code=200)
40
42
 
43
+ def preprocess_datetime(self, dt):
44
+ """
45
+ Convert datetime string to datetime object.
46
+ """
47
+ # remove timezone info
48
+ dt = dt.split('+')[0]
49
+ # remove 'Z' at the end
50
+ dt = dt.replace('Z', '')
51
+ # convert html encoded characters
52
+ dt = unquote(dt)
53
+ dt = datetime.fromisoformat(dt)
54
+ dt = dt.replace(tzinfo=None)
55
+ return dt
41
56
 
42
57
  def feature(self,
43
- name: str='rsam',
44
- group: str='Ruapehu',
45
- site: str='MAVZ',
46
- sensor: str='10',
47
- channel: str='HHZ',
48
- starttime: datetime=datetime.utcnow()-timedelta(days=30),
49
- endtime: datetime=datetime.utcnow(),
50
- resolution: str='full',
51
- verticalres: int=10,
52
- log: bool=True,
53
- normalise: bool=False):
54
-
55
- _st = datetime.fromisoformat(str(starttime))
56
- _st = _st.replace(tzinfo=None)
57
- _et = datetime.fromisoformat(str(endtime))
58
- _et = _et.replace(tzinfo=None)
58
+ group: str,
59
+ name: str,
60
+ starttime: str = None,
61
+ endtime: str = None,
62
+ resolution: str = 'full',
63
+ verticalres: int = 10,
64
+ log: bool = False,
65
+ normalise: bool = False,
66
+ subdir: Annotated[list[str] | None, Query()] = None):
67
+ _st = self.preprocess_datetime(starttime)
68
+ _et = self.preprocess_datetime(endtime)
59
69
  g = StorageGroup(group, rootdir=self.rootdir,
60
- starttime=_st, endtime=_et)
61
- c = g.get_store(site=site, sensor=sensor, channel=channel)
70
+ starttime=_st, endtime=_et)
71
+ if subdir is None:
72
+ c = g
73
+ else:
74
+ c = g.get_store(*subdir)
62
75
  try:
63
76
  feat = c(name)
64
77
  except ValueError as e:
65
- msg = f"Feature {name} not found in directory {l.sitedir}:"
78
+ msg = f"Feature {name} not found in directory {c.path}:"
66
79
  msg += f"{e}"
67
80
  raise HTTPException(status_code=404, detail=msg)
68
81
  if len(feat.shape) > 1:
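
The new preprocess_datetime helper normalises timestamp query strings (URL-encoded characters, a trailing 'Z', '+HH:MM' offsets) before parsing them. A standalone sketch of the same logic, with made-up input strings to illustrate what it tolerates:

from datetime import datetime
from urllib.parse import unquote

def preprocess_datetime(dt: str) -> datetime:
    dt = dt.split('+')[0]      # drop a "+HH:MM" offset
    dt = dt.replace('Z', '')   # drop a trailing "Z"
    dt = unquote(dt)           # decode e.g. "%3A" back to ":"
    return datetime.fromisoformat(dt).replace(tzinfo=None)

preprocess_datetime("2019-12-01T00%3A00%3A00Z")    # -> datetime(2019, 12, 1, 0, 0)
preprocess_datetime("2019-12-01T00:00:00+13:00")   # -> datetime(2019, 12, 1, 0, 0)
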
@@ -70,7 +83,8 @@ class TonikAPI:
70
83
  nfreqs = feat.shape[0]
71
84
  dates = feat.coords[feat.dims[1]].values
72
85
  if resolution != 'full':
73
- freq, dates, spec = self.aggregate_feature(resolution, verticalres, feat, nfreqs, dates)
86
+ freq, dates, spec = self.aggregate_feature(
87
+ resolution, verticalres, feat, nfreqs, dates)
74
88
  else:
75
89
  spec = feat.values
76
90
  freq = feat.coords[feat.dims[0]].values
@@ -78,191 +92,56 @@ class TonikAPI:
78
92
  if log and feat.name != 'sonogram':
79
93
  vals = 10*np.log10(vals)
80
94
  if normalise:
81
- vals = (vals - np.nanmin(vals))/(np.nanmax(vals) - np.nanmin(vals))
95
+ vals = (vals - np.nanmin(vals)) / \
96
+ (np.nanmax(vals) - np.nanmin(vals))
82
97
  freqs = freq.repeat(dates.size)
83
98
  dates = np.tile(dates, freq.size)
84
- df = pd.DataFrame({'dates': dates, 'freqs': freqs, 'feature': vals})
99
+ df = pd.DataFrame(
100
+ {'dates': dates, 'freqs': freqs, 'feature': vals})
85
101
  output = df.to_csv(index=False,
86
- columns=['dates', 'freqs', 'feature'])
102
+ columns=['dates', 'freqs', 'feature'])
87
103
  else:
88
104
  df = pd.DataFrame(data=feat.to_pandas(), columns=[feat.name])
89
105
  df['dates'] = df.index
90
106
  try:
91
- df = df.resample(str(float(resolution)/60000.0)+'T').mean()
107
+ current_resolution = pd.Timedelta(df['dates'].diff().mean())
108
+ if current_resolution < pd.Timedelta(resolution):
109
+ df = df.resample(pd.Timedelta(resolution)).mean()
92
110
  except ValueError as e:
93
- logger.warning(f"Cannot resample {feat.name} to {resolution}: e")
111
+ logger.warning(
112
+ f"Cannot resample {feat.name} to {resolution}: e")
94
113
  df.rename(columns={feat.name: 'feature'}, inplace=True)
95
114
  output = df.to_csv(index=False, columns=['dates', 'feature'])
96
115
  return StreamingResponse(iter([output]),
97
- media_type='text/csv',
98
- headers={"Content-Disposition":
99
- "attachment;filename=<VUMT_feature>.csv",
100
- 'Content-Length': str(len(output))})
101
-
116
+ media_type='text/csv',
117
+ headers={"Content-Disposition":
118
+ "attachment;filename=<tonik_feature>.csv",
119
+ 'Content-Length': str(len(output))})
102
120
 
103
121
  def aggregate_feature(self, resolution, verticalres, feat, nfreqs, dates):
104
- resolution = np.timedelta64(pd.Timedelta(resolution), 'ms').astype(float)
122
+ resolution = np.timedelta64(
123
+ pd.Timedelta(resolution), 'ms').astype(float)
105
124
  ndays = np.timedelta64(dates[-1] - dates[0], 'ms').astype(float)
106
- canvas_x = int(ndays/resolution)
125
+ canvas_x = int(ndays/resolution)
107
126
  canvas_y = min(nfreqs, verticalres)
108
127
  dates = date2num(dates.astype('datetime64[us]').astype(datetime),
109
- units='hours since 1970-01-01 00:00:00.0',
110
- calendar='gregorian')
128
+ units='hours since 1970-01-01 00:00:00.0',
129
+ calendar='gregorian')
111
130
  feat = feat.assign_coords({'datetime': dates})
112
131
  cvs = dsh.Canvas(plot_width=canvas_x,
113
- plot_height=canvas_y)
132
+ plot_height=canvas_y)
114
133
  agg = cvs.raster(source=feat)
115
134
  freq_dim = feat.dims[0]
116
135
  freq, d, spec = agg.coords[freq_dim].values, agg.coords['datetime'].values, agg.data
117
- dates = num2date(d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
118
- return freq,dates,spec
119
-
120
-
121
- # #pydanticmodel output: Json file
122
- # class Feature(BaseModel):
123
- # name: list
124
-
125
-
126
- # class Channel(BaseModel):
127
- # name: str
128
- # features: List[Feature] = []
129
-
130
-
131
- # class Location(BaseModel):
132
- # name: str
133
- # channels: List[Channel] = []
134
-
135
-
136
- # class Station(BaseModel):
137
- # name: str
138
- # lat: float
139
- # lon: float
140
- # locations: List[Location] = []
141
-
142
-
143
- # class Group(BaseModel):
144
- # volcano: str
145
- # stations: List[Station] = []
146
-
136
+ dates = num2date(
137
+ d, units='hours since 1970-01-01 00:00:00.0', calendar='gregorian')
138
+ return freq, dates, spec
147
139
 
148
- # def get_pydanticModel(group, station, location, channel, feature_list):
149
-
150
- # channels_data = {"name": channel, "features": feature_list}
151
- # channel_models = []
152
- # channel_model = Channel(**channels_data)
153
- # channel_models.append(channel_model)
154
-
155
- # location_data = {"name": location, "channels": channel_models}
156
- # location_models = []
157
- # location_model = Location(**location_data)
158
- # location_models.append(location_model)
159
-
160
- # stations_data = {"name": station, "lat": "42", "lon": "171",
161
- # "locations": location_models}
162
- # station_models = []
163
- # station_model = Station(**stations_data)
164
- # station_models.append(station_model)
165
-
166
- # group_model = Group(group=group, stations=station_models)
167
-
168
- # # Exporting to JSON
169
- # json_data = group_model.json()
170
- # return json_data
171
-
172
-
173
- # write a function that scans LOCKERROOMROOT for
174
- # available groups, stations, locations, channels, and features
175
- # and returns a pydantic model
176
- # def get_available_features():
177
- # groups = os.listdir(ROOT)
178
- # group_models = []
179
- # for group in groups:
180
- # stations = os.listdir(os.path.join(LOCKERROOMROOT, group))
181
- # station_models = []
182
- # for station in stations:
183
- # locations = os.listdir(os.path.join(LOCKERROOMROOT, group, station))
184
- # location_models = []
185
- # for location in locations:
186
- # channels = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location))
187
- # channel_models = []
188
- # for channel in channels:
189
- # features = os.listdir(os.path.join(LOCKERROOMROOT, group, station, location, channel))
190
- # feature_list = []
191
- # for feature in features:
192
- # feature_list.append(feature)
193
- # channel_data = {"name": channel, "features": feature_list}
194
- # channel_model = Channel(**channel_data)
195
- # channel_models.append(channel_model)
196
- # location_data = {"name": location, "channels": channel_models}
197
- # location_model = Location(**location_data)
198
- # location_models.append(location_model)
199
- # station_data = {"name": station, "lat": "42", "lon": "171", "locations": location_models}
200
- # station_model = Station(**station_data)
201
- # station_models.append(station_model)
202
- # group_data = {"volcano": group, "stations": station_models}
203
- # group_model = Group(**group_data)
204
- # group_models.append(group_model)
205
- # return group_models
206
-
207
- # @app.get("/featureEndpoint")
208
- # def featureEndpoint(group: str="all", station: str="all", channel: str="all",
209
- # type: str="all"):
210
- # groups = vm.get_available_volcanoes()
211
-
212
- # station_model_list = []
213
- # channel_model_list = []
214
- # volcano_model_list = []
215
- # for _volcano in volcanoes:
216
- # streams = vm.get_available_streams(_volcano)
217
- # for _stream in streams:
218
- # _, _station, _, _channel = _stream.split('.')
219
- # stream_dir = os.path.join(FEATUREDIR, _volcano, _station, _channel)
220
- # try:
221
- # feature_list = os.listdir(stream_dir)
222
- # except (NotADirectoryError, FileNotFoundError):
223
- # continue
224
- # feature_list = sorted([str(os.path.basename(path)).split('.nc')[0] for path in feature_list])
225
- # channels_data = {"name": _channel, "features":feature_list}
226
- # channel_model = Channel(**channels_data)
227
- # channel_model_list.append(channel_model)
228
- # try:
229
- # site_info = vm.get_site_information(_station)
230
- # lat = site_info['latitude']
231
- # lon = site_info['longitude']
232
- # except:
233
- # lat, lon = -999.9, -999.9
234
- # stations_data = {"name": _station, "lat": lat, "lon": lon, "channels":channel_model_list}
235
- # station_model = Station(**stations_data)
236
- # station_model_list.append(station_model)
237
-
238
- # volcano_model = Volcano(volcano=_volcano, stations=station_model_list)
239
- # volcano_model_list.append(volcano_model)
240
-
241
- # if len(volcano_model_list) == 0:
242
- # return('no volcano')
243
-
244
- # scenario_model = Scenario(scenario='VUMT', volcanoes=volcano_model_list)
245
- # if volcano != "all":
246
- # # return all stations for a volcano
247
- # for _volcano in scenario_model.volcanoes:
248
- # if _volcano.volcano == volcano:
249
- # if station == "all":
250
- # return _volcano
251
- # for _station in _volcano.stations:
252
- # if _station.name == station:
253
- # if channel == "all":
254
- # return _station
255
- # for _channel in _station.channels:
256
- # if _channel.name == channel:
257
- # feature_list_filtered = []
258
- # for _f in _channel.features:
259
- # if _f in FeatureRequest.feat_dict[type]:
260
- # feature_list_filtered.append(_f)
261
- # _channel.features = feature_list_filtered
262
- # return _channel
263
-
264
- # return scenario_model
140
+ def inventory(self, group: str) -> dict:
141
+ sg = StorageGroup(group, rootdir=self.rootdir)
142
+ return sg.to_dict()
265
143
 
144
+ # ta = TonikAPI('/tmp').feature()
266
145
 
267
146
 
268
147
  def main(argv=None):
@@ -272,5 +151,6 @@ def main(argv=None):
272
151
  ta = TonikAPI(args.rootdir)
273
152
  uvicorn.run(ta.app, host="0.0.0.0", port=8003)
274
153
 
154
+
275
155
  if __name__ == "__main__":
276
156
  main()
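
The new /inventory endpoint simply returns StorageGroup.to_dict(), which walks the group directory: each subdirectory becomes a {name: [children]} entry and each *.nc file becomes a bare feature name. A hypothetical response for a made-up Ruapehu group could therefore look like:

{
    "Ruapehu": [
        {"MAVZ": [
            {"10": [
                {"HHZ": ["rsam", "ssam"]}
            ]}
        ]}
    ]
}
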
tonik/package_data/index.html CHANGED
@@ -3,7 +3,7 @@
3
3
  <head>
4
4
  <meta charset="UTF-8">
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>SAM API Documentation</title>
6
+ <title>Tonik API Documentation</title>
7
7
  <style>
8
8
  body {
9
9
  font-family: Arial, sans-serif;
@@ -37,41 +37,35 @@
37
37
  <body>
38
38
 
39
39
  <header>
40
- <h1>Your API Name</h1>
41
- <p>Seismic Acoustic Monitoring Tool (SAM) - API</p>
40
+ <h1>Tonik</h1>
41
+ <p>Tonik - API</p>
42
42
  </header>
43
43
 
44
44
  <section>
45
45
  <h2>Overview</h2>
46
46
  <p>
47
- Welcome to the documentation for Seismic Acoustic Monitoring API. This API provides access to waveform features and
48
- analysis results computed by SAM.
47
+ Welcome to the documentation for the Tonik API. This API provides access to time-series stored with the tonik package.
49
48
  </p>
50
49
 
51
- <!--h2>Authentication</h2>
52
- <p>
53
- To access the API, you need to authenticate using [authentication method]. Obtain your API key from [location].
54
- </p-->
55
-
56
50
  <h2>Endpoints</h2>
57
51
  <p>
58
52
  The endpoints available in the API are:
59
53
  </p>
60
54
  <ul>
61
- <li><strong>GET /feature:</strong> Request waveform features and analysis results.</li>
62
- <li><strong>GET /featureEndpoint:</strong> Request meta information on available stations, features and results.</li>
55
+ <li><strong>GET /feature:</strong> Request time-series data.</li>
56
+ <li><strong>GET /inventory:</strong> Request information on data.</li>
63
57
  </ul>
64
58
 
65
59
  <h2>Code Examples</h2>
66
60
  <h3>Requesting meta data</h3>
67
- The following will return information on available stations, features and results in JSON format.
61
+ The following will return information on available datasets in JSON format.
68
62
  <pre>
69
- curl -X GET "http://your.host.server:yourport/featureEndpoint"
63
+ curl -X GET "http://your.host.server:yourport/inventory"
70
64
  </pre>
71
65
  To do the same with Python using requests you can use the following code:
72
66
  <pre>
73
67
  import requests
74
- url = "http://your.host.server:yourport/featureEndpoint"
68
+ url = "http://your.host.server:yourport/inventory"
75
69
  response = requests.get(url)
76
70
  response.json()
77
71
  </pre>
@@ -79,7 +73,7 @@
79
73
  The following example shows how to request RSAM data for station WIZ at volcano Whakaari
80
74
  between 2019-12-01 and 2019-12-31. The return format is CSV.
81
75
  <pre>
82
- curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&volcano=Whakaari&site=WIZ"
76
+ curl -X GET "http://your.host.server:yourport/feature?name=rsam&starttime=2019-12-01T00:00:00&endtime=2019-12-31T00:00:00&subdir=Whakaari&subdir=WIZ"
83
77
  </pre>
84
78
  To do the same with Python using pandas you can use the following code:
85
79
  <pre>
@@ -89,7 +83,7 @@
89
83
  endtime="2019-12-31T00:00:00"
90
84
  volcano="Whakaari"
91
85
  site="WIZ"
92
- url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&volcano={volcano}&site={site}"
86
+ url = f"http://your.host.server:yourport/feature?name={feature}&starttime={starttime}&endtime={endtime}&subdir={volcano}&subdir={site}"
93
87
  pd.read_csv(url, parse_dates=True, index_col=0)
94
88
  </pre>
95
89
  </section>
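
Because the feature endpoint now takes the storage hierarchy as a repeated subdir query parameter, the query can also be built from a list with requests. A sketch, with host, port and the Whakaari/WIZ hierarchy as placeholders:

from io import StringIO

import pandas as pd
import requests

params = {
    "name": "rsam",
    "starttime": "2019-12-01T00:00:00",
    "endtime": "2019-12-31T00:00:00",
    "subdir": ["Whakaari", "WIZ"],  # encoded as subdir=Whakaari&subdir=WIZ
}
r = requests.get("http://your.host.server:yourport/feature", params=params)
df = pd.read_csv(StringIO(r.text), parse_dates=True, index_col=0)
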
tonik/storage.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from datetime import datetime, timedelta
2
+ import json
2
3
  import glob
3
4
  import logging
4
5
  import logging.config
@@ -17,13 +18,14 @@ ERROR_LOG_FILENAME = "tonik.log"
17
18
  LOGGING_CONFIG = {
18
19
  "version": 1,
19
20
  "disable_existing_loggers": False,
20
- "formatters": {
21
+ "formatters": {
21
22
  "default": { # The formatter name, it can be anything that I wish
22
- "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s", # What to add in the message
23
+ # What to add in the message
24
+ "format": "%(asctime)s:%(name)s:%(process)d:%(lineno)d " "%(levelname)s %(message)s",
23
25
  "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
24
26
  },
25
27
  "json": { # The formatter name
26
- "()": "pythonjsonlogger.jsonlogger.JsonFormatter", # The class to instantiate!
28
+ "()": "pythonjsonlogger.jsonlogger.JsonFormatter", # The class to instantiate!
27
29
  # Json is more complex, but easier to read, display all attributes!
28
30
  "format": """
29
31
  asctime: %(asctime)s
@@ -47,22 +49,23 @@ LOGGING_CONFIG = {
47
49
  """,
48
50
  "datefmt": "%Y-%m-%d %H:%M:%S", # How to display dates
49
51
  },
50
- },
52
+ },
51
53
  "handlers": {
52
54
  "logfile": { # The handler name
53
55
  "formatter": "json", # Refer to the formatter defined above
54
56
  "level": "ERROR", # FILTER: Only ERROR and CRITICAL logs
55
57
  "class": "logging.handlers.RotatingFileHandler", # OUTPUT: Which class to use
56
- "filename": ERROR_LOG_FILENAME, # Param for class above. Defines filename to use, load it from constant
58
+ # Param for class above. Defines filename to use, load it from constant
59
+ "filename": ERROR_LOG_FILENAME,
57
60
  "backupCount": 2, # Param for class above. Defines how many log files to keep as it grows
58
- },
61
+ },
59
62
  "simple": { # The handler name
60
63
  "formatter": "default", # Refer to the formatter defined above
61
64
  "class": "logging.StreamHandler", # OUTPUT: Same as above, stream to console
62
65
  "stream": "ext://sys.stdout",
63
66
  },
64
67
  },
65
- "loggers": {
68
+ "loggers": {
66
69
  "zizou": { # The name of the logger, this SHOULD match your module!
67
70
  "level": "DEBUG", # FILTER: only INFO logs onwards from "tryceratops" logger
68
71
  "handlers": [
@@ -86,13 +89,18 @@ class Path(object):
86
89
  def __init__(self, name, parentdir):
87
90
  self.name = name
88
91
  self.path = os.path.join(parentdir, name)
89
- os.makedirs(self.path, exist_ok=True)
92
+ try:
93
+ os.makedirs(self.path, exist_ok=True)
94
+ except FileExistsError:
95
+ pass
90
96
  self.children = {}
91
-
97
+
92
98
  def __str__(self):
93
99
  return self.path
94
-
100
+
95
101
  def __getitem__(self, key):
102
+ if key is None:
103
+ raise ValueError("Key cannot be None")
96
104
  try:
97
105
  return self.children[key]
98
106
  except KeyError:
@@ -103,7 +111,7 @@ class Path(object):
103
111
  _feature_path = os.path.join(self.path, feature + ".nc")
104
112
  if not os.path.exists(_feature_path):
105
113
  raise FileNotFoundError(f"File {_feature_path} not found")
106
- self.children[feature] = _feature_path
114
+ self.children[feature] = Path(feature + ".nc", self.path)
107
115
  return _feature_path
108
116
 
109
117
  def __call__(self, feature, stack_length=None, interval='10min'):
@@ -119,18 +127,18 @@ class Path(object):
119
127
  if self.endtime <= self.starttime:
120
128
  raise ValueError('Startime has to be smaller than endtime.')
121
129
 
122
- feature = feature.lower()
123
130
  filename = self.feature_path(feature)
124
131
 
125
- logger.debug(f"Reading feature {feature} between {self.starttime} and {self.endtime}")
132
+ logger.debug(
133
+ f"Reading feature {feature} between {self.starttime} and {self.endtime}")
126
134
  num_periods = None
127
135
  if stack_length is not None:
128
- valid_stack_units = ['W', 'D', 'H', 'T', 'min', 'S']
136
+ valid_stack_units = ['W', 'D', 'h', 'T', 'min', 'S']
129
137
  if not re.match(r'\d*\s*(\w*)', stack_length).group(1)\
130
- in valid_stack_units:
138
+ in valid_stack_units:
131
139
  raise ValueError(
132
140
  'Stack length should be one of: {}'.
133
- format(', '.join(valid_stack_units))
141
+ format(', '.join(valid_stack_units))
134
142
  )
135
143
 
136
144
  if pd.to_timedelta(stack_length) < pd.to_timedelta(interval):
@@ -140,13 +148,13 @@ class Path(object):
140
148
  # Rewind starttime to account for stack length
141
149
  self.starttime -= pd.to_timedelta(stack_length)
142
150
 
143
- num_periods = (pd.to_timedelta(stack_length)/
151
+ num_periods = (pd.to_timedelta(stack_length) /
144
152
  pd.to_timedelta(interval))
145
153
  if not num_periods.is_integer():
146
154
  raise ValueError(
147
155
  'Stack length {} / interval {} = {}, but it needs'
148
156
  ' to be a whole number'.
149
- format(stack_length, interval, num_periods))
157
+ format(stack_length, interval, num_periods))
150
158
 
151
159
  xd_index = dict(datetime=slice(self.starttime, self.endtime))
152
160
  with xr.open_dataset(filename, group='original', engine='h5netcdf') as ds:
@@ -158,13 +166,11 @@ class Path(object):
158
166
  logger.debug("Stacking feature...")
159
167
  try:
160
168
  xdf = rq[feature].rolling(datetime=int(num_periods),
161
- center=False,
162
- min_periods=1).mean()
169
+ center=False,
170
+ min_periods=1).mean()
163
171
  # Return requested timeframe to that defined in initialisation
164
172
  self.starttime += pd.to_timedelta(stack_length)
165
- xdf_new = xdf.loc[
166
- self.starttime:
167
- self.endtime-pd.to_timedelta(interval)]
173
+ xdf_new = xdf.loc[self.starttime:self.endtime]
168
174
  xdf_new = xdf_new.rename(feature)
169
175
  except ValueError as e:
170
176
  logger.error(e)
@@ -181,11 +187,11 @@ class Path(object):
181
187
  """
182
188
  self.__call__(*args, **kwargs)
183
189
 
184
- def save(self, data):
190
+ def save(self, data, **kwargs):
185
191
  """
186
192
  Save a feature to disk
187
193
  """
188
- xarray2hdf5(data, self.path)
194
+ xarray2hdf5(data, self.path, **kwargs)
189
195
 
190
196
 
191
197
  class StorageGroup(Path):
@@ -208,60 +214,73 @@ class StorageGroup(Path):
208
214
  >>> c = g.channel(site='WIZ', sensor='00', channel='HHZ')
209
215
  >>> rsam = c("rsam")
210
216
  """
217
+
211
218
  def __init__(self, name, rootdir=None, starttime=None, endtime=None):
212
- self.stores = set()
219
+ self.stores = set()
213
220
  self.starttime = starttime
214
221
  self.endtime = endtime
215
222
  super().__init__(name, rootdir)
216
223
 
224
+ def print_tree(self, site, indent=0, output=''):
225
+ output += ' ' * indent + site.path + '\n'
226
+ for site in site.children.values():
227
+ output += self.print_tree(site, indent + 2)
228
+ return output
229
+
217
230
  def __repr__(self):
218
231
  rstr = f"Group: {self.name}\n"
219
- last_site = False
220
- for j, site in enumerate(self.children.values()):
221
- if j == len(self.children) - 1:
222
- last_site = True
223
- rstr += f"|__ {site.name}\n"
224
- last_sensor = False
225
- for i, sensor in enumerate(site.children.values()):
226
- if i == len(site.children) - 1:
227
- last_sensor = True
228
- rstr += (" " if last_site else "|") + f" |__ {sensor.name}\n"
229
- for k, channel in enumerate(sensor.children.values()):
230
- rstr += (" " if last_site else "| ")
231
- rstr += (" " if last_sensor else "| ")
232
- rstr += f"|__ {channel.name}\n"
232
+ rstr = self.print_tree(self, 0, rstr)
233
233
  return rstr
234
234
 
235
- def get_store(self, site, sensor, channel):
235
+ def get_store(self, *args):
236
236
  # return the store for a given site, sensor, or channel
237
237
  # if one of them is None return the store for the level above
238
238
  # if all are None return the root store
239
239
  try:
240
- st = self[site][sensor][channel]
241
- except:
242
- try:
243
- st = self[site][sensor]
244
- except:
245
- try:
246
- st = self[site]
247
- except:
248
- return self
240
+ st = self
241
+ for arg in args:
242
+ st = st[arg]
243
+ except KeyError:
244
+ return self
249
245
 
250
246
  st.starttime = self.starttime
251
247
  st.endtime = self.endtime
252
248
  self.stores.add(st)
253
- return st
249
+ return st
254
250
 
255
251
  def from_directory(self):
256
- feature_files = glob.glob(os.path.join(self.path, '**', '*.nc'),
257
- recursive=True)
258
- for _f in feature_files:
259
- subdir = _f.split(self.path)[1].strip(os.sep)
260
- # split the path into parts
261
- # get the subdirectories
262
- site, sensor, channel, ffile = subdir.split(os.sep)
263
- fname = ffile.strip('.nc')
264
- c = self.get_store(site, sensor, channel)
252
+ """
253
+ Construct the storage group from the root directory
254
+ """
255
+ for root, dirs, files in os.walk(self.path):
256
+ if files:
257
+ try:
258
+ subdirs = root.split(self.path)[1].split(os.sep)[1:]
259
+ except IndexError:
260
+ st = self.get_store()
261
+ else:
262
+ try:
263
+ st = self.get_store(*subdirs)
264
+ except TypeError as e:
265
+ raise e
266
+ for _f in files:
267
+ if _f.endswith('.nc'):
268
+ st.feature_path(_f.replace('.nc', ''))
269
+
270
+ @staticmethod
271
+ def directory_tree_to_dict(path):
272
+ name = os.path.basename(path)
273
+ if os.path.isdir(path):
274
+ return {name: [StorageGroup.directory_tree_to_dict(os.path.join(path, child)) for child in sorted(os.listdir(path))]}
275
+ else:
276
+ if path.endswith('.nc'):
277
+ return name.replace('.nc', '')
278
+
279
+ def to_dict(self):
280
+ """
281
+ Convert the storage group to json
282
+ """
283
+ return StorageGroup.directory_tree_to_dict(self.path)
265
284
 
266
285
  def get_starttime(self):
267
286
  return self.__starttime
@@ -276,7 +295,8 @@ class StorageGroup(Path):
276
295
  time.month,
277
296
  time.day)
278
297
  for s in self.stores:
279
- s.starttime = time
298
+ if s is not self:
299
+ s.starttime = time
280
300
 
281
301
  def get_endtime(self):
282
302
  return self.__endtime
@@ -291,9 +311,8 @@ class StorageGroup(Path):
291
311
  time.month,
292
312
  time.day)
293
313
  for s in self.stores:
294
- s.endtime = time
295
-
314
+ if s is not self:
315
+ s.endtime = time
296
316
 
297
317
  starttime = property(get_starttime, set_starttime)
298
318
  endtime = property(get_endtime, set_endtime)
299
-
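
get_store now accepts an arbitrary number of hierarchy levels instead of fixed site/sensor/channel arguments, and to_dict exposes the resulting tree. A write-only sketch, assuming missing levels are created on demand as in earlier versions; the directory and names are illustrative:

from tonik.storage import StorageGroup
from tonik.utils import generate_test_data

g = StorageGroup("Ruapehu", rootdir="/tmp/tonik_demo")
store = g.get_store("MAVZ", "10", "HHZ")   # any depth of subdirectories
store.save(generate_test_data(dim=1))      # writes rsam.nc under the store path
print(g.to_dict())                         # nested dict mirroring the directory tree
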
tonik/utils.py CHANGED
@@ -8,7 +8,7 @@ import xarray as xr
8
8
  def generate_test_data(dim=1, ndays=30, nfreqs=10,
9
9
  tstart=datetime.utcnow(),
10
10
  feature_name=None,
11
- freq_name=None):
11
+ freq_name=None, add_nans=True):
12
12
  """
13
13
  Generate a 1D or 2D feature for testing.
14
14
  """
@@ -25,12 +25,14 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
25
25
  # Add 10% NaNs
26
26
  idx_nan = rs.integers(0, nints-1, int(0.1*nints))
27
27
  if dim == 1:
28
- data[idx_nan] = np.nan
28
+ if add_nans:
29
+ data[idx_nan] = np.nan
29
30
  if feature_name is None:
30
31
  feature_name = 'rsam'
31
32
  xrd = xr.Dataset({feature_name: xr.DataArray(data, coords=[dates], dims=['datetime'])})
32
33
  if dim == 2:
33
- data[:, idx_nan] = np.nan
34
+ if add_nans:
35
+ data[:, idx_nan] = np.nan
34
36
  freqs = np.arange(nfreqs)
35
37
  if feature_name is None:
36
38
  feature_name = 'ssam'
@@ -40,4 +42,5 @@ def generate_test_data(dim=1, ndays=30, nfreqs=10,
40
42
  xrd.attrs['starttime'] = dates[0].isoformat()
41
43
  xrd.attrs['endtime'] = dates[-1].isoformat()
42
44
  xrd.attrs['station'] = 'MDR'
45
+ xrd.attrs['interval'] = '10min'
43
46
  return xrd
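
A quick illustration of the new add_nans flag and the interval attribute, using only the function's defaults:

from tonik.utils import generate_test_data

xrd = generate_test_data(dim=2, ndays=2, add_nans=False)  # gap-free 'ssam' test data
print(xrd.attrs['interval'])                              # '10min'
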
tonik/xarray2hdf5.py CHANGED
@@ -8,22 +8,36 @@ import h5netcdf
8
8
  import numpy as np
9
9
 
10
10
 
11
- def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
11
+ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime",
12
+ archive_starttime=datetime(2000, 1, 1), resolution=None):
12
13
  """
13
14
  Store an xarray dataset as an HDF5 file.
14
15
 
15
- :param xArray: Data to store.
16
- :type xArray: :class:`xarray.Dataset`
17
- :param fdir: Directory to store data under.
18
- :type fdir: str
19
- :param rootGroupName: Hdf5 group name.
20
- :type rootGroupName: str
21
- :param timedim: Name of time dimension.
22
- :type timedim: str
16
+ Parameters
17
+ ----------
18
+ xArray : xarray.Dataset
19
+ Data to store.
20
+ fdir : str
21
+ Directory to store data under.
22
+ rootGroupName : str
23
+ Hdf5 group name.
24
+ timedim : str
25
+ Name of time dimension.
26
+ archive_starttime : datetime
27
+ Start time of archive. If the start time of the data is before this
28
+ time, the data start time is used.
29
+ resolution : float
30
+ Time resolution of the data in hours. If None, the resolution is
31
+ determined from the data.
23
32
  """
24
33
  filterwarnings(action='ignore', category=DeprecationWarning,
25
34
  message='`np.bool` is a deprecated alias')
26
35
 
36
+ starttime = xArray[timedim].values[0].astype('datetime64[us]').astype(datetime)
37
+ starttime = min(starttime, archive_starttime)
38
+ if resolution is None:
39
+ resolution = (np.diff(xArray[timedim])/np.timedelta64(1, 'h'))[0]
40
+
27
41
  for featureName in list(xArray.data_vars.keys()):
28
42
  h5file = os.path.join(fdir, featureName +'.nc')
29
43
 
@@ -31,7 +45,8 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
31
45
 
32
46
  with h5netcdf.File(h5file, mode) as h5f:
33
47
  try:
34
- rootGrp = _create_h5_Structure(rootGroupName, featureName, h5f, xArray)
48
+ rootGrp = _create_h5_Structure(rootGroupName, featureName,
49
+ h5f, xArray, starttime, timedim)
35
50
  except ValueError: # group already exists, append
36
51
  rootGrp = h5f[rootGroupName]
37
52
 
@@ -39,17 +54,17 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
39
54
  new_time = date2num(xArray[timedim].values.astype('datetime64[us]').astype(datetime),
40
55
  units=rootGrp[timedim].attrs['units'],
41
56
  calendar=rootGrp[timedim].attrs['calendar'])
42
- dt = (np.diff(xArray['datetime'])/np.timedelta64(1, 'h'))[0]
43
- t0 = date2num(np.datetime64(rootGrp.attrs['starttime']).astype('datetime64[us]').astype(datetime),
57
+ t0 = date2num(starttime,
44
58
  units=rootGrp[timedim].attrs['units'],
45
59
  calendar=rootGrp[timedim].attrs['calendar'])
46
- indices = np.rint((new_time - t0)/dt).astype(int)
47
- assert np.all(indices >= 0)
60
+ indices = np.rint((new_time - t0)/resolution).astype(int)
61
+ if not np.all(indices >= 0):
62
+ raise ValueError("Data starts before the archive start time")
48
63
  times = rootGrp[timedim]
49
64
  newsize = indices[-1] + 1
50
65
  if newsize > times.shape[0]:
51
66
  rootGrp.resize_dimension(timedim, newsize)
52
- times[:] = t0 + np.arange(times.shape[0])*dt
67
+ times[:] = t0 + np.arange(times.shape[0]) * resolution
53
68
  data = rootGrp[featureName]
54
69
  if len(data.shape) > 1:
55
70
  data[:, indices] = xArray[featureName].values
@@ -63,10 +78,18 @@ def xarray2hdf5(xArray, fdir, rootGroupName="original", timedim="datetime"):
63
78
  logging.warning(f"Could not set all meta info for {featureName}: {e}")
64
79
 
65
80
 
66
- def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray):
81
+ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray, starttime, timedim):
67
82
  rootGrp = h5f.create_group(defaultGroupName)
83
+ rootGrp.dimensions[timedim] = None
84
+ coordinates = rootGrp.create_variable(timedim, (timedim,), float)
85
+ coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
86
+ coordinates.attrs['calendar'] = 'gregorian'
87
+ rootGrp.attrs['starttime'] = str(starttime)
68
88
  for label, size in xArray.dims.items():
69
- _setAttributes(label, size, rootGrp, xArray)
89
+ if not np.issubdtype(xArray[label].dtype, np.datetime64):
90
+ rootGrp.dimensions[label] = size
91
+ coordinates = rootGrp.create_variable(label, (label,), float)
92
+ coordinates[:] = xArray[label].values
70
93
  # Note: xArray.dims returns a dictionary of dimensions that are not necesarily
71
94
  # in the right order; xArray[featureName].dims returns a tuple with dimension
72
95
  # names in the correct order
@@ -74,20 +97,6 @@ def _create_h5_Structure(defaultGroupName, featureName, h5f, xArray):
74
97
  return rootGrp
75
98
 
76
99
 
77
- def _setAttributes(label, size, rootGrp, xArray):
78
- if np.issubdtype(xArray[label].dtype, np.datetime64):
79
- starttime = str(xArray[label].values[0].astype('datetime64[us]').astype(datetime))
80
- rootGrp.dimensions[label] = None
81
- coordinates = rootGrp.create_variable(label, (label,), float)
82
- coordinates.attrs['units'] = 'hours since 1970-01-01 00:00:00.0'
83
- coordinates.attrs['calendar'] = 'gregorian'
84
- rootGrp.attrs['starttime'] = starttime
85
- else:
86
- rootGrp.dimensions[label] = size
87
- coordinates = rootGrp.create_variable(label, (label,), float)
88
- coordinates[:] = xArray[label].values
89
-
90
-
91
100
  def _setMetaInfo(featureName, h5f, xArray):
92
101
  h5f.attrs['station'] = xArray.attrs['station']
93
102
  h5f.attrs['latitude'] = -42
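
xarray2hdf5 now lets callers pin the archive start time and pass the time resolution (in hours) explicitly instead of always deriving both from the incoming data. A sketch of a direct call; the target directory, archive start and the 10-minute (1/6 hour) resolution are illustrative:

import os
from datetime import datetime

from tonik.utils import generate_test_data
from tonik.xarray2hdf5 import xarray2hdf5

os.makedirs("/tmp/tonik_demo", exist_ok=True)
xrd = generate_test_data(dim=1, ndays=1)
xarray2hdf5(xrd, "/tmp/tonik_demo", archive_starttime=datetime(2019, 1, 1),
            resolution=1/6.)   # sample index = (time - archive start) / resolution
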
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tonik
3
- Version: 0.0.5
4
- Summary: A collection of tools to integrate with GNS Science's time series classification platform.
3
+ Version: 0.0.7
4
+ Summary: Store time series data as HDF5 files and access them through an API.
5
5
  Project-URL: Homepage, https://tsc-tools.github.io/tonik
6
6
  Project-URL: Issues, https://github.com/tsc-tools/tonik/issues
7
7
  Author-email: Yannik Behr <y.behr@gns.cri.nz>, Christof Mueller <c.mueller@gns.cri.nz>
@@ -0,0 +1,11 @@
1
+ tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
2
+ tonik/api.py,sha256=gnwoss7UV8FaY92xzumhcoVPjkzB695qgByHUYcLSw4,5916
3
+ tonik/storage.py,sha256=pJnvoGFb8uZqnpkjOsgnntW-a7dhKVlvevs725nAS54,11009
4
+ tonik/utils.py,sha256=nV0lK8Azasr8LUuQGXxfxef6nU3bn3dCTQnQTmWsKAY,1534
5
+ tonik/xarray2hdf5.py,sha256=cekO9vo9ZRlr0VndswJjPC27CEVD3TpRVKLAJ-aAO0g,4465
6
+ tonik/package_data/index.html,sha256=GKDClUhIam_fAYbNfzAolORhSCG3ae1wW3VjWCg4PMk,2732
7
+ tonik-0.0.7.dist-info/METADATA,sha256=6DhYEfnEAWSKLEZJQQRiRF_cZAGAQFK6mLmHQEYJbuE,1918
8
+ tonik-0.0.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
+ tonik-0.0.7.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
10
+ tonik-0.0.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
11
+ tonik-0.0.7.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- tonik/__init__.py,sha256=p97Bbz-yujI-uNmbqn1S61lq-zfF1VPaS5c1fxs1Fa8,516
2
- tonik/api.py,sha256=zTZZbpPsetoM5EH8ao8RlwR39lU1jEWpxwATC4S8Qq0,11012
3
- tonik/storage.py,sha256=F1NnRIZLq7uB7TTK1-5VHYix6_e_fl2J1FmiCFYqIJ0,10719
4
- tonik/utils.py,sha256=jEjvUNcU9nUhQZTgu8iwfPmF4U2odyOT8EJu0v1DaA0,1433
5
- tonik/xarray2hdf5.py,sha256=biQ3KVt0QrxJhOWm38FwglzYkLhPtO13G1B1vVF2c6o,4090
6
- tonik/package_data/index.html,sha256=IaPMU-A_FswjMAKA6PrHp9VFTuKJ6o_PGyjg_CTutrY,3082
7
- tonik-0.0.5.dist-info/METADATA,sha256=4VJUxgbE6FIPk9g4Uj5KlMN2qbW05us5zw3j4byWwjU,1940
8
- tonik-0.0.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
- tonik-0.0.5.dist-info/entry_points.txt,sha256=VnGfC5qAzpntEHAb5pooUEpYABSgOfQoNhCEtLDJyf8,45
10
- tonik-0.0.5.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
11
- tonik-0.0.5.dist-info/RECORD,,