tsp 1.7.7__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +153 -0
  4. tsp/core.py +1162 -1035
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/example_geotop.csv +5240 -5240
  10. tsp/data/example_gtnp.csv +1298 -1298
  11. tsp/data/example_permos.csv +7 -7
  12. tsp/data/test_geotop_has_space.txt +5 -5
  13. tsp/dataloggers/AbstractReader.py +43 -43
  14. tsp/dataloggers/FG2.py +110 -110
  15. tsp/dataloggers/GP5W.py +114 -114
  16. tsp/dataloggers/Geoprecision.py +34 -34
  17. tsp/dataloggers/HOBO.py +914 -914
  18. tsp/dataloggers/RBRXL800.py +190 -190
  19. tsp/dataloggers/RBRXR420.py +308 -308
  20. tsp/dataloggers/__init__.py +15 -15
  21. tsp/dataloggers/logr.py +115 -115
  22. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  23. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  24. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  25. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  26. tsp/dataloggers/test_files/010252.dat +1731 -1731
  27. tsp/dataloggers/test_files/010252.hex +1739 -1739
  28. tsp/dataloggers/test_files/010274.hex +1291 -1291
  29. tsp/dataloggers/test_files/010278.hex +3544 -3544
  30. tsp/dataloggers/test_files/012064.dat +1286 -1286
  31. tsp/dataloggers/test_files/012064.hex +1294 -1294
  32. tsp/dataloggers/test_files/012081.hex +3532 -3532
  33. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  34. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  35. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  36. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  37. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  38. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  39. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  40. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  41. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  42. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  43. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  44. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  45. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  46. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  47. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  48. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  49. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  50. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  51. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  52. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  53. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  54. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  55. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  56. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  57. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  58. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  59. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  60. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  61. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  62. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  63. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  64. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  65. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  66. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  67. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  68. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  69. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  70. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  71. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  72. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  73. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  74. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  75. tsp/gtnp.py +148 -148
  76. tsp/labels.py +3 -3
  77. tsp/misc.py +90 -90
  78. tsp/physics.py +101 -101
  79. tsp/plots/static.py +373 -373
  80. tsp/readers.py +548 -548
  81. tsp/time.py +45 -45
  82. tsp/tspwarnings.py +14 -14
  83. tsp/utils.py +101 -101
  84. tsp/version.py +1 -1
  85. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/METADATA +30 -23
  86. tsp-1.8.1.dist-info/RECORD +94 -0
  87. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/WHEEL +5 -5
  88. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info/licenses}/LICENSE +674 -674
  89. tsp/dataloggers/test_files/CSc_CR1000_1.dat +0 -295
  90. tsp/scratch.py +0 -6
  91. tsp-1.7.7.dist-info/RECORD +0 -95
  92. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/top_level.txt +0 -0
tsp/readers.py CHANGED
@@ -1,548 +1,548 @@
import datetime
import numpy as np
import pandas as pd
import re
import warnings

try:
    import netCDF4 as nc
except ModuleNotFoundError:
    warnings.warn("Missing netCDF4 library. Some functionality will be limited.")

from pathlib import Path
from typing import Union, Optional

from tsp.dataloggers.Geoprecision import detect_geoprecision_type
from tsp.dataloggers.HOBO import HOBO, HOBOProperties
from tsp.dataloggers.logr import LogR, guessed_depths_ok
from tsp.dataloggers.RBRXL800 import RBRXL800
from tsp.dataloggers.RBRXR420 import RBRXR420
import tsp.tspwarnings as tw

from tsp.core import TSP, IndexedTSP
from tsp.misc import _is_depth_column
from tsp.gtnp import GtnpMetadata


def read_classic(filepath: str, init_file: "Optional[str]" = None) -> TSP:
    """Read output from the CLASSIC land surface model

    Depth values, if provided, represent the midpoints of the model cells.

    Parameters
    ----------
    filepath : str
        Path to an output file
    init_file : str, optional
        Path to a CLASSIC init file. If provided, depth values are calculated from it,
        by default None

    Returns
    -------
    TSP
        An IndexedTSP. Use :py:meth:`~tsp.core.IndexedTSP.set_depths` to provide depth
        information if init_file is not provided.
    """
    try:
        nc
    except NameError:
        warnings.warn("netCDF4 library must be installed.")

    # tbaracc_d / tbaracc_m / tbaracc_y
    with nc.Dataset(filepath, 'r') as ncdf:
        lat = ncdf['lat'][:]
        lon = ncdf['lon'][:]
        temp = ncdf['tsl'][:]  # t, z

        try:
            time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar,
                               only_use_cftime_datetimes=False,
                               only_use_python_datetimes=True)
        except ValueError:
            cf_time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar)
            time = np.array([datetime.datetime.fromisoformat(t.isoformat()) for t in cf_time])

    if init_file:
        with nc.Dataset(init_file, 'r') as init:
            delz = init["DELZ"][:]
            depths = np.round(np.cumsum(delz) - np.multiply(delz, 0.5), 7)  # delz precision is lower so we get some very small offsets

    if len(lat) > 1:
        warnings.warn("Multiple points in file. Returning the first one found.")
        # TODO: return Ensemble if multiple points
        lat = lat[0]
        lon = lon[0]
        temp = temp[:, :, 0, 0]
    else:
        temp = temp[:, :, 0, 0]

    t = IndexedTSP(times=time, values=temp, latitude=lat, longitude=lon)

    if init_file:
        t.set_depths(depths)

    return t
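
# A minimal usage sketch (file names are hypothetical; "tbaracc_m" follows the
# comment in the function body):
#   >>> t = read_classic("tbaracc_m.nc", init_file="init.nc")
#   >>> t.depths  # cell-midpoint depths computed from DELZ in the init file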


def read_csv(filepath: str,
             datecol: "Union[str, int]",
             datefmt: str = "%Y-%m-%d %H:%M:%S",
             depth_pattern: "Union[str, dict]" = r"^(-?[0-9\.]+)$",
             na_values: list = [],
             **kwargs) -> TSP:
    r"""Read an arbitrary CSV file

    Date and time must be in a single column, and the csv must be in the
    'wide' data format (each depth is a separate column).

    Parameters
    ----------
    filepath : str
        Path to csv file
    datecol : Union[str, int]
        Either the numeric index (starting at 0) of the date column (if int), or the
        name of the date column or a regular expression matching it (if str)
    datefmt : str, optional
        The format of the datetime values. Use `python strftime format codes <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_,
        by default ``"%Y-%m-%d %H:%M:%S"``
    depth_pattern : str or dict
        If a string: a regular expression that matches the column names containing depths. The regular expression must
        have a single capture group that extracts just the numeric part of the column header, by default ``r"^(-?[0-9\.]+)$"``.
        If column names were in the form ``"+/-1.0_m"`` (i.e. included 'm' to denote units), you could use the regular expression ``r"^(-?[0-9\.]+)_m$"``.
        If a dictionary is passed, the keys must be the column names and the values are the depths. This is useful if the column names are not numeric.
    na_values : list, optional
        Additional strings to recognize as NA. Passed to pandas.read_csv, by default []

    Returns
    -------
    TSP
        A TSP
    """
    raw = pd.read_csv(filepath, na_values=na_values, **kwargs)

    if datecol not in raw.columns and isinstance(datecol, str):
        datecol = [re.search(datecol, c).group(1) for c in raw.columns if re.search(datecol, c)][0]

    if isinstance(datecol, int):
        datecol = raw.columns[datecol]

    time = pd.to_datetime(raw[datecol], format=datefmt).to_numpy()

    if isinstance(depth_pattern, str):
        depth = [re.search(depth_pattern, c).group(1) for c in raw.columns if _is_depth_column(c, depth_pattern)]
        depth_numeric = np.array([float(d) for d in depth])

    elif isinstance(depth_pattern, dict):
        depth = [c for c in raw.columns if c in depth_pattern.keys()]
        depth_numeric = [depth_pattern[c] for c in raw.columns if c in depth_pattern.keys()]

    else:
        raise ValueError("depth_pattern must be a string or dictionary")

    values = raw.loc[:, depth].to_numpy()

    t = TSP(time, depth_numeric, values)

    return t
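
# Usage sketches echoing the docstring (file and column names are hypothetical):
#   >>> t = read_csv("data.csv", datecol="Date")                                # plain numeric headers
#   >>> t = read_csv("data.csv", datecol=0, depth_pattern=r"^(-?[0-9\.]+)_m$")  # headers like "1.0_m"
#   >>> t = read_csv("data.csv", datecol=0, depth_pattern={"sensor_a": 0.5})    # non-numeric headers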


def read_geoprecision(filepath: str) -> IndexedTSP:
    """Read a Geoprecision datalogger export (text file)

    Reads GP5W- and FG2-style files from Geoprecision.

    Parameters
    ----------
    filepath : str
        Path to file.

    Returns
    -------
    IndexedTSP
        An IndexedTSP
    """
    Reader = detect_geoprecision_type(filepath)

    if Reader is None:
        raise RuntimeError("Could not detect type of Geoprecision file ('GP5W' or 'FG2' missing from header)")
    reader = Reader()

    data = reader.read(filepath)
    t = IndexedTSP(times=data['TIME'].dt.to_pydatetime(),
                   values=data.drop("TIME", axis=1).values)

    t.metadata = reader.META
    return t
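
# Usage sketch using a test file shipped with the package; Geoprecision exports
# carry no depth information, so depths must be assigned afterwards:
#   >>> t = read_geoprecision("tsp/dataloggers/test_files/FG2_399.csv")
#   >>> t.set_depths([0.5, 1.0, 2.0])  # hypothetical depths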


def read_geotop(file: str) -> TSP:
    """Read a GEOtop soil temperature output file

    Parameters
    ----------
    file : str
        Path to file.

    Returns
    -------
    TSP
        A TSP

    Description
    -----------
    GEOtop writes all runs of all simulation periods to the same file, so this
    function returns only the last run of the last simulation period.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=tw.DuplicateTimesWarning)

        t = read_csv(file,
                     na_values=[-9999.0],
                     datecol="^(Date.*)",
                     datefmt=r"%d/%m/%Y %H:%M",
                     depth_pattern=r"^(-?[0-9\.]+\s*)$")

    t._depths *= 0.001  # Convert to [m]

    # Only use last simulation period
    # TODO: this could be improved
    raw = pd.read_csv(file)

    is_max_sim_period = raw['Simulation_Period'] == max(raw['Simulation_Period'])
    is_last_run_in_max_sim_period = raw['Run'] == raw['Run'][is_max_sim_period].max()
    last_run = np.logical_and(is_max_sim_period, is_last_run_in_max_sim_period)

    last = TSP(times=t.times[last_run],
               depths=t.depths,
               values=t.values[last_run, :],
               metadata={"Simulation_Period": max(raw['Simulation_Period']),
                         "Run": max(raw['Run'])})

    return last
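
# Usage sketch using the example file shipped with the package; header depths
# are scaled to metres (x 0.001) and only the final run is kept:
#   >>> t = read_geotop("tsp/data/example_geotop.csv")
#   >>> t.metadata["Simulation_Period"], t.metadata["Run"]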


def read_gtnp(filename: str,
              metadata_filepath=None,
              autodetect_metadata=True) -> TSP:
    """Read a text file from a GTN-P database export

    Parameters
    ----------
    filename : str
        Path to file.
    metadata_filepath : str, optional
        Path to a GTN-P metadata file, by default None
    autodetect_metadata : bool, optional
        Whether to look for a matching metadata file next to the data file when
        metadata_filepath is not given, by default True

    Returns
    -------
    TSP
        A TSP
    """
    t = read_csv(filename,
                 na_values=[-999.0],
                 datecol="Date/Depth",
                 datefmt="%Y-%m-%d %H:%M:%S",
                 depth_pattern=r"^(-?[0-9\.]+)$")

    # try to automatically detect metadata file
    if metadata_filepath is None and autodetect_metadata:
        partial_name = Path(filename).stem

        while partial_name:
            test_metadata = Path(Path(filename).parent, partial_name).with_suffix(".metadata.txt")

            if test_metadata.is_file():
                metadata_filepath = test_metadata
                break
            else:
                partial_name = partial_name[:-1]

    if metadata_filepath is not None:
        try:
            meta = GtnpMetadata(metadata_filepath)
        except Exception as e:
            warnings.warn(f"Failed to read metadata file: {e}")
            return t
        t.metadata['raw'] = meta.raw
        t.metadata['parsed'] = meta.parsed

        # set time zone
        tz = meta.get_timezone()
        if tz:
            t.set_utc_offset(int(tz.utcoffset(datetime.datetime.now()).total_seconds()))

        # set location
        t.latitude = meta.get_latitude() if meta.get_latitude() else None
        t.longitude = meta.get_longitude() if meta.get_longitude() else None

    return t
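
# Usage sketch using the example file shipped with the package; a matching
# *.metadata.txt file, when found alongside the data, fills in metadata,
# location, and UTC offset:
#   >>> t = read_gtnp("tsp/data/example_gtnp.csv")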


def read_gtpem(file: str) -> "list[TSP]":
    """Read a gtpem output file (not fully implemented: currently returns an empty list)."""
    output = list()
    try:
        with nc.Dataset(file) as ncdf:
            n_sim = len(ncdf['geotop']['sitename'][:])
            time = 1
            for i, name in enumerate(ncdf['geotop']['sitename'][:]):
                pass
                # t = TSP()
    except NameError:
        warnings.warn("netCDF4 library must be installed.")

    return output


def read_hoboware(filepath: str, hoboware_config: Optional[HOBOProperties] = None) -> IndexedTSP:
    """Read Onset HOBOware datalogger exports

    Parameters
    ----------
    filepath : str
        Path to a file
    hoboware_config : HOBOProperties, optional
        A HOBOProperties object with information about how the file is configured. If not
        provided, the configuration is automatically detected if possible, by default None

    Returns
    -------
    IndexedTSP
        An IndexedTSP. Use the `set_depths` method to provide depth information
    """
    reader = HOBO(properties=hoboware_config)
    data = reader.read(filepath)

    t = IndexedTSP(times=data['TIME'],
                   values=data.drop("TIME", axis=1).values)

    return t
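
# Usage sketch using a test file shipped with the package; HOBO exports are
# returned indexed, so depths must be assigned afterwards:
#   >>> t = read_hoboware("tsp/dataloggers/test_files/U23-001_HOBOware.csv")
#   >>> t.set_depths([0.05, 0.10])  # hypothetical depths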


def read_logr(filepath: str) -> "Union[IndexedTSP, TSP]":
    """Read a LogR datalogger export (text file)

    Reads LogR ULogC16-32 files.

    Parameters
    ----------
    filepath : str
        Path to file.

    Returns
    -------
    IndexedTSP, TSP
        An IndexedTSP or a TSP, depending on whether the depth labels are sensible
    """
    r = LogR()
    data = r.read(filepath)

    times = data['TIME'].dt.to_pydatetime()
    channels = pd.Series(data.columns).str.match("^CH")
    values = data.loc[:, channels.to_numpy()]

    if guessed_depths_ok(r.META['guessed_depths'], sum(channels)):
        t = TSP(times=times,
                depths=r.META['guessed_depths'][-sum(channels):],
                values=values.values)

    else:
        warnings.warn("Could not convert all channel labels into numeric depths. "
                      "Use the set_depths() method to specify observation depths. "
                      "Guessed depths can be accessed from .metadata['guessed_depths'].")

        t = IndexedTSP(times=times,
                       values=values.values,
                       metadata=r.META)

    return t
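
# Usage sketch using a test file shipped with the package; a TSP comes back when
# the channel labels parse to numeric depths, otherwise an IndexedTSP:
#   >>> t = read_logr("tsp/dataloggers/test_files/logR_ULogC16-32_1.csv")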


def read_netcdf(file: str, standard_name='temperature_in_ground') -> TSP:
    """Read a CF-compliant netCDF file

    Parameters
    ----------
    file : str
        Path to netCDF file.
    standard_name : str, optional
        The standard name of the data variable, by default 'temperature_in_ground'.
        'soil_temperature' is also common.

    The file must represent data from a single location.
    A single time variable (with attribute 'axis=T') must be present.
    A single depth variable (with attribute 'axis=Z') must be present.
    A single data variable (with standard name 'temperature_in_ground' or 'soil_temperature') must be present.
    """
    try:
        with nc.Dataset(file) as ncdf:
            global_attrs = {k: v for k, v in ncdf.__dict__.items() if not k.startswith("_")}

            # Checks - global attributes
            if not global_attrs.get("featureType", "").lower() == "timeseriesprofile":
                warnings.warn("featureType is not a time series profile")

            # Checks - data
            time = ncdf.get_variables_by_attributes(axis='T')
            if len(time) == 0:
                raise ValueError("No time variable (with attribute 'axis=T') found")
            if len(time) > 1:
                raise ValueError("More than one time variable (with attribute 'axis=T') found")

            if 'units' not in time[0].ncattrs():
                raise ValueError("Time variable does not have a 'units' attribute")
            if 'calendar' not in time[0].ncattrs():
                raise ValueError("Time variable does not have a 'calendar' attribute")

            depth = ncdf.get_variables_by_attributes(axis='Z')
            if len(depth) == 0:
                raise ValueError("No depth variable (with attribute 'axis=Z') found")
            if len(depth) > 1:
                raise ValueError("More than one depth variable (with attribute 'axis=Z') found")

            temperature = ncdf.get_variables_by_attributes(standard_name=lambda x: x in ['temperature_in_ground', 'soil_temperature'])
            if len(temperature) == 0:
                raise ValueError("No temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
            if len(temperature) > 1:
                raise ValueError("More than one temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")

            # Get data
            times = nc.num2date(time[0][:],
                                units=time[0].units,
                                calendar=time[0].calendar,
                                only_use_cftime_datetimes=False,
                                only_use_python_datetimes=True)
            depths = np.round(np.array(depth[0][:], dtype='float64'), 5)
            values = temperature[0][:]

    except NameError:
        warnings.warn("netCDF4 library must be installed.")
        return None

    except ValueError as e:
        warnings.warn(f"File does not meet formatting requirements: ({e})")
        return None

    t = TSP(times=times, depths=depths, values=values, metadata=global_attrs)
    return t
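
# Usage sketch (file name is hypothetical); returns None with a warning when
# the file does not meet the requirements listed in the docstring:
#   >>> t = read_netcdf("ground_temperature.nc")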


def read_ntgs(filename: str) -> TSP:
    """Read a file from the NTGS permafrost database

    Parameters
    ----------
    filename : str
        Path to file.

    Returns
    -------
    TSP
        A TSP
    """
    if Path(filename).suffix == ".csv":
        try:
            raw = pd.read_csv(filename,
                              keep_default_na=False, na_values=[''],
                              parse_dates={"time": ["date_YYYY-MM-DD", "time_HH:MM:SS"]})
        except IndexError:
            raise IndexError("There are insufficient columns; the file format is invalid.")
    elif Path(filename).suffix in [".xls", ".xlsx"]:
        raise NotImplementedError("Convert to CSV")
        # try:
        #     raw = pd.read_excel(filename, keep_default_na=False, parse_dates={"time": [4, 5]}, date_parser=self.getISOFormat)
        # except IndexError:
        #     raise IndexError("There are insufficient columns; the file format is invalid.")
    else:
        raise TypeError("Unsupported file extension.")

    metadata = {
        'project_name': raw['project_name'].values[0],
        'site_id': raw['site_id'].values[0],
        'latitude': raw['latitude'].values[0],
        'longitude': raw['longitude'].values[0]
    }
    match_depths = [c for c in [re.search(r"(-?[0-9\.]+)_m$", C) for C in raw.columns] if c]
    values = raw.loc[:, [d.group(0) for d in match_depths]].values
    times = raw['time'].dt.to_pydatetime()

    t = TSP(times=times,
            depths=[float(d.group(1)) for d in match_depths],
            values=values,
            latitude=raw['latitude'].values[0],
            longitude=raw['longitude'].values[0],
            site_id=raw['site_id'].values[0],
            metadata=metadata)

    return t
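
# Usage sketch using the example file shipped with the package:
#   >>> t = read_ntgs("tsp/data/NTGS_example_csv.csv")
#   >>> t.metadata["project_name"]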


def read_rbr(file_path: str) -> IndexedTSP:
    """Read an RBR datalogger export

    Parameters
    ----------
    file_path : str
        Path to file.

    Returns
    -------
    IndexedTSP
        An IndexedTSP
    """
    file_extension = Path(file_path).suffix.lower()
    if file_extension in [".dat", ".hex"]:
        with open(file_path, "r") as f:
            first_line = f.readline()
        model = first_line.split()[1]
        if model == "XL-800":
            r = RBRXL800()
        elif model in ["XR-420", "XR-420-T8"]:
            r = RBRXR420()
        else:
            raise ValueError(f"logger model {model} unsupported")
        data = r.read(file_path)
    elif file_extension in [".xls", ".xlsx", ".rsk"]:
        r = RBRXR420()
        data = r.read(file_path)
    else:
        raise IOError("File is not .dat, .hex, .xls, .xlsx, or .rsk")

    times = data['TIME'].dt.to_pydatetime()
    channels = pd.Series(data.columns).str.match("^ch")
    values = data.loc[:, channels.to_numpy()]

    t = IndexedTSP(times=times, values=values.values, metadata=r.META)
    if "utc offset" in list(r.META.keys()):
        t.set_utc_offset(r.META["utc offset"])

    return t
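
# Usage sketch using a test file shipped with the package; the logger model
# (XL-800 or XR-420) is detected from the first line of .dat/.hex files:
#   >>> t = read_rbr("tsp/dataloggers/test_files/rbr_001.dat")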


def read_permos(filepath: str) -> TSP:
    """Read a file from a PERMOS database export

    Parameters
    ----------
    filepath : str
        Path to file.

    Returns
    -------
    TSP
        A TSP

    Used for data obtained from PERMOS (permos.ch/data-portal/permafrost-temperature-and-active-layer)
    """
    try:
        raw = pd.read_csv(filepath,
                          index_col=0,
                          parse_dates=True)
    except IndexError:
        raise IndexError("There are insufficient columns; the file format is invalid.")

    t = TSP(times=raw.index,
            depths=[float(C) for C in raw.columns],
            values=raw.values)

    return t
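
# Usage sketch using the example file shipped with the package:
#   >>> t = read_permos("tsp/data/example_permos.csv")
#   >>> t.depths  # column headers parsed as numeric depths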