tsp 1.7.7__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tsp might be problematic. Click here for more details.

Files changed (92) hide show
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +153 -0
  4. tsp/core.py +1108 -1035
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/example_geotop.csv +5240 -5240
  10. tsp/data/example_gtnp.csv +1298 -1298
  11. tsp/data/example_permos.csv +7 -7
  12. tsp/data/test_geotop_has_space.txt +5 -5
  13. tsp/dataloggers/AbstractReader.py +43 -43
  14. tsp/dataloggers/FG2.py +110 -110
  15. tsp/dataloggers/GP5W.py +114 -114
  16. tsp/dataloggers/Geoprecision.py +34 -34
  17. tsp/dataloggers/HOBO.py +914 -914
  18. tsp/dataloggers/RBRXL800.py +190 -190
  19. tsp/dataloggers/RBRXR420.py +308 -308
  20. tsp/dataloggers/__init__.py +15 -15
  21. tsp/dataloggers/logr.py +115 -115
  22. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  23. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  24. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  25. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  26. tsp/dataloggers/test_files/010252.dat +1731 -1731
  27. tsp/dataloggers/test_files/010252.hex +1739 -1739
  28. tsp/dataloggers/test_files/010274.hex +1291 -1291
  29. tsp/dataloggers/test_files/010278.hex +3544 -3544
  30. tsp/dataloggers/test_files/012064.dat +1286 -1286
  31. tsp/dataloggers/test_files/012064.hex +1294 -1294
  32. tsp/dataloggers/test_files/012081.hex +3532 -3532
  33. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  34. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  35. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  36. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  37. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  38. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  39. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  40. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  41. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  42. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  43. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  44. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  45. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  46. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  47. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  48. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  49. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  50. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  51. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  52. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  53. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  54. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  55. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  56. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  57. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  58. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  59. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  60. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  61. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  62. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  63. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  64. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  65. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  66. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  67. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  68. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  69. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  70. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  71. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  72. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  73. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  74. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  75. tsp/gtnp.py +148 -148
  76. tsp/labels.py +3 -3
  77. tsp/misc.py +90 -90
  78. tsp/physics.py +101 -101
  79. tsp/plots/static.py +373 -373
  80. tsp/readers.py +548 -548
  81. tsp/time.py +45 -45
  82. tsp/tspwarnings.py +14 -14
  83. tsp/utils.py +101 -101
  84. tsp/version.py +1 -1
  85. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/METADATA +30 -23
  86. tsp-1.8.0.dist-info/RECORD +94 -0
  87. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/WHEEL +5 -5
  88. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info/licenses}/LICENSE +674 -674
  89. tsp/dataloggers/test_files/CSc_CR1000_1.dat +0 -295
  90. tsp/scratch.py +0 -6
  91. tsp-1.7.7.dist-info/RECORD +0 -95
  92. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/top_level.txt +0 -0
tsp/core.py CHANGED
@@ -1,1035 +1,1108 @@
1
- from __future__ import annotations
2
-
3
- import pandas as pd
4
- import re
5
- import inspect
6
- import numpy as np
7
- import functools
8
- import warnings
9
-
10
- try:
11
- import netCDF4 as nc
12
-
13
- try:
14
- from pfit.pfnet_standard import make_temperature_base
15
- except ModuleNotFoundError:
16
- warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
17
-
18
- except ModuleNotFoundError:
19
- warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
20
-
21
- from typing import Union, Optional
22
- from datetime import datetime, tzinfo, timezone, timedelta
23
-
24
- import tsp
25
- import tsp.labels as lbl
26
- import tsp.tspwarnings as tw
27
-
28
- from tsp.physics import analytical_fourier
29
- from tsp.plots.static import trumpet_curve, colour_contour, time_series, profile_evolution
30
- from tsp.time import format_utc_offset
31
- from tsp.time import get_utc_offset
32
- from tsp.misc import completeness
33
-
34
- from matplotlib.figure import Figure
35
-
36
-
37
- class TSP:
38
- """ A Time Series Profile (a collection of time series data at different depths)
39
-
40
- A TSP can also be:
41
- Thermal State of Permafrost
42
- Temperature du Sol en Profondeur
43
- Temperatures, Secondes, Profondeurs
44
-
45
- Parameters
46
- ----------
47
- times : pandas.DatetimeIndex
48
- DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
49
- but will be converted to a DatetimeIndex with no UTC offset.
50
- depths : list-like
51
- d-length array of depths
52
- values : numpy.ndarray
53
- array with shape (t,d) containing values at (t)emperatures and (d)epths
54
- longitude : float, optional
55
- Longitude at which data were collected
56
- latitude : float, optional
57
- Latitude at which data were collected
58
- site_id : str, optional
59
- Name of location at which data were collected
60
- metadata : dict
61
- Additional metadata
62
-
63
- Attributes
64
- ----------
65
- values
66
- latitude : float
67
- Latitude at which data were collected
68
- longitude : float
69
- Longitude at which data were collected
70
- metadata : dict
71
- Additional metadata provided at instantiation or by other methods
72
- """
73
-
74
- def __repr__(self) -> str:
75
- return repr(self.wide)
76
-
77
- def __str__(self) -> str:
78
- return str(self.wide)
79
-
80
- def __init__(self, times, depths, values,
81
- latitude: Optional[float]=None,
82
- longitude: Optional[float]=None,
83
- site_id: Optional[str]=None,
84
- metadata: dict={}):
85
-
86
- self._times = handle_incoming_times(times)
87
- if self._times.duplicated().any():
88
- warnings.warn(tw.DuplicateTimesWarning(self._times), stacklevel=2)
89
-
90
- if self.utc_offset:
91
- self._output_utc_offset = self.utc_offset
92
- else:
93
- self._output_utc_offset = None
94
-
95
- self._depths = np.atleast_1d(depths)
96
- self._values = np.atleast_2d(values)
97
- self.__number_of_observations = np.ones_like(values, dtype=int)
98
- self.__number_of_observations[np.isnan(values)] = 0
99
- self.metadata = metadata
100
- self.latitude = latitude
101
- self.longitude = longitude
102
- self.site_id = site_id
103
- self._freq = None
104
- self._completeness = None
105
-
106
- @property
107
- def freq(self) -> Optional[int]:
108
- """ Measurement frequency [s] """
109
- return self._freq
110
-
111
- @freq.setter
112
- def freq(self, value: int):
113
- if not isinstance(value, int):
114
- raise TypeError("Must be string, e.g. '1D', '3600s'")
115
- self._freq = value
116
-
117
- @property
118
- def completeness(self) -> Optional[pd.DataFrame]:
119
- """ Data completeness """
120
- return self._completeness
121
-
122
- @completeness.setter
123
- def completeness(self, value):
124
- raise ValueError("You can't assign this variable.")
125
-
126
- @classmethod
127
- def from_tidy_format(cls, times, depths, values,
128
- number_of_observations=None,
129
- latitude: Optional[float]=None,
130
- longitude: Optional[float]=None,
131
- site_id: Optional[str]=None,
132
- metadata:dict={}):
133
- """ Create a TSP from data in a 'tidy' or 'long' format
134
-
135
- Parameters
136
- ----------
137
- times : list-like
138
- n-length array of datetime objects
139
- depths : list-like
140
- n-length array of depths
141
- values : numpy.ndarray
142
- n-length array of (temperaure) values at associated time and depth
143
- number_of_observations : numpy.ndarray, optional
144
- n-length array of number of observations at associated time and
145
- depth for aggregated values (default: 1)
146
- longitude : float, optional
147
- Longitude at which data were collected
148
- latitude : float, optional
149
- Latitude at which data were collected
150
- site_id : str, optional
151
- Name of location at which data were collected
152
- metadata : dict
153
- Additional metadata
154
- """
155
- times = np.atleast_1d(times)
156
- depths = np.atleast_1d(depths)
157
- values = np.atleast_1d(values)
158
-
159
- number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
160
- df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
161
- df.set_index(["times", "depths"], inplace=True)
162
-
163
- try:
164
- unstacked = df.unstack()
165
- except ValueError as e:
166
- if np.any(df.index.duplicated()):
167
- print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
168
- raise e
169
-
170
- temps = unstacked.get('temperature_in_ground')
171
-
172
- this = cls(times=temps.index.values,
173
- depths=temps.columns.values,
174
- values=temps.values,
175
- latitude=latitude,
176
- longitude=longitude,
177
- site_id=site_id,
178
- metadata=metadata)
179
-
180
- number_of_observations = unstacked.get('number_of_observations').values
181
-
182
- number_of_observations[np.isnan(number_of_observations)] = 0
183
- this.__number_of_observations = number_of_observations
184
- return this
185
-
186
- @classmethod
187
- def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
188
- """ Use an existing TSP object as a template, """
189
- kw = {}
190
- for arg in inspect.getfullargspec(TSP).args[1:]:
191
- if kwargs.get(arg) is not None:
192
- kw[arg] = kwargs.get(arg)
193
- else:
194
- kw[arg] = getattr(t, arg)
195
-
196
- t = TSP(**kw)
197
-
198
- return t
199
-
200
- @classmethod
201
- def from_json(cls, json_file) -> "TSP":
202
- """ Read data from a json file
203
-
204
- Parameters
205
- ----------
206
- json_file : str
207
- Path to a json file from which to read
208
- """
209
- df = pd.read_json(json_file)
210
- depth_pattern = r"^(-?[0-9\.]+)$"
211
-
212
- times = pd.to_datetime(df['time']).values
213
- depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
214
- values = df.loc[:, depths].to_numpy()
215
-
216
- t = cls(times=times, depths=depths, values=values)
217
-
218
- return t
219
-
220
- @classmethod
221
- def synthetic(cls, depths: "np.ndarray", start="2000-01-01", end="2003-01-01",
222
- Q:"Optional[float]"=0.2,
223
- c:"Optional[float]"=1.6e6,
224
- k:"Optional[float]"=2.5,
225
- A:"Optional[float]"=6,
226
- MAGST:"Optional[float]"=-0.5) -> "TSP":
227
- """
228
- Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
229
- Suitable for testing
230
-
231
- Parameters
232
- ----------
233
- depths : np.ndarray
234
- array of depths in m
235
- start : str
236
- array of times in seconds
237
- Q : Optional[float], optional
238
- Ground heat flux [W m-2], by default 0.2
239
- c : Optional[float], optional
240
- heat capacity [J m-3 K-1], by default 1.6e6
241
- k : Optional[float], optional
242
- thermal conductivity [W m-1 K-1], by default 2.5
243
- A : Optional[float], optional
244
- Amplitude of temperature fluctuation [C], by default 6
245
- MAGST : Optional[float], optional
246
- Mean annual ground surface temperature [C], by default -0.5
247
-
248
- Returns
249
- -------
250
- TSP
251
- A timeseries profile (TSP) object
252
- """
253
- times = pd.date_range(start=start, end=end).to_pydatetime()
254
- t_sec = np.array([(t-times[0]).total_seconds() for t in times])
255
-
256
- values = analytical_fourier(depths=depths, times=t_sec, Q=Q, c=c, k=k, A=A, MAGST=MAGST)
257
-
258
- this = cls(depths=depths, times=times, values=values)
259
-
260
- return this
261
-
262
- @property
263
- @functools.lru_cache()
264
- def long(self) -> "pd.DataFrame":
265
- """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
266
-
267
- Returns
268
- -------
269
- pandas.DataFrame
270
- Time series profile data with columns:
271
- - **time**: time
272
- - **depth**: depth
273
- - **temperature_in_ground**: temperature
274
- - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
275
- """
276
- values = self.wide.melt(id_vars='time',
277
- var_name="depth",
278
- value_name="temperature_in_ground")
279
-
280
- number_of_observations = self.number_of_observations.melt(id_vars='time',
281
- var_name="depth",
282
- value_name="number_of_observations")
283
-
284
- values['number_of_observations'] = number_of_observations['number_of_observations']
285
-
286
- return values
287
-
288
- @property
289
- @functools.lru_cache()
290
- def wide(self) -> "pd.DataFrame":
291
- """ Return the data in a 'wide' format (one column per depth)
292
-
293
- Returns
294
- -------
295
- pandas.DataFrame
296
- Time series profile data
297
- """
298
- tabular = pd.DataFrame(self._values)
299
- tabular.columns = self._depths
300
- tabular.index = self.times
301
- tabular.insert(0, "time", self.times)
302
-
303
- return tabular
304
-
305
- @property
306
- @functools.lru_cache()
307
- def number_of_observations(self) -> "pd.DataFrame":
308
- """ The number of observations for an average at a particular depth or time.
309
-
310
- For pure observational data, the number of observations will always be '1'. When data are aggregated,
311
- (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`) these numbers
312
- will be greater than 1.
313
-
314
- Returns
315
- -------
316
- DataFrame
317
- Number of observations
318
- """
319
- tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
320
- tabular.columns = self._depths
321
- tabular.index = self._times
322
- tabular.insert(0, "time", self._times)
323
-
324
- return tabular
325
-
326
- @number_of_observations.setter
327
- def number_of_observations(self, value):
328
- raise ValueError(f"You can't assign {value} to this variable (no assignment allowed).")
329
-
330
- def reset_counts(self):
331
- """ Set observation count to 1 if data exists, 0 otherwise """
332
- self.__number_of_observations = (~self.wide.isna()).astype('boolean')
333
-
334
- def set_utc_offset(self, offset:"Union[int,str]") -> None:
335
- """ Set the time zone of the data by providing a UTC offset
336
-
337
- Parameters
338
- ----------
339
- offset : int, str
340
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
341
- """
342
- if self.utc_offset is not None:
343
- raise ValueError("You can only set the UTC offset once.")
344
-
345
- utc_offset = get_utc_offset(offset)
346
-
347
- tz = timezone(timedelta(seconds = utc_offset))
348
- self._times = self._times.tz_localize(tz)
349
- self._output_utc_offset = timezone(timedelta(seconds = utc_offset))
350
-
351
- TSP.wide.fget.cache_clear()
352
- TSP.long.fget.cache_clear()
353
-
354
- @property
355
- def utc_offset(self) -> "Optional[tzinfo]":
356
- """ Get the time zone of the data by providing a UTC offset
357
-
358
- Returns
359
- -------
360
- datetime.tzinfo
361
- A timezone object
362
- """
363
- if self._times.tz is None:
364
- return None
365
- else:
366
- return self._times.tz
367
-
368
- @utc_offset.setter
369
- def utc_offset(self, value):
370
- self.set_utc_offset(value)
371
-
372
- @property
373
- def output_utc_offset(self) -> "Optional[tzinfo]":
374
- """ Get the time zone in which to output or display the data by providing a UTC offset
375
-
376
- Returns
377
- -------
378
- datetime.tzinfo
379
- A timezone object
380
- """
381
- if self._output_utc_offset is None:
382
- return None
383
- else:
384
- return self._output_utc_offset
385
-
386
- @output_utc_offset.setter
387
- def output_utc_offset(self, offset:"Union[int,str]") -> None:
388
- self.set_output_utc_offset(offset)
389
-
390
- def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
391
- """ Set the time zone in which to display the output or data by providing a UTC offset
392
- Parameters
393
- ----------
394
- offset : int, str
395
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
396
- """
397
- utc_offset = get_utc_offset(offset)
398
- tz = timezone(timedelta(seconds = utc_offset))
399
- self._output_utc_offset = tz
400
-
401
- TSP.wide.fget.cache_clear()
402
- TSP.long.fget.cache_clear()
403
-
404
- def reset_output_utc_offset(self) -> None:
405
- """ Reset the time zone in which to output or display the data to the default (the one set by set_utc_offset)
406
-
407
- """
408
- if self.utc_offset is None:
409
- raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
410
- else:
411
- self._output_utc_offset = self.utc_offset
412
-
413
- def __nly(self,
414
- freq_fmt:str,
415
- new_freq,
416
- min_count:Optional[int],
417
- max_gap:Optional[int],
418
- min_span:Optional[int]) -> TSP:
419
- """
420
- Temporal aggregation by grouping according to a string-ified time
421
-
422
- Parameters
423
- ----------
424
- freq_fmt : str
425
- Python date format string used to aggregate and recover time
426
-
427
- Returns
428
- -------
429
- tuple[pd.DataFrame, pd.DataFrame]
430
- A tuple of dataframes, the first containing the aggregated data, the second containing the number of observations
431
- """
432
- R = self.wide.drop("time", axis=1).resample(freq_fmt)
433
- cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
434
- total_obs = R.count()
435
- values = R.mean()
436
-
437
- # Calculate masks
438
- mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
439
-
440
- if min_count is not None:
441
- mc_mask = (cumulative_obs < min_count)
442
- if max_gap is not None:
443
- Mg_mask = max_gap_mask(R, max_gap)
444
- if min_span is not None:
445
- ms_mask = min_span_mask(R, min_span)
446
-
447
- mask = (mc_mask | Mg_mask | ms_mask)
448
- values[mask] = np.nan
449
-
450
- # Construct TSP
451
- t = TSP.__from_tsp(self, times=values.index,
452
- depths=values.columns,
453
- values=values.values)
454
- t.__number_of_observations = cumulative_obs
455
- t.freq = new_freq
456
-
457
- # Calculate data completeness
458
- if self.freq is not None:
459
- f1 = self.freq
460
- f2 = new_freq
461
- t._completeness = completeness(total_obs, f1, f2)
462
-
463
- return t
464
-
465
- def monthly(self,
466
- min_count:Optional[int]=24,
467
- max_gap:Optional[int]=3600*24*8,
468
- min_span:Optional[int]=3600*24*21) -> "TSP":
469
- """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
470
-
471
- Parameters
472
- ----------
473
- min_count : int
474
- Minimum number of observations in a month to be considered a valid average,
475
- defaults to None
476
- max_gap : int
477
- Maximum gap (in seconds) between data points to be considered a valid average,
478
- defaults to None
479
- min_span : int
480
- Minimum total data range (in seconds) to be consiered a valid average,
481
- defaults to None
482
-
483
- Returns
484
- -------
485
- TSP
486
- A TSP object with data aggregated to monthly averages
487
- """
488
- t = self.__nly(freq_fmt="M",
489
- new_freq=lbl.MONTHLY,
490
- min_count=min_count,
491
- max_gap=max_gap,
492
- min_span=min_span)
493
-
494
- return t
495
-
496
- def daily(self,
497
- min_count:Optional[int]=None,
498
- max_gap:Optional[int]=None,
499
- min_span:Optional[int]=None) -> "TSP":
500
- """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
501
-
502
- Parameters
503
- ----------
504
- min_count : int
505
- Minimum number of observations in a day to be considered a valid average,
506
- defaults to None
507
- max_gap : int
508
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
509
- min_span : int
510
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
511
-
512
- Returns
513
- -------
514
- TSP
515
- A TSP object with data aggregated to daily averages
516
- """
517
- # if the data is already daily +/- 1min , just return it
518
- t = self.__nly(freq_fmt="D",
519
- new_freq=lbl.DAILY,
520
- min_count=min_count,
521
- max_gap=max_gap,
522
- min_span=min_span)
523
-
524
- return t
525
-
526
- def yearly(self,
527
- min_count:Optional[int]=None,
528
- max_gap:Optional[int]=None,
529
- min_span:Optional[int]=None) -> "TSP":
530
- """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
531
-
532
- Parameters
533
- ----------
534
- min_count : int
535
- Minimum number of observations in a month to be considered a valid average, defaults to None
536
- max_gap : int
537
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
538
- min_span : int
539
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
540
-
541
- Returns
542
- -------
543
- TSP
544
- A TSP object with data aggregated to yearly averages
545
- """
546
- t = self.__nly(freq_fmt="Y",
547
- new_freq=lbl.YEARLY,
548
- min_count=min_count,
549
- max_gap=max_gap,
550
- min_span=min_span)
551
-
552
- return t
553
-
554
- @property
555
- def depths(self) -> "np.ndarray":
556
- """ Return the depth values in the profile
557
-
558
- Returns
559
- -------
560
- numpy.ndarray
561
- The depths in the profile
562
- """
563
- return self._depths
564
-
565
- @depths.setter
566
- def depths(self, value):
567
- depths = np.atleast_1d(value)
568
-
569
- if not len(depths) == len(self._depths):
570
- raise ValueError(f"List of depths must have length of {len(self._depths)}.")
571
-
572
- self._depths = depths
573
-
574
- TSP.wide.fget.cache_clear()
575
- TSP.long.fget.cache_clear()
576
-
577
- @property
578
- def times(self):
579
- """ Return the timestamps in the time series
580
-
581
- Returns
582
- -------
583
- pandas.DatetimeIndex
584
- The timestamps in the time series
585
- """
586
- if self.utc_offset is None:
587
- return self._times
588
-
589
- elif self._output_utc_offset == self.utc_offset:
590
- return self._times
591
-
592
- else:
593
- return self._times.tz_convert(self.output_utc_offset)
594
-
595
- @property
596
- def values(self):
597
- return self._values
598
-
599
- def to_gtnp(self, filename: str) -> None:
600
- """ Write the data in GTN-P format
601
-
602
- Parameters
603
- ----------
604
- filename : str
605
- Path to the file to write to
606
- """
607
- df = self.wide.rename(columns={'time': 'Date/Depth'})
608
- df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
609
-
610
- df.to_csv(filename, index=False, na_rep="-999")
611
-
612
- def to_ntgs(self, filename:str, project_name:str="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
613
- """ Write the data in NTGS template format
614
-
615
- Parameters
616
- ----------
617
- filename : str
618
- Path to the file to write to
619
- project_name : str, optional
620
- The project name, by default ""
621
- site_id : str, optional
622
- The name of the site , by default None
623
- latitude : float, optional
624
- WGS84 latitude at which the observations were recorded, by default None
625
- longitude : float, optional
626
- WGS84 longitude at which the observations were recorded, by default None
627
- """
628
- if latitude is None:
629
- latitude = self.latitude if self.latitude is not None else ""
630
-
631
- if longitude is None:
632
- longitude = self.longitude if self.longitude is not None else ""
633
-
634
- if site_id is None:
635
- site_id = self.site_id if self.site_id is not None else ""
636
- data = self.values
637
-
638
- df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
639
- 'site_id': pd.Series(dtype='str'),
640
- 'latitude': pd.Series(dtype='float'),
641
- 'longitude': pd.Series(dtype='float')
642
- })
643
-
644
- df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
645
- df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
646
-
647
- df["project_name"] = project_name
648
- df["site_id"] = site_id
649
- df["latitude"] = latitude
650
- df["longitude"] = longitude
651
-
652
- headers = [str(d) + "_m" for d in self.depths]
653
-
654
- for i, h in enumerate(headers):
655
- df[h] = data[:, i]
656
-
657
- df.to_csv(filename, index=False)
658
-
659
- def to_netcdf(self, file: str) -> None:
660
- """ Write the data as a netcdf"""
661
- try:
662
- ncf = make_temperature_base(file, len(self.depths))
663
- except NameError:
664
- warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
665
- return
666
-
667
- with nc.Dataset(ncf, 'a') as ncd:
668
- pytime = self.times.to_pydatetime()
669
-
670
- ncd['depth_below_ground_surface'][:] = self.depths
671
-
672
-
673
- ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
674
- ncd['ground_temperature'][:] = self.values
675
-
676
- if self.latitude:
677
- ncd['latitude'][:] = self.latitude
678
- if self.longitude:
679
- ncd['longitude'][:] = self.longitude
680
- if self.site_id:
681
- ncd['site_name'] = self.site_id
682
-
683
- for key, value in self.metadata:
684
- try:
685
- ncd.setncattr(key, value)
686
- except Exception:
687
- warnings.warn(f"Could not set metadata item: {key}", stacklevel=2)
688
-
689
- def to_json(self, file: str) -> None:
690
- """ Write the data to a serialized json file """
691
- with open(file, 'w') as f:
692
- f.write(self._to_json())
693
-
694
- def _to_json(self) -> str:
695
- return self.wide.to_json()
696
-
697
- def plot_profiles(self, P:int=100, n:int=10) -> Figure:
698
- """ Create a plot of the temperature profiles at different times
699
-
700
- Parameters
701
- ----------
702
- P : int
703
- Percentage of time range to plot
704
- n : int
705
- Number of evenly-spaced profiles to plot
706
-
707
- Returns
708
- -------
709
- Figure
710
- matplotlib `Figure` object
711
- """
712
- fig = profile_evolution(depths=self.depths, times=self.times, values=self._values, P=P, n=n)
713
- fig.show()
714
- return fig
715
-
716
- def plot_trumpet(self,
717
- year: Optional[int]=None,
718
- begin: Optional[datetime]=None,
719
- end: Optional[datetime]=None,
720
- min_completeness: Optional[float]=None,
721
- **kwargs) -> Figure:
722
- """ Create a trumpet plot from the data
723
-
724
- Parameters
725
- ----------
726
- year : int, optional
727
- Which year to plot
728
- begin : datetime, optional
729
- If 'end' also provided, the earliest measurement to include in the averaging for the plot
730
- end : datetime, optional
731
- If 'begin' also provided, the latest measurement to include in the averaging for the plot
732
- min_completeness : float, optional
733
- If provided, the minimum completeness (fractional, 0 to 1) required to include
734
- in temperature envelope, otherwise
735
- the point is plotted as an unconnected, slightly transparent dot, by default None
736
- **kwargs : dict, optional
737
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
738
- list of all possible arguments.
739
-
740
- Returns
741
- -------
742
- Figure
743
- a matplotlib `Figure` object
744
- """
745
- df = self.long.dropna()
746
-
747
- if year is not None:
748
- df = df[df['time'].dt.year == year]
749
-
750
- elif begin is not None or end is not None:
751
- raise NotImplementedError
752
-
753
- else:
754
- raise ValueError("One of 'year', 'begin', 'end' must be provided.")
755
-
756
- grouped = df.groupby('depth')
757
-
758
- max_t = grouped.max().get('temperature_in_ground').values
759
- min_t = grouped.min().get('temperature_in_ground').values
760
- mean_t = grouped.mean().get('temperature_in_ground').values
761
- depth = np.array([d for d in grouped.groups.keys()])
762
-
763
- # Calculate completeness
764
- c = self.yearly(None, None, None).completeness
765
-
766
- if min_completeness is not None and c is not None:
767
- C = c[c.index.year == year]
768
- C = C[depth].iloc[0,:].values
769
-
770
- else:
771
- C = None
772
-
773
- fig = trumpet_curve(depth=depth,
774
- t_max=max_t,
775
- t_min=min_t,
776
- t_mean=mean_t,
777
- min_completeness=min_completeness,
778
- data_completeness=C,
779
- **kwargs)
780
- fig.show()
781
-
782
- return fig
783
-
784
- def plot_contour(self, **kwargs) -> Figure:
785
- """ Create a contour plot
786
-
787
- Parameters
788
- ----------
789
- **kwargs : dict, optional
790
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
791
- list of all possible arguments.
792
-
793
- Returns
794
- -------
795
- Figure
796
- matplotlib `Figure` object
797
- """
798
- fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
799
-
800
- if self.output_utc_offset is not None:
801
- label = format_utc_offset(self.output_utc_offset)
802
- if label != "UTC":
803
- label = f"UTC{label}"
804
- fig.axes[0].set_xlabel(f"Time [{label}]")
805
-
806
- fig.show()
807
-
808
- return fig
809
-
810
- def plot_timeseries(self, depths: list=[], **kwargs) -> Figure:
811
- """Create a time series T(t) plot
812
-
813
- Parameters
814
- ----------
815
- depths : list, optional
816
- If non-empty, restricts the depths to include in the plot, by default []
817
- **kwargs : dict, optional
818
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
819
- list of all possible arguments.
820
-
821
- Returns
822
- -------
823
- Figure
824
- matplotlib `Figure` object
825
- """
826
- if depths == []:
827
- depths = self.depths
828
-
829
- d_mask = np.isin(self.depths, depths)
830
-
831
- fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)
832
-
833
-
834
- if self.output_utc_offset is not None:
835
- label = format_utc_offset(self.output_utc_offset)
836
- if label != "UTC":
837
- label = f"UTC{label}"
838
- fig.axes[0].set_xlabel(f"Time [{label}]")
839
- fig.autofmt_xdate()
840
- fig.show()
841
-
842
- return fig
843
-
844
-
845
- class AggregatedTSP(TSP):
846
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
847
-
848
- Used in situations when depths are unknown (such as when reading datlogger exports
849
- that don't have depth measurements.)
850
-
851
- Parameters
852
- ----------
853
- times : list-like
854
- t-length array of datetime objects
855
- values : numpy.ndarray
856
- array with shape (t,d) containing values at (t)emperatures and (d)epths
857
- **kwargs : dict
858
- Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
859
- list of all possible arguments.
860
- """
861
-
862
-
863
- class IndexedTSP(TSP):
864
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
865
-
866
- Used in situations when depths are unknown (such as when reading datlogger exports
867
- that don't have depth measurements.)
868
-
869
- Parameters
870
- ----------
871
- times : list-like
872
- t-length array of datetime objects
873
- values : numpy.ndarray
874
- array with shape (t,d) containing values at (t)emperatures and (d)epths
875
- **kwargs : dict
876
- Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
877
- list of all possible arguments.
878
- """
879
-
880
- def __init__(self, times, values, **kwargs):
881
- depths = np.arange(0, values.shape[1]) + 1
882
- super().__init__(times=times, depths=depths, values=values, **kwargs)
883
-
884
- @property
885
- def depths(self) -> np.ndarray:
886
- """Depth indices
887
-
888
- Returns
889
- -------
890
- numpy.ndarray
891
- An array of depth indices
892
- """
893
- warnings.warn("This TSP uses indices (1,2,3,...) instad of depths. Use set_depths() to use measured depths.", stacklevel=2)
894
- return self._depths
895
-
896
- @depths.setter
897
- def depths(self, value):
898
- TSP.depths.__set__(self, value)
899
-
900
- def set_depths(self, depths: np.ndarray):
901
- """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`
902
-
903
- Parameters
904
- ----------
905
- depths : np.ndarray
906
- An array or list of depth values equal in lenth to the depth indices
907
- """
908
- self.depths = depths
909
- self.__class__ = TSP
910
-
911
-
912
-
913
- def span(S: pd.Series) -> float:
914
- first = S.first_valid_index() # type: pd.Timestamp
915
- last = S.last_valid_index() # type: pd.Timestamp
916
- if first is None or last is None:
917
- return 0
918
-
919
- return (last - first).total_seconds()
920
-
921
- def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
922
- threshold: float) -> "pd.DataFrame":
923
- s = R.apply(lambda x: span(x))
924
- return s < threshold
925
-
926
-
927
- def gap(S: pd.Series) -> float:
928
-
929
- d = np.diff(S.dropna().index)
930
- if len(d) == 0:
931
- return 0
932
- elif len(d) == 1:
933
- return 0
934
- elif len(d) > 1:
935
- gap = max(d).astype('timedelta64[s]').astype(float)
936
- return gap
937
-
938
-
939
- def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
940
- threshold: float) -> "pd.DataFrame":
941
- g = R.apply(lambda x: gap(x))
942
- return (g > threshold) | (g == 0)
943
-
944
-
945
-
946
-
947
- def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
948
- """ Mask out observational groups in which there is more than a certain size temporal gap
949
-
950
- Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
951
- or end of the aggregation group (using min_span).
952
-
953
- Parameters
954
- ----------
955
- grouped : pandas.core.groupby.DataFrameGroupBy
956
- groupby with 'time' and 'depth' columns
957
- max_gap : int
958
- maximum gap in seconds to tolerate between observations in a group
959
- min_span : int
960
- minimum data range (beginning to end) in seconds.
961
-
962
- Returns
963
- -------
964
- numpy.ndarray
965
- boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
966
- """
967
- if max_gap is not None:
968
- max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
969
- max_diff = max_diff.unstack().to_numpy()
970
- diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
971
- else:
972
- diff_mask = np.zeros_like(grouped, dtype=bool)
973
-
974
- if min_span is not None:
975
- total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
976
- span_mask = np.where(total_span < min_span, True, False)
977
- else:
978
- span_mask = np.zeros_like(grouped, dtype=bool)
979
-
980
- mask = diff_mask * span_mask
981
-
982
- return mask
983
-
984
-
985
- def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
986
- """ Create a mask array for an
987
-
988
- Parameters
989
- ----------
990
- number_of_observations : numpy.ndarray
991
- Array of how many data points are in aggregation
992
- min_count : int
993
- Minimum number of data points for aggregation to be 'valid'
994
-
995
- Returns
996
- -------
997
- np.ndarray
998
- a mask, True where data should be masked
999
- """
1000
- valid = np.less(number_of_observations, min_count) # type: np.ndarray
1001
- return valid
1002
-
1003
-
1004
- def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
1005
- """Convert a list of times to a pandas DatetimeIndex object"""
1006
- invalid_msg = "Times must be a list, numpy array, pandas DatetimeIndex, or pandas Series"
1007
-
1008
- try:
1009
- if not len(times):
1010
- raise ValueError(invalid_msg)
1011
- except TypeError:
1012
- raise ValueError(invalid_msg)
1013
-
1014
- if isinstance(times, pd.DatetimeIndex):
1015
- return times
1016
-
1017
- if isinstance(times, pd.Series):
1018
- try:
1019
- times = pd.DatetimeIndex(times)
1020
- except Exception:
1021
- raise ValueError("Series must be convertible to DatetimeIndex")
1022
- times.name = 'time'
1023
-
1024
- return times
1025
-
1026
- elif isinstance(times, np.ndarray):
1027
- times = pd.to_datetime(times)
1028
- times.name = 'time'
1029
- return times
1030
-
1031
- elif isinstance(times, list):
1032
- return pd.to_datetime(times)
1033
-
1034
- else:
1035
- raise ValueError(invalid_msg)
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+ import re
5
+ import inspect
6
+ import numpy as np
7
+ import functools
8
+ import warnings
9
+
10
+ try:
11
+ import netCDF4 as nc
12
+
13
+ try:
14
+ from pfit.pfnet_standard import make_temperature_base
15
+ except ModuleNotFoundError:
16
+ warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
17
+
18
+ except ModuleNotFoundError:
19
+ warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
20
+
21
+ from typing import Union, Optional
22
+ from numpy.typing import NDArray
23
+ from datetime import datetime, tzinfo, timezone, timedelta
24
+
25
+ import tsp
26
+ import tsp.labels as lbl
27
+ import tsp.tspwarnings as tw
28
+
29
+ from tsp.physics import analytical_fourier
30
+ from tsp.plots.static import trumpet_curve, colour_contour, time_series, profile_evolution
31
+ from tsp.time import format_utc_offset
32
+ from tsp.time import get_utc_offset
33
+ from tsp.misc import completeness
34
+ from tsp.concatenation import _tsp_concat
35
+
36
+ from matplotlib.figure import Figure
37
+
38
+
39
+ class TSP:
40
+ """ A Time Series Profile (a collection of time series data at different depths)
41
+
42
+ A TSP can also be:
43
+ Thermal State of Permafrost
44
+ Temperature du Sol en Profondeur
45
+ Temperatures, Secondes, Profondeurs
46
+
47
+ Parameters
48
+ ----------
49
+ times : pandas.DatetimeIndex
50
+ DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
51
+ but will be converted to a DatetimeIndex with no UTC offset.
52
+ depths : list-like
53
+ d-length array of depths
54
+ values : numpy.ndarray
55
+ array with shape (t,d) containing values at (t)emperatures and (d)epths
56
+ longitude : float, optional
57
+ Longitude at which data were collected
58
+ latitude : float, optional
59
+ Latitude at which data were collected
60
+ site_id : str, optional
61
+ Name of location at which data were collected
62
+ metadata : dict
63
+ Additional metadata
64
+
65
+ Attributes
66
+ ----------
67
+ values
68
+ latitude : float
69
+ Latitude at which data were collected
70
+ longitude : float
71
+ Longitude at which data were collected
72
+ metadata : dict
73
+ Additional metadata provided at instantiation or by other methods
74
+ """
75
+
76
+ def __repr__(self) -> str:
77
+ return repr(self.wide)
78
+
79
+ def __str__(self) -> str:
80
+ return str(self.wide)
81
+
82
+ def __add__(self, other: TSP) -> TSP:
83
+ """ Concatenate two TSP objects along the time axis.
84
+ The two TSP objects must have the same depths and the same UTC offset.
85
+
86
+ Parameters
87
+ ----------
88
+ other : TSP
89
+ Another TSP object to concatenate with this one
90
+
91
+ Returns
92
+ -------
93
+ TSP
94
+ A new TSP object with the concatenated data
95
+ """
96
+ if not isinstance(other, TSP):
97
+ raise TypeError("Can only concatenate TSP objects.")
98
+
99
+ if self.utc_offset != other.utc_offset:
100
+ raise ValueError("UTC offsets must be the same to concatenate.")
101
+
102
+ return tsp_concat([self, other])
103
+
104
+ def __init__(self, times, depths, values,
105
+ latitude: Optional[float]=None,
106
+ longitude: Optional[float]=None,
107
+ site_id: Optional[str]=None,
108
+ metadata: dict={}):
109
+
110
+ self._times = handle_incoming_times(times)
111
+ if self._times.duplicated().any():
112
+ warnings.warn(tw.DuplicateTimesWarning(self._times), stacklevel=2)
113
+
114
+ if self.utc_offset:
115
+ self._output_utc_offset = self.utc_offset
116
+ else:
117
+ self._output_utc_offset = None
118
+
119
+ self._depths = np.atleast_1d(depths)
120
+ self._values = np.atleast_2d(values)
121
+ self.__number_of_observations = np.ones_like(values, dtype=int)
122
+ self.__number_of_observations[np.isnan(values)] = 0
123
+ self.metadata = metadata
124
+ self.latitude = latitude
125
+ self.longitude = longitude
126
+ self.site_id = site_id
127
+ self._freq = None
128
+ self._completeness = None
129
+
130
+ self._export_precision = 3
131
+
132
+ @property
133
+ def freq(self) -> Optional[int]:
134
+ """ Measurement frequency [s] """
135
+ return self._freq
136
+
137
+ @freq.setter
138
+ def freq(self, value: int):
139
+ if not isinstance(value, int):
140
+ raise TypeError("Must be string, e.g. '1D', '3600s'")
141
+ self._freq = value
142
+
143
+ @property
144
+ def completeness(self) -> Optional[pd.DataFrame]:
145
+ """ Data completeness """
146
+ return self._completeness
147
+
148
+ @completeness.setter
149
+ def completeness(self, value):
150
+ raise ValueError("You can't assign this variable.")
151
+
152
+ @classmethod
153
+ def from_tidy_format(cls, times, depths, values,
154
+ number_of_observations=None,
155
+ latitude: Optional[float]=None,
156
+ longitude: Optional[float]=None,
157
+ site_id: Optional[str]=None,
158
+ metadata:dict={}):
159
+ """ Create a TSP from data in a 'tidy' or 'long' format
160
+
161
+ Parameters
162
+ ----------
163
+ times : list-like
164
+ n-length array of datetime objects
165
+ depths : list-like
166
+ n-length array of depths
167
+ values : numpy.ndarray
168
+ n-length array of (temperaure) values at associated time and depth
169
+ number_of_observations : numpy.ndarray, optional
170
+ n-length array of number of observations at associated time and
171
+ depth for aggregated values (default: 1)
172
+ longitude : float, optional
173
+ Longitude at which data were collected
174
+ latitude : float, optional
175
+ Latitude at which data were collected
176
+ site_id : str, optional
177
+ Name of location at which data were collected
178
+ metadata : dict
179
+ Additional metadata
180
+ """
181
+ times = np.atleast_1d(times)
182
+ depths = np.atleast_1d(depths)
183
+ values = np.atleast_1d(values)
184
+
185
+ number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
186
+ df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
187
+ df.set_index(["times", "depths"], inplace=True)
188
+
189
+ try:
190
+ unstacked = df.unstack()
191
+ except ValueError as e:
192
+ if np.any(df.index.duplicated()):
193
+ print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
194
+ raise e
195
+
196
+ temps = unstacked.get('temperature_in_ground')
197
+
198
+ this = cls(times=temps.index.values,
199
+ depths=temps.columns.values,
200
+ values=temps.values,
201
+ latitude=latitude,
202
+ longitude=longitude,
203
+ site_id=site_id,
204
+ metadata=metadata)
205
+
206
+ number_of_observations = unstacked.get('number_of_observations').values
207
+
208
+ number_of_observations[np.isnan(number_of_observations)] = 0
209
+ this.__number_of_observations = number_of_observations
210
+ return this
211
+
212
+ @classmethod
213
+ def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
214
+ """ Use an existing TSP object as a template, """
215
+ kw = {}
216
+ for arg in inspect.getfullargspec(TSP).args[1:]:
217
+ if kwargs.get(arg) is not None:
218
+ kw[arg] = kwargs.get(arg)
219
+ else:
220
+ kw[arg] = getattr(t, arg)
221
+
222
+ t = TSP(**kw)
223
+
224
+ return t
225
+
226
+ @classmethod
227
+ def from_json(cls, json_file) -> "TSP":
228
+ """ Read data from a json file
229
+
230
+ Parameters
231
+ ----------
232
+ json_file : str
233
+ Path to a json file from which to read
234
+ """
235
+ df = pd.read_json(json_file)
236
+ depth_pattern = r"^(-?[0-9\.]+)$"
237
+
238
+ times = pd.to_datetime(df['time']).values
239
+ depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
240
+ values = df.loc[:, depths].to_numpy()
241
+
242
+ t = cls(times=times, depths=depths, values=values)
243
+
244
+ return t
245
+
246
+ @classmethod
247
+ def synthetic(cls, depths: NDArray[np.number],
248
+ start:str ="2000-01-01",
249
+ end:str ="2003-01-01",
250
+ freq: "str"="D",
251
+ Q:float=0.2,
252
+ c:float=1.6e6,
253
+ k:float=2.5,
254
+ A:float=6,
255
+ MAGST:float=-0.5) -> "TSP":
256
+ """
257
+ Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
258
+ Suitable for testing
259
+
260
+ Parameters
261
+ ----------
262
+ depths : np.ndarray
263
+ array of depths in metres
264
+ start : str
265
+ start date for the time series, in the format "YYYY-MM-DD"
266
+ end : str
267
+ end date for the time series, in the format "YYYY-MM-DD"
268
+ freq : str
269
+ pandas frequency string, e.g. "D" for daily, "H" for hourly, etc.
270
+ Q : Optional[float], optional
271
+ Ground heat flux [W m-2], by default 0.2
272
+ c : Optional[float], optional
273
+ heat capacity [J m-3 K-1], by default 1.6e6
274
+ k : Optional[float], optional
275
+ thermal conductivity [W m-1 K-1], by default 2.5
276
+ A : Optional[float], optional
277
+ Amplitude of temperature fluctuation [C], by default 6
278
+ MAGST : Optional[float], optional
279
+ Mean annual ground surface temperature [C], by default -0.5
280
+
281
+ Returns
282
+ -------
283
+ TSP
284
+ A timeseries profile (TSP) object
285
+ """
286
+ times = pd.date_range(start=start, end=end, freq=freq).to_pydatetime()
287
+ t_sec = np.array([(t-times[0]).total_seconds() for t in times])
288
+
289
+ values = analytical_fourier(depths=depths,
290
+ times=t_sec,
291
+ Q=Q,
292
+ c=c,
293
+ k=k,
294
+ A=A,
295
+ MAGST=MAGST)
296
+
297
+ this = cls(depths=depths, times=times, values=values)
298
+
299
+ return this
300
+
301
+ @property
302
+ @functools.lru_cache()
303
+ def long(self) -> "pd.DataFrame":
304
+ """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
305
+
306
+ Returns
307
+ -------
308
+ pandas.DataFrame
309
+ Time series profile data with columns:
310
+ - **time**: time
311
+ - **depth**: depth
312
+ - **temperature_in_ground**: temperature
313
+ - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
314
+ """
315
+ values = self.wide.melt(id_vars='time',
316
+ var_name="depth",
317
+ value_name="temperature_in_ground")
318
+
319
+ number_of_observations = self.number_of_observations.melt(id_vars='time',
320
+ var_name="depth",
321
+ value_name="number_of_observations")
322
+
323
+ values['number_of_observations'] = number_of_observations['number_of_observations']
324
+
325
+ return values
326
+
327
+ @property
328
+ @functools.lru_cache()
329
+ def wide(self) -> "pd.DataFrame":
330
+ """ Return the data in a 'wide' format (one column per depth)
331
+
332
+ Returns
333
+ -------
334
+ pandas.DataFrame
335
+ Time series profile data
336
+ """
337
+ tabular = pd.DataFrame(self._values)
338
+ tabular.columns = self._depths
339
+ tabular.index = self.times
340
+ tabular.insert(0, "time", self.times)
341
+
342
+ return tabular
343
+
344
+ @property
345
+ @functools.lru_cache()
346
+ def number_of_observations(self) -> "pd.DataFrame":
347
+ """ The number of observations for an average at a particular depth or time.
348
+
349
+ For pure observational data, the number of observations will always be '1'. When data are aggregated,
350
+ (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`) these numbers
351
+ will be greater than 1.
352
+
353
+ Returns
354
+ -------
355
+ DataFrame
356
+ Number of observations
357
+ """
358
+ tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
359
+ tabular.columns = self._depths
360
+ tabular.index = self._times
361
+ tabular.insert(0, "time", self._times)
362
+
363
+ return tabular
364
+
365
+ @number_of_observations.setter
366
+ def number_of_observations(self, value):
367
+ raise ValueError(f"You can't assign {value} to this variable (no assignment allowed).")
368
+
369
+ def reset_counts(self):
370
+ """ Set observation count to 1 if data exists, 0 otherwise """
371
+ self.__number_of_observations = (~self.wide.isna()).astype('boolean')
372
+
373
+ def set_utc_offset(self, offset:"Union[int,str]") -> None:
374
+ """ Set the time zone of the data by providing a UTC offset
375
+
376
+ Parameters
377
+ ----------
378
+ offset : int, str
379
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
380
+ """
381
+ if self.utc_offset is not None:
382
+ raise ValueError("You can only set the UTC offset once.")
383
+
384
+ utc_offset = get_utc_offset(offset)
385
+
386
+ tz = timezone(timedelta(seconds = utc_offset))
387
+ self._times = self._times.tz_localize(tz)
388
+ self._output_utc_offset = timezone(timedelta(seconds = utc_offset))
389
+
390
+ TSP.wide.fget.cache_clear()
391
+ TSP.long.fget.cache_clear()
392
+
393
+ @property
394
+ def utc_offset(self) -> "Optional[tzinfo]":
395
+ """ Get the time zone of the data by providing a UTC offset
396
+
397
+ Returns
398
+ -------
399
+ datetime.tzinfo
400
+ A timezone object
401
+ """
402
+ if self._times.tz is None:
403
+ return None
404
+ else:
405
+ return self._times.tz
406
+
407
+ @utc_offset.setter
408
+ def utc_offset(self, value):
409
+ self.set_utc_offset(value)
410
+
411
+ @property
412
+ def output_utc_offset(self) -> "Optional[tzinfo]":
413
+ """ Get the time zone in which to output or display the data by providing a UTC offset
414
+
415
+ Returns
416
+ -------
417
+ datetime.tzinfo
418
+ A timezone object
419
+ """
420
+ if self._output_utc_offset is None:
421
+ return None
422
+ else:
423
+ return self._output_utc_offset
424
+
425
+ @output_utc_offset.setter
426
+ def output_utc_offset(self, offset:"Union[int,str]") -> None:
427
+ self.set_output_utc_offset(offset)
428
+
429
+ def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
430
+ """ Set the time zone in which to display the output or data by providing a UTC offset
431
+ Parameters
432
+ ----------
433
+ offset : int, str
434
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
435
+ """
436
+ utc_offset = get_utc_offset(offset)
437
+ tz = timezone(timedelta(seconds = utc_offset))
438
+ self._output_utc_offset = tz
439
+
440
+ TSP.wide.fget.cache_clear()
441
+ TSP.long.fget.cache_clear()
442
+
443
+ def reset_output_utc_offset(self) -> None:
444
+ """ Reset the time zone in which to output or display the data to the default (the one set by set_utc_offset)
445
+
446
+ """
447
+ if self.utc_offset is None:
448
+ raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
449
+ else:
450
+ self._output_utc_offset = self.utc_offset
451
+
452
+ def __nly(self,
453
+ freq_fmt:str,
454
+ new_freq,
455
+ min_count:Optional[int],
456
+ max_gap:Optional[int],
457
+ min_span:Optional[int]) -> TSP:
458
+ """
459
+ Temporal aggregation by grouping according to a string-ified time
460
+
461
+ Parameters
462
+ ----------
463
+ freq_fmt : str
464
+ Python date format string used to aggregate and recover time
465
+
466
+ Returns
467
+ -------
468
+ tuple[pd.DataFrame, pd.DataFrame]
469
+ A tuple of dataframes, the first containing the aggregated data, the second containing the number of observations
470
+ """
471
+ R = self.wide.drop("time", axis=1).resample(freq_fmt)
472
+ cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
473
+ total_obs = R.count()
474
+ values = R.mean()
475
+
476
+ # Calculate masks
477
+ mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
478
+
479
+ if min_count is not None:
480
+ mc_mask = (cumulative_obs < min_count)
481
+ if max_gap is not None:
482
+ Mg_mask = max_gap_mask(R, max_gap)
483
+ if min_span is not None:
484
+ ms_mask = min_span_mask(R, min_span)
485
+
486
+ mask = (mc_mask | Mg_mask | ms_mask)
487
+ values[mask] = np.nan
488
+
489
+ # Construct TSP
490
+ t = TSP.__from_tsp(self, times=values.index,
491
+ depths=values.columns,
492
+ values=values.values)
493
+ t.__number_of_observations = cumulative_obs
494
+ t.freq = new_freq
495
+
496
+ # Calculate data completeness
497
+ if self.freq is not None:
498
+ f1 = self.freq
499
+ f2 = new_freq
500
+ t._completeness = completeness(total_obs, f1, f2)
501
+
502
+ return t
503
+
504
+ def monthly(self,
505
+ min_count:Optional[int]=24,
506
+ max_gap:Optional[int]=3600*24*8,
507
+ min_span:Optional[int]=3600*24*21) -> "TSP":
508
+ """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
509
+
510
+ Parameters
511
+ ----------
512
+ min_count : int
513
+ Minimum number of observations in a month to be considered a valid average,
514
+ defaults to None
515
+ max_gap : int
516
+ Maximum gap (in seconds) between data points to be considered a valid average,
517
+ defaults to None
518
+ min_span : int
519
+ Minimum total data range (in seconds) to be consiered a valid average,
520
+ defaults to None
521
+
522
+ Returns
523
+ -------
524
+ TSP
525
+ A TSP object with data aggregated to monthly averages
526
+ """
527
+ t = self.__nly(freq_fmt="M",
528
+ new_freq=lbl.MONTHLY,
529
+ min_count=min_count,
530
+ max_gap=max_gap,
531
+ min_span=min_span)
532
+
533
+ return t
534
+
535
+ def daily(self,
536
+ min_count:Optional[int]=None,
537
+ max_gap:Optional[int]=None,
538
+ min_span:Optional[int]=None) -> "TSP":
539
+ """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
540
+
541
+ Parameters
542
+ ----------
543
+ min_count : int
544
+ Minimum number of observations in a day to be considered a valid average,
545
+ defaults to None
546
+ max_gap : int
547
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
548
+ min_span : int
549
+ Minimum total data range (in seconds) to be consiered a valid average, defaults to None
550
+
551
+ Returns
552
+ -------
553
+ TSP
554
+ A TSP object with data aggregated to daily averages
555
+ """
556
+ # if the data is already daily +/- 1min , just return it
557
+ t = self.__nly(freq_fmt="D",
558
+ new_freq=lbl.DAILY,
559
+ min_count=min_count,
560
+ max_gap=max_gap,
561
+ min_span=min_span)
562
+
563
+ return t
564
+
565
+ def yearly(self,
566
+ min_count:Optional[int]=None,
567
+ max_gap:Optional[int]=None,
568
+ min_span:Optional[int]=None) -> "TSP":
569
+ """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
570
+
571
+ Parameters
572
+ ----------
573
+ min_count : int
574
+ Minimum number of observations in a month to be considered a valid average, defaults to None
575
+ max_gap : int
576
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
577
+ min_span : int
578
+ Minimum total data range (in seconds) to be consiered a valid average, defaults to None
579
+
580
+ Returns
581
+ -------
582
+ TSP
583
+ A TSP object with data aggregated to yearly averages
584
+ """
585
+ t = self.__nly(freq_fmt="Y",
586
+ new_freq=lbl.YEARLY,
587
+ min_count=min_count,
588
+ max_gap=max_gap,
589
+ min_span=min_span)
590
+
591
+ return t
592
+
593
+ @property
594
+ def depths(self) -> NDArray[np.number]:
595
+ """ Return the depth values in the profile
596
+
597
+ Returns
598
+ -------
599
+ numpy.ndarray
600
+ The depths in the profile
601
+ """
602
+ return self._depths
603
+
604
+ @depths.setter
605
+ def depths(self, value):
606
+ depths = np.atleast_1d(value)
607
+
608
+ if not len(depths) == len(self._depths):
609
+ raise ValueError(f"List of depths must have length of {len(self._depths)}.")
610
+
611
+ self._depths = depths
612
+
613
+ TSP.wide.fget.cache_clear()
614
+ TSP.long.fget.cache_clear()
615
+
616
+ @property
617
+ def times(self):
618
+ """ Return the timestamps in the time series
619
+
620
+ Returns
621
+ -------
622
+ pandas.DatetimeIndex
623
+ The timestamps in the time series
624
+ """
625
+ if self.utc_offset is None:
626
+ return self._times
627
+
628
+ elif self._output_utc_offset == self.utc_offset:
629
+ return self._times
630
+
631
+ else:
632
+ return self._times.tz_convert(self.output_utc_offset)
633
+
634
+ @property
635
+ def values(self):
636
+ return self._values
637
+
638
+ def to_gtnp(self, filename: str) -> None:
639
+ """ Write the data in GTN-P format
640
+
641
+ Parameters
642
+ ----------
643
+ filename : str
644
+ Path to the file to write to
645
+ """
646
+ df = self.wide.round(self._export_precision).rename(columns={'time': 'Date/Depth'})
647
+ df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
648
+
649
+ df.to_csv(filename, index=False, na_rep="-999")
650
+
651
+ def to_ntgs(self, filename:str, project_name:str="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
652
+ """ Write the data in NTGS template format
653
+
654
+ Parameters
655
+ ----------
656
+ filename : str
657
+ Path to the file to write to
658
+ project_name : str, optional
659
+ The project name, by default ""
660
+ site_id : str, optional
661
+ The name of the site , by default None
662
+ latitude : float, optional
663
+ WGS84 latitude at which the observations were recorded, by default None
664
+ longitude : float, optional
665
+ WGS84 longitude at which the observations were recorded, by default None
666
+ """
667
+ if latitude is None:
668
+ latitude = self.latitude if self.latitude is not None else ""
669
+
670
+ if longitude is None:
671
+ longitude = self.longitude if self.longitude is not None else ""
672
+
673
+ if site_id is None:
674
+ site_id = self.site_id if self.site_id is not None else ""
675
+ data = self.values
676
+
677
+ df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
678
+ 'site_id': pd.Series(dtype='str'),
679
+ 'latitude': pd.Series(dtype='float'),
680
+ 'longitude': pd.Series(dtype='float')
681
+ })
682
+
683
+ df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
684
+ df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
685
+
686
+ df["project_name"] = project_name
687
+ df["site_id"] = site_id
688
+ df["latitude"] = latitude
689
+ df["longitude"] = longitude
690
+
691
+ headers = [str(d) + "_m" for d in self.depths]
692
+
693
+ for i, h in enumerate(headers):
694
+ df[h] = data[:, i].round(self._export_precision)
695
+
696
+ df.to_csv(filename, index=False)
697
+
698
+ def to_netcdf(self, file: str) -> None:
699
+ """ Write the data as a netcdf"""
700
+ try:
701
+ ncf = make_temperature_base(file, len(self.depths))
702
+ except NameError:
703
+ warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
704
+ return
705
+
706
+ with nc.Dataset(ncf, 'a') as ncd:
707
+ pytime = self.times.to_pydatetime()
708
+
709
+ ncd['depth_below_ground_surface'][:] = self.depths
710
+
711
+
712
+ ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
713
+ ncd['ground_temperature'][:] = self.values
714
+
715
+ if self.latitude:
716
+ ncd['latitude'][:] = self.latitude
717
+ if self.longitude:
718
+ ncd['longitude'][:] = self.longitude
719
+ if self.site_id:
720
+ ncd['site_name'] = self.site_id
721
+
722
+ for key, value in self.metadata:
723
+ try:
724
+ ncd.setncattr(key, value)
725
+ except Exception:
726
+ warnings.warn(f"Could not set metadata item: {key}", stacklevel=2)
727
+
728
+ def to_json(self, file: str) -> None:
729
+ """ Write the data to a serialized json file """
730
+ with open(file, 'w') as f:
731
+ f.write(self._to_json())
732
+
733
+ def _to_json(self) -> str:
734
+ return self.wide.round(self._export_precision).to_json()
735
+
736
+ def plot_profiles(self, P:int=100, n:int=10) -> Figure:
737
+ """ Create a plot of the temperature profiles at different times
738
+
739
+ Parameters
740
+ ----------
741
+ P : int
742
+ Percentage of time range to plot
743
+ n : int
744
+ Number of evenly-spaced profiles to plot
745
+
746
+ Returns
747
+ -------
748
+ Figure
749
+ matplotlib `Figure` object
750
+ """
751
+ fig = profile_evolution(depths=self.depths, times=self.times, values=self._values, P=P, n=n)
752
+ fig.show()
753
+ return fig
754
+
755
    def plot_trumpet(self,
                     year: Optional[int]=None,
                     begin: Optional[datetime]=None,
                     end: Optional[datetime]=None,
                     min_completeness: Optional[float]=None,
                     **kwargs) -> Figure:
        """ Create a trumpet plot (per-depth min / mean / max temperature envelope) from the data

        Parameters
        ----------
        year : int, optional
            Which year to plot. Currently the only supported way to select data.
        begin : datetime, optional
            If 'end' also provided, the earliest measurement to include in the averaging for the plot.
            NOTE: not yet implemented; passing 'begin' or 'end' without 'year' raises NotImplementedError.
        end : datetime, optional
            If 'begin' also provided, the latest measurement to include in the averaging for the plot.
            NOTE: not yet implemented (see 'begin').
        min_completeness : float, optional
            If provided, the minimum completeness (fractional, 0 to 1) required to include
            in temperature envelope, otherwise
            the point is plotted as an unconnected, slightly transparent dot, by default None
        **kwargs : dict, optional
            Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
            list of all possible arguments.

        Returns
        -------
        Figure
            a matplotlib `Figure` object

        Raises
        ------
        NotImplementedError
            If 'begin' or 'end' is provided without 'year'.
        ValueError
            If none of 'year', 'begin', 'end' is provided.
        """
        # Long-format data with missing observations removed.
        df = self.long.dropna()

        if year is not None:
            df = df[df['time'].dt.year == year]

        elif begin is not None or end is not None:
            raise NotImplementedError

        else:
            raise ValueError("One of 'year', 'begin', 'end' must be provided.")

        grouped = df.groupby('depth')

        # Per-depth extremes and mean over the selected year.
        max_t = grouped.max().get('temperature_in_ground').values
        min_t = grouped.min().get('temperature_in_ground').values
        mean_t = grouped.mean().get('temperature_in_ground').values
        # groupby sorts group keys by default, so depths come out in ascending order.
        depth = np.array([d for d in grouped.groups.keys()])

        # Calculate completeness
        # NOTE(review): yearly(...).completeness is computed even when
        # min_completeness is None and the result is then discarded — confirm this is cheap.
        c = self.yearly(None, None, None).completeness

        if min_completeness is not None and c is not None:
            # Completeness values for the selected year, aligned to the plotted depths.
            C = c[c.index.year == year]
            C = C[depth].iloc[0,:].values

        else:
            C = None

        fig = trumpet_curve(depth=depth,
                            t_max=max_t,
                            t_min=min_t,
                            t_mean=mean_t,
                            min_completeness=min_completeness,
                            data_completeness=C,
                            **kwargs)
        fig.show()

        return fig
822
+
823
+ def plot_contour(self, **kwargs) -> Figure:
824
+ """ Create a contour plot
825
+
826
+ Parameters
827
+ ----------
828
+ **kwargs : dict, optional
829
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
830
+ list of all possible arguments.
831
+
832
+ Returns
833
+ -------
834
+ Figure
835
+ matplotlib `Figure` object
836
+ """
837
+ fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
838
+
839
+ if self.output_utc_offset is not None:
840
+ label = format_utc_offset(self.output_utc_offset)
841
+ if label != "UTC":
842
+ label = f"UTC{label}"
843
+ fig.axes[0].set_xlabel(f"Time [{label}]")
844
+
845
+ fig.show()
846
+
847
+ return fig
848
+
849
+ def plot_timeseries(self, depths: list=[], **kwargs) -> Figure:
850
+ """Create a time series T(t) plot
851
+
852
+ Parameters
853
+ ----------
854
+ depths : list, optional
855
+ If non-empty, restricts the depths to include in the plot, by default []
856
+ **kwargs : dict, optional
857
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
858
+ list of all possible arguments.
859
+
860
+ Returns
861
+ -------
862
+ Figure
863
+ matplotlib `Figure` object
864
+ """
865
+ if depths == []:
866
+ depths = self.depths
867
+
868
+ d_mask = np.isin(self.depths, depths)
869
+
870
+ fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)
871
+
872
+
873
+ if self.output_utc_offset is not None:
874
+ label = format_utc_offset(self.output_utc_offset)
875
+ if label != "UTC":
876
+ label = f"UTC{label}"
877
+ fig.axes[0].set_xlabel(f"Time [{label}]")
878
+ fig.autofmt_xdate()
879
+ fig.show()
880
+
881
+ return fig
882
+
883
+
884
class AggregatedTSP(TSP):
    """ A Time Series Profile produced by an aggregation operation.

    Behaviour is identical to :py:class:`tsp.core.TSP` (no methods are overridden).

    NOTE(review): the original docstring here was a verbatim copy of
    :py:class:`IndexedTSP`'s ("uses indices (1,2,3,...) instead of depth values"),
    which does not match this class's name. Judging by the name, instances are
    presumably returned by temporal-aggregation methods (e.g. daily/monthly
    averaging) — confirm the intended purpose against the callers.

    Parameters
    ----------
    times : list-like
        t-length array of datetime objects
    values : numpy.ndarray
        array with shape (t,d) containing values at (t)imes and (d)epths
    **kwargs : dict
        Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
        list of all possible arguments.
    """
900
+
901
+
902
class IndexedTSP(TSP):
    """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.

    Used in situations when depths are unknown (such as when reading datalogger exports
    that don't have depth measurements.)

    Parameters
    ----------
    times : list-like
        t-length array of datetime objects
    values : numpy.ndarray
        array with shape (t,d) containing values at (t)imes and (d)epths
    **kwargs : dict
        Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
        list of all possible arguments.
    """

    def __init__(self, times, values, **kwargs):
        # Depth "indices" are 1-based: 1, 2, ..., d (one per data column).
        depths = np.arange(0, values.shape[1]) + 1
        super().__init__(times=times, depths=depths, values=values, **kwargs)

    @property
    def depths(self) -> np.ndarray:
        """Depth indices

        Returns
        -------
        numpy.ndarray
            An array of depth indices

        Warns
        -----
        UserWarning
            Always, because the returned values are indices rather than measured depths.
        """
        # Typo fixed in user-facing message: "instad" -> "instead".
        warnings.warn("This TSP uses indices (1,2,3,...) instead of depths. Use set_depths() to use measured depths.", stacklevel=2)
        return self._depths

    @depths.setter
    def depths(self, value):
        # Delegate to the parent property's setter so validation stays in one place.
        TSP.depths.__set__(self, value)

    def set_depths(self, depths: np.ndarray):
        """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`

        Parameters
        ----------
        depths : np.ndarray
            An array or list of depth values equal in length to the depth indices
        """
        self.depths = depths
        # Re-class as a plain TSP so the depths property no longer warns.
        self.__class__ = TSP
949
+
950
+
951
+
952
def span(S: pd.Series) -> float:
    """Seconds between the first and last non-missing observations of ``S``.

    Parameters
    ----------
    S : pd.Series
        Series with a DatetimeIndex; NaN values are ignored.

    Returns
    -------
    float
        Elapsed seconds from the first to the last valid entry, or 0 if the
        series contains no valid data.
    """
    start = S.first_valid_index()  # type: pd.Timestamp
    stop = S.last_valid_index()  # type: pd.Timestamp

    if start is None or stop is None:
        return 0

    return (stop - start).total_seconds()
959
+
960
def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
                  threshold: float) -> "pd.DataFrame":
    """Flag resampling bins whose first-to-last valid-data span is shorter than ``threshold`` seconds."""
    bin_spans = R.apply(span)
    mask = bin_spans < threshold
    return mask
964
+
965
+
966
def gap(S: pd.Series) -> float:
    """Largest time gap, in seconds, between consecutive valid observations of ``S``.

    Returns 0 when there are fewer than two inter-observation intervals
    (i.e. fewer than three valid observations).
    """
    deltas = np.diff(S.dropna().index)

    if len(deltas) <= 1:
        return 0

    largest = max(deltas)
    return largest.astype('timedelta64[s]').astype(float)
976
+
977
+
978
def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
                 threshold: float) -> "pd.DataFrame":
    """Flag resampling bins whose largest internal gap exceeds ``threshold`` seconds.

    Bins for which `gap` reports 0 (fewer than three observations) are also flagged.
    """
    bin_gaps = R.apply(gap)
    too_sparse = bin_gaps > threshold
    too_few = bin_gaps == 0
    return too_sparse | too_few
982
+
983
+
984
+
985
+
986
+ def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
987
+ """ Mask out observational groups in which there is more than a certain size temporal gap
988
+
989
+ Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
990
+ or end of the aggregation group (using min_span).
991
+
992
+ Parameters
993
+ ----------
994
+ grouped : pandas.core.groupby.DataFrameGroupBy
995
+ groupby with 'time' and 'depth' columns
996
+ max_gap : int
997
+ maximum gap in seconds to tolerate between observations in a group
998
+ min_span : int
999
+ minimum data range (beginning to end) in seconds.
1000
+
1001
+ Returns
1002
+ -------
1003
+ numpy.ndarray
1004
+ boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
1005
+ """
1006
+ if max_gap is not None:
1007
+ max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
1008
+ max_diff = max_diff.unstack().to_numpy()
1009
+ diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
1010
+ else:
1011
+ diff_mask = np.zeros_like(grouped, dtype=bool)
1012
+
1013
+ if min_span is not None:
1014
+ total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
1015
+ span_mask = np.where(total_span < min_span, True, False)
1016
+ else:
1017
+ span_mask = np.zeros_like(grouped, dtype=bool)
1018
+
1019
+ mask = diff_mask * span_mask
1020
+
1021
+ return mask
1022
+
1023
+
1024
+ def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
1025
+ """ Create a mask array for an
1026
+
1027
+ Parameters
1028
+ ----------
1029
+ number_of_observations : numpy.ndarray
1030
+ Array of how many data points are in aggregation
1031
+ min_count : int
1032
+ Minimum number of data points for aggregation to be 'valid'
1033
+
1034
+ Returns
1035
+ -------
1036
+ np.ndarray
1037
+ a mask, True where data should be masked
1038
+ """
1039
+ valid = np.less(number_of_observations, min_count) # type: np.ndarray
1040
+ return valid
1041
+
1042
+
1043
def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
    """Convert a list of times to a pandas DatetimeIndex object"""
    invalid_msg = "Times must be a list, numpy array, pandas DatetimeIndex, or pandas Series"

    # Reject empty inputs and anything without a length (e.g. scalars).
    try:
        if not len(times):
            raise ValueError(invalid_msg)
    except TypeError:
        raise ValueError(invalid_msg)

    if isinstance(times, pd.DatetimeIndex):
        return times

    if isinstance(times, pd.Series):
        try:
            index = pd.DatetimeIndex(times)
        except Exception:
            raise ValueError("Series must be convertible to DatetimeIndex")
        index.name = 'time'
        return index

    if isinstance(times, np.ndarray):
        index = pd.to_datetime(times)
        index.name = 'time'
        return index

    if isinstance(times, list):
        return pd.to_datetime(times)

    raise ValueError(invalid_msg)
1075
+
1076
def tsp_concat(tsp_list, on_conflict='error', metadata='first') -> TSP:
    """Combine multiple TSPs into a single TSP.

    Parameters
    ----------
    tsp_list : list[TSP]
        List of TSPs to combine. They must have the same depths
    on_conflict : str, optional
        Method to resolve duplicate times with different values. Chosen from "error", "keep", by default "error"
        - "error": Raise an error if duplicate times with different values are found.
        - "keep": Keep the first occurrence of the duplicate time.
    metadata : str, optional
        Method to select metadata from the TSPs. Chosen from "first", "identical", or "none", by default "first"
        - "first": Use the metadata from the first TSP in the list.
        - "identical": Only keep metadata records that are identical across TSPs.
        - "none": Ignore metadata and set it to None.
    Returns
    -------
    TSP
        Combined TSP.

    Description
    -----------
    This function combines multiple TSPs into a single TSP. The TSPs must have the same depths.
    """
    combined = _tsp_concat(tsp_list=tsp_list, on_conflict=on_conflict, metadata=metadata)

    # Pull out the positional constructor arguments; everything remaining in
    # the dictionary is forwarded as keyword arguments.
    times = combined.pop('times')
    depths = combined.pop('depths')
    values = combined.pop('values')

    return TSP(times, depths, values, **combined)