tsp-1.8.1-py3-none-any.whl → tsp-1.10.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +159 -153
  4. tsp/core.py +1306 -1162
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/NTGS_gtr_example_excel.xlsx +0 -0
  10. tsp/data/example_geotop.csv +5240 -5240
  11. tsp/data/example_gtnp.csv +1298 -1298
  12. tsp/data/example_permos.csv +7 -7
  13. tsp/data/ntgs-db-multi.txt +3872 -0
  14. tsp/data/ntgs-db-single.txt +2251 -0
  15. tsp/data/test_geotop_has_space.txt +5 -5
  16. tsp/data/tsp_format_long.csv +10 -0
  17. tsp/data/tsp_format_wide_1.csv +7 -0
  18. tsp/data/tsp_format_wide_2.csv +7 -0
  19. tsp/dataloggers/AbstractReader.py +43 -43
  20. tsp/dataloggers/FG2.py +110 -110
  21. tsp/dataloggers/GP5W.py +114 -114
  22. tsp/dataloggers/Geoprecision.py +34 -34
  23. tsp/dataloggers/HOBO.py +930 -914
  24. tsp/dataloggers/RBRXL800.py +190 -190
  25. tsp/dataloggers/RBRXR420.py +371 -308
  26. tsp/dataloggers/Vemco.py +84 -0
  27. tsp/dataloggers/__init__.py +15 -15
  28. tsp/dataloggers/logr.py +196 -115
  29. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  30. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  31. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  32. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  33. tsp/dataloggers/test_files/010252.dat +1731 -1731
  34. tsp/dataloggers/test_files/010252.hex +1739 -1739
  35. tsp/dataloggers/test_files/010274.hex +1291 -1291
  36. tsp/dataloggers/test_files/010278.hex +3544 -3544
  37. tsp/dataloggers/test_files/012064.dat +1286 -1286
  38. tsp/dataloggers/test_files/012064.hex +1294 -1294
  39. tsp/dataloggers/test_files/012064_modified_start.hex +1294 -0
  40. tsp/dataloggers/test_files/012081.hex +3532 -3532
  41. tsp/dataloggers/test_files/013138_recovery_stamp.hex +1123 -0
  42. tsp/dataloggers/test_files/014037-2007.hex +95 -0
  43. tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.hex +11253 -0
  44. tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.xls +0 -0
  45. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  46. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  47. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  48. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  49. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16.txt +36 -0
  50. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_raw.csv +2074 -0
  51. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_temp.csv +2074 -0
  52. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_cfg.txt +30 -0
  53. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_raw.csv +35 -0
  54. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_temp.csv +35 -0
  55. tsp/dataloggers/test_files/204087.xlsx +0 -0
  56. tsp/dataloggers/test_files/Asc-1455As02.000 +2982 -0
  57. tsp/dataloggers/test_files/Asc-1456As02.000 +2992 -0
  58. tsp/dataloggers/test_files/Asc-1457As02.000 +2917 -0
  59. tsp/dataloggers/test_files/BGC_BH15_019362_20140610_1253.hex +1729 -0
  60. tsp/dataloggers/test_files/Bin2944.csv +759 -0
  61. tsp/dataloggers/test_files/Bin5494.csv +2972 -0
  62. tsp/dataloggers/test_files/Bin6786.csv +272 -0
  63. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  64. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  65. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  66. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  67. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  68. tsp/dataloggers/test_files/Minilog-II-T_350763_20190711_1.csv +2075 -0
  69. tsp/dataloggers/test_files/Minilog-II-T_350769_20190921_1.csv +6384 -0
  70. tsp/dataloggers/test_files/Minilog-II-T_354284_20190921_1.csv +4712 -0
  71. tsp/dataloggers/test_files/Minilog-T_7943_20140920_1.csv +5826 -0
  72. tsp/dataloggers/test_files/Minilog-T_8979_20140806_1.csv +2954 -0
  73. tsp/dataloggers/test_files/Minilog-T_975_20110824_1.csv +4343 -0
  74. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  75. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  76. tsp/dataloggers/test_files/RI03b_062831_20240905_1801.rsk +0 -0
  77. tsp/dataloggers/test_files/RI03b_062831_20240905_1801.xlsx +0 -0
  78. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  79. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  80. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  81. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  82. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  83. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  84. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  85. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  86. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  87. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  88. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  89. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  90. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  91. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  92. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  93. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  94. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  95. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  96. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  97. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  98. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  99. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  100. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  101. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  102. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  103. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  104. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  105. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  106. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  107. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  108. tsp/gtnp.py +148 -148
  109. tsp/labels.py +3 -3
  110. tsp/misc.py +90 -90
  111. tsp/physics.py +101 -101
  112. tsp/plots/static.py +388 -374
  113. tsp/readers.py +829 -548
  114. tsp/standardization/__init__.py +0 -0
  115. tsp/standardization/metadata.py +95 -0
  116. tsp/standardization/metadata_ref.py +0 -0
  117. tsp/standardization/validator.py +535 -0
  118. tsp/time.py +45 -45
  119. tsp/tspwarnings.py +27 -15
  120. tsp/utils.py +131 -101
  121. tsp/version.py +1 -1
  122. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/METADATA +95 -86
  123. tsp-1.10.2.dist-info/RECORD +132 -0
  124. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/licenses/LICENSE +674 -674
  125. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/top_level.txt +1 -0
  126. tsp-1.8.1.dist-info/RECORD +0 -94
  127. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/WHEEL +0 -0
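The bulk of this release's changes land in tsp/core.py, whose diff follows. For orientation, here is a minimal usage sketch of the TSP class defined in that file. It is based only on the signatures visible in this diff, not on the package documentation; the depths, dates, UTC offset, and site name are illustrative values.

    import numpy as np
    from tsp.core import TSP

    # Build a synthetic profile from the analytical heat-conduction solution.
    # In 1.10.2, synthetic() forwards **kwargs to __init__, so constructor
    # arguments such as site_id can be passed here (site_id is validated by
    # the setter added in this release).
    t = TSP.synthetic(depths=np.array([0.5, 1.0, 2.0, 5.0]),
                      start="2000-01-01", end="2003-01-01", freq="D",
                      site_id="demo-site")

    t.set_utc_offset("-07:00")   # the data's UTC offset may be set only once
    monthly = t.monthly()        # averages masked by min_count/max_gap/min_span
    print(monthly.wide.head())   # 'wide' layout: one column per depth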
tsp/core.py CHANGED
@@ -1,1162 +1,1306 @@
1
- from __future__ import annotations
2
-
3
- import pandas as pd
4
- import re
5
- import inspect
6
- import numpy as np
7
- import functools
8
- import warnings
9
-
10
- try:
11
- import netCDF4 as nc
12
-
13
- try:
14
- from pfit.pfnet_standard import make_temperature_base, calculate_extent_metadata as calc_ext_meta
15
- except ModuleNotFoundError:
16
- warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
17
-
18
- except ModuleNotFoundError:
19
- warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
20
-
21
- from typing import Union, Optional
22
- from numpy.typing import NDArray
23
- from datetime import datetime, tzinfo, timezone, timedelta
24
-
25
- import tsp
26
- import tsp.labels as lbl
27
- import tsp.tspwarnings as tw
28
-
29
- from tsp.physics import analytical_fourier
30
- from tsp.plots.static import trumpet_curve, colour_contour, time_series, profile_evolution
31
- from tsp.time import format_utc_offset
32
- from tsp.time import get_utc_offset
33
- from tsp.misc import completeness
34
- from tsp.concatenation import _tsp_concat
35
-
36
- from matplotlib.figure import Figure
37
-
38
-
39
- class TSP:
40
- """ A Time Series Profile (a collection of time series data at different depths)
41
-
42
- A TSP can also be:
43
- Thermal State of Permafrost
44
- Temperature du Sol en Profondeur
45
- Temperatures, Secondes, Profondeurs
46
-
47
- Parameters
48
- ----------
49
- times : pandas.DatetimeIndex
50
- DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
51
- but will be converted to a DatetimeIndex with no UTC offset.
52
- depths : list-like
53
- d-length array of depths
54
- values : numpy.ndarray
55
- array with shape (t,d) containing values at (t)emperatures and (d)epths
56
- longitude : float, optional
57
- Longitude at which data were collected
58
- latitude : float, optional
59
- Latitude at which data were collected
60
- site_id : str, optional
61
- Name of location at which data were collected
62
- metadata : dict
63
- Additional metadata
64
-
65
- Attributes
66
- ----------
67
- values
68
- latitude : float
69
- Latitude at which data were collected
70
- longitude : float
71
- Longitude at which data were collected
72
- metadata : dict
73
- Additional metadata provided at instantiation or by other methods
74
- """
75
-
76
- def __repr__(self) -> str:
77
- return repr(self.wide)
78
-
79
- def __str__(self) -> str:
80
- return str(self.wide)
81
-
82
- def __add__(self, other: TSP) -> TSP:
83
- """ Concatenate two TSP objects along the time axis.
84
- The two TSP objects must have the same depths and the same UTC offset.
85
-
86
- Parameters
87
- ----------
88
- other : TSP
89
- Another TSP object to concatenate with this one
90
-
91
- Returns
92
- -------
93
- TSP
94
- A new TSP object with the concatenated data
95
- """
96
- if not isinstance(other, TSP):
97
- raise TypeError("Can only concatenate TSP objects.")
98
-
99
- if self.utc_offset != other.utc_offset:
100
- raise ValueError("UTC offsets must be the same to concatenate.")
101
-
102
- return tsp_concat([self, other])
103
-
104
- def __init__(self, times, depths, values,
105
- latitude: Optional[float]=None,
106
- longitude: Optional[float]=None,
107
- site_id: Optional[str]=None,
108
- metadata: dict={}):
109
-
110
- self._times = handle_incoming_times(times)
111
- if self._times.duplicated().any():
112
- warnings.warn(tw.DuplicateTimesWarning(self._times), stacklevel=2)
113
-
114
- if self.utc_offset:
115
- self._output_utc_offset = self.utc_offset
116
- else:
117
- self._output_utc_offset = None
118
-
119
- self._depths = np.atleast_1d(depths)
120
- self._values = np.atleast_2d(values)
121
- self.__number_of_observations = np.ones_like(values, dtype=int)
122
- self.__number_of_observations[np.isnan(values)] = 0
123
- self.metadata = metadata
124
- self._latitude = latitude
125
- self._longitude = longitude
126
- self.site_id = site_id
127
- self._freq = None
128
- self._completeness = None
129
-
130
- self._export_precision = 3
131
-
132
- @property
133
- def latitude(self):
134
- """ Latitude at which data were collected """
135
- return self._latitude
136
-
137
- @latitude.setter
138
- def latitude(self, value: Optional[float]):
139
- if value is not None:
140
- try:
141
- self._latitude = float(value)
142
- except ValueError:
143
- raise ValueError("Latitude must be a float or None.")
144
- else:
145
- self._latitude = None
146
- self.metadata['_latitude'] = self._latitude
147
-
148
- @property
149
- def longitude(self):
150
- """ Longitude at which data were collected """
151
- return self._longitude
152
-
153
- @longitude.setter
154
- def longitude(self, value: Optional[float]):
155
- if value is not None:
156
- try:
157
- self._longitude = float(value)
158
- except ValueError:
159
- raise ValueError("Longitude must be a float or None.")
160
- else:
161
- self._longitude = None
162
- self.metadata['_longitude'] = self._longitude
163
-
164
- @property
165
- def freq(self) -> Optional[int]:
166
- """ Measurement frequency [s] """
167
- return self._freq
168
-
169
- @freq.setter
170
- def freq(self, value: int):
171
- if not isinstance(value, int):
172
- raise TypeError("Must be string, e.g. '1D', '3600s'")
173
- self._freq = value
174
-
175
- @property
176
- def completeness(self) -> Optional[pd.DataFrame]:
177
- """ Data completeness """
178
- return self._completeness
179
-
180
- @completeness.setter
181
- def completeness(self, value):
182
- raise ValueError("You can't assign this variable.")
183
-
184
- @classmethod
185
- def from_tidy_format(cls, times, depths, values,
186
- number_of_observations=None,
187
- latitude: Optional[float]=None,
188
- longitude: Optional[float]=None,
189
- site_id: Optional[str]=None,
190
- metadata:dict={}):
191
- """ Create a TSP from data in a 'tidy' or 'long' format
192
-
193
- Parameters
194
- ----------
195
- times : list-like
196
- n-length array of datetime objects
197
- depths : list-like
198
- n-length array of depths
199
- values : numpy.ndarray
200
- n-length array of (temperaure) values at associated time and depth
201
- number_of_observations : numpy.ndarray, optional
202
- n-length array of number of observations at associated time and
203
- depth for aggregated values (default: 1)
204
- longitude : float, optional
205
- Longitude at which data were collected
206
- latitude : float, optional
207
- Latitude at which data were collected
208
- site_id : str, optional
209
- Name of location at which data were collected
210
- metadata : dict
211
- Additional metadata
212
- """
213
- times = np.atleast_1d(times)
214
- depths = np.atleast_1d(depths)
215
- values = np.atleast_1d(values)
216
-
217
- number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
218
- df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
219
- df.set_index(["times", "depths"], inplace=True)
220
-
221
- try:
222
- unstacked = df.unstack()
223
- except ValueError as e:
224
- if np.any(df.index.duplicated()):
225
- print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
226
- raise e
227
-
228
- temps = unstacked.get('temperature_in_ground')
229
-
230
- this = cls(times=temps.index.values,
231
- depths=temps.columns.values,
232
- values=temps.values,
233
- latitude=latitude,
234
- longitude=longitude,
235
- site_id=site_id,
236
- metadata=metadata)
237
-
238
- number_of_observations = unstacked.get('number_of_observations').values
239
-
240
- number_of_observations[np.isnan(number_of_observations)] = 0
241
- this.__number_of_observations = number_of_observations
242
- return this
243
-
244
- @classmethod
245
- def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
246
- """ Use an existing TSP object as a template, """
247
- kw = {}
248
- for arg in inspect.getfullargspec(TSP).args[1:]:
249
- if kwargs.get(arg) is not None:
250
- kw[arg] = kwargs.get(arg)
251
- else:
252
- kw[arg] = getattr(t, arg)
253
-
254
- t = TSP(**kw)
255
-
256
- return t
257
-
258
- @classmethod
259
- def from_json(cls, json_file) -> "TSP":
260
- """ Read data from a json file
261
-
262
- Parameters
263
- ----------
264
- json_file : str
265
- Path to a json file from which to read
266
- """
267
- df = pd.read_json(json_file)
268
- depth_pattern = r"^(-?[0-9\.]+)$"
269
-
270
- times = pd.to_datetime(df['time']).values
271
- depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
272
- values = df.loc[:, depths].to_numpy()
273
-
274
- t = cls(times=times, depths=depths, values=values)
275
-
276
- return t
277
-
278
- @classmethod
279
- def synthetic(cls, depths: NDArray[np.number],
280
- start:str ="2000-01-01",
281
- end:str ="2003-01-01",
282
- freq: "str"="D",
283
- Q:float=0.2,
284
- c:float=1.6e6,
285
- k:float=2.5,
286
- A:float=6,
287
- MAGST:float=-0.5) -> "TSP":
288
- """
289
- Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
290
- Suitable for testing
291
-
292
- Parameters
293
- ----------
294
- depths : np.ndarray
295
- array of depths in metres
296
- start : str
297
- start date for the time series, in the format "YYYY-MM-DD"
298
- end : str
299
- end date for the time series, in the format "YYYY-MM-DD"
300
- freq : str
301
- pandas frequency string, e.g. "D" for daily, "H" for hourly, etc.
302
- Q : Optional[float], optional
303
- Ground heat flux [W m-2], by default 0.2
304
- c : Optional[float], optional
305
- heat capacity [J m-3 K-1], by default 1.6e6
306
- k : Optional[float], optional
307
- thermal conductivity [W m-1 K-1], by default 2.5
308
- A : Optional[float], optional
309
- Amplitude of temperature fluctuation [C], by default 6
310
- MAGST : Optional[float], optional
311
- Mean annual ground surface temperature [C], by default -0.5
312
-
313
- Returns
314
- -------
315
- TSP
316
- A timeseries profile (TSP) object
317
- """
318
- times = pd.date_range(start=start, end=end, freq=freq).to_pydatetime()
319
- t_sec = np.array([(t-times[0]).total_seconds() for t in times])
320
-
321
- values = analytical_fourier(depths=depths,
322
- times=t_sec,
323
- Q=Q,
324
- c=c,
325
- k=k,
326
- A=A,
327
- MAGST=MAGST)
328
-
329
- this = cls(depths=depths, times=times, values=values)
330
-
331
- return this
332
-
333
- @property
334
- @functools.lru_cache()
335
- def long(self) -> "pd.DataFrame":
336
- """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
337
-
338
- Returns
339
- -------
340
- pandas.DataFrame
341
- Time series profile data with columns:
342
- - **time**: time
343
- - **depth**: depth
344
- - **temperature_in_ground**: temperature
345
- - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
346
- """
347
- values = self.wide.melt(id_vars='time',
348
- var_name="depth",
349
- value_name="temperature_in_ground")
350
-
351
- number_of_observations = self.number_of_observations.melt(id_vars='time',
352
- var_name="depth",
353
- value_name="number_of_observations")
354
-
355
- values['number_of_observations'] = number_of_observations['number_of_observations']
356
-
357
- return values
358
-
359
- @property
360
- @functools.lru_cache()
361
- def wide(self) -> "pd.DataFrame":
362
- """ Return the data in a 'wide' format (one column per depth)
363
-
364
- Returns
365
- -------
366
- pandas.DataFrame
367
- Time series profile data
368
- """
369
- tabular = pd.DataFrame(self._values)
370
- tabular.columns = self._depths
371
- tabular.index = self.times
372
- tabular.insert(0, "time", self.times)
373
-
374
- return tabular
375
-
376
- @property
377
- @functools.lru_cache()
378
- def number_of_observations(self) -> "pd.DataFrame":
379
- """ The number of observations for an average at a particular depth or time.
380
-
381
- For pure observational data, the number of observations will always be '1'. When data are aggregated,
382
- (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`) these numbers
383
- will be greater than 1.
384
-
385
- Returns
386
- -------
387
- DataFrame
388
- Number of observations
389
- """
390
- tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
391
- tabular.columns = self._depths
392
- tabular.index = self._times
393
- tabular.insert(0, "time", self._times)
394
-
395
- return tabular
396
-
397
- @number_of_observations.setter
398
- def number_of_observations(self, value):
399
- raise ValueError(f"You can't assign {value} to this variable (no assignment allowed).")
400
-
401
- def reset_counts(self):
402
- """ Set observation count to 1 if data exists, 0 otherwise """
403
- self.__number_of_observations = (~self.wide.isna()).astype('boolean')
404
-
405
- def set_utc_offset(self, offset:"Union[int,str]") -> None:
406
- """ Set the time zone of the data by providing a UTC offset
407
-
408
- Parameters
409
- ----------
410
- offset : int, str
411
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
412
- """
413
- if self.utc_offset is not None:
414
- raise ValueError("You can only set the UTC offset once.")
415
-
416
- utc_offset = get_utc_offset(offset)
417
-
418
- tz = timezone(timedelta(seconds = utc_offset))
419
- self._times = self._times.tz_localize(tz)
420
- self._output_utc_offset = timezone(timedelta(seconds = utc_offset))
421
-
422
- TSP.wide.fget.cache_clear()
423
- TSP.long.fget.cache_clear()
424
-
425
- @property
426
- def utc_offset(self) -> "Optional[tzinfo]":
427
- """ Get the time zone of the data by providing a UTC offset
428
-
429
- Returns
430
- -------
431
- datetime.tzinfo
432
- A timezone object
433
- """
434
- if self._times.tz is None:
435
- return None
436
- else:
437
- return self._times.tz
438
-
439
- @utc_offset.setter
440
- def utc_offset(self, value):
441
- self.set_utc_offset(value)
442
-
443
- @property
444
- def output_utc_offset(self) -> "Optional[tzinfo]":
445
- """ Get the time zone in which to output or display the data by providing a UTC offset
446
-
447
- Returns
448
- -------
449
- datetime.tzinfo
450
- A timezone object
451
- """
452
- if self._output_utc_offset is None:
453
- return None
454
- else:
455
- return self._output_utc_offset
456
-
457
- @output_utc_offset.setter
458
- def output_utc_offset(self, offset:"Union[int,str]") -> None:
459
- self.set_output_utc_offset(offset)
460
-
461
- def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
462
- """ Set the time zone in which to display the output or data by providing a UTC offset
463
- Parameters
464
- ----------
465
- offset : int, str
466
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
467
- """
468
- utc_offset = get_utc_offset(offset)
469
- tz = timezone(timedelta(seconds = utc_offset))
470
- self._output_utc_offset = tz
471
-
472
- TSP.wide.fget.cache_clear()
473
- TSP.long.fget.cache_clear()
474
-
475
- def reset_output_utc_offset(self) -> None:
476
- """ Reset the time zone in which to output or display the data to the default (the one set by set_utc_offset)
477
-
478
- """
479
- if self.utc_offset is None:
480
- raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
481
- else:
482
- self._output_utc_offset = self.utc_offset
483
-
484
- def __nly(self,
485
- freq_fmt:str,
486
- new_freq,
487
- min_count:Optional[int],
488
- max_gap:Optional[int],
489
- min_span:Optional[int]) -> TSP:
490
- """
491
- Temporal aggregation by grouping according to a string-ified time
492
-
493
- Parameters
494
- ----------
495
- freq_fmt : str
496
- Python date format string used to aggregate and recover time
497
-
498
- Returns
499
- -------
500
- tuple[pd.DataFrame, pd.DataFrame]
501
- A tuple of dataframes, the first containing the aggregated data, the second containing the number of observations
502
- """
503
- R = self.wide.drop("time", axis=1).resample(freq_fmt)
504
- cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
505
- total_obs = R.count()
506
- values = R.mean()
507
-
508
- # Calculate masks
509
- mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
510
-
511
- if min_count is not None:
512
- mc_mask = (cumulative_obs < min_count)
513
- if max_gap is not None:
514
- Mg_mask = max_gap_mask(R, max_gap)
515
- if min_span is not None:
516
- ms_mask = min_span_mask(R, min_span)
517
-
518
- mask = (mc_mask | Mg_mask | ms_mask)
519
- values[mask] = np.nan
520
-
521
- # Construct TSP
522
- t = TSP.__from_tsp(self, times=values.index,
523
- depths=values.columns,
524
- values=values.values)
525
- t.__number_of_observations = cumulative_obs
526
- t.freq = new_freq
527
-
528
- # Calculate data completeness
529
- if self.freq is not None:
530
- f1 = self.freq
531
- f2 = new_freq
532
- t._completeness = completeness(total_obs, f1, f2)
533
-
534
- return t
535
-
536
- def monthly(self,
537
- min_count:Optional[int]=24,
538
- max_gap:Optional[int]=3600*24*8,
539
- min_span:Optional[int]=3600*24*21) -> "TSP":
540
- """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
541
-
542
- Parameters
543
- ----------
544
- min_count : int
545
- Minimum number of observations in a month to be considered a valid average,
546
- defaults to None
547
- max_gap : int
548
- Maximum gap (in seconds) between data points to be considered a valid average,
549
- defaults to None
550
- min_span : int
551
- Minimum total data range (in seconds) to be consiered a valid average,
552
- defaults to None
553
-
554
- Returns
555
- -------
556
- TSP
557
- A TSP object with data aggregated to monthly averages
558
- """
559
- t = self.__nly(freq_fmt="M",
560
- new_freq=lbl.MONTHLY,
561
- min_count=min_count,
562
- max_gap=max_gap,
563
- min_span=min_span)
564
-
565
- return t
566
-
567
- def daily(self,
568
- min_count:Optional[int]=None,
569
- max_gap:Optional[int]=None,
570
- min_span:Optional[int]=None) -> "TSP":
571
- """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
572
-
573
- Parameters
574
- ----------
575
- min_count : int
576
- Minimum number of observations in a day to be considered a valid average,
577
- defaults to None
578
- max_gap : int
579
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
580
- min_span : int
581
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
582
-
583
- Returns
584
- -------
585
- TSP
586
- A TSP object with data aggregated to daily averages
587
- """
588
- # if the data is already daily +/- 1min , just return it
589
- t = self.__nly(freq_fmt="D",
590
- new_freq=lbl.DAILY,
591
- min_count=min_count,
592
- max_gap=max_gap,
593
- min_span=min_span)
594
-
595
- return t
596
-
597
- def yearly(self,
598
- min_count:Optional[int]=None,
599
- max_gap:Optional[int]=None,
600
- min_span:Optional[int]=None) -> "TSP":
601
- """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
602
-
603
- Parameters
604
- ----------
605
- min_count : int
606
- Minimum number of observations in a month to be considered a valid average, defaults to None
607
- max_gap : int
608
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
609
- min_span : int
610
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
611
-
612
- Returns
613
- -------
614
- TSP
615
- A TSP object with data aggregated to yearly averages
616
- """
617
- t = self.__nly(freq_fmt="Y",
618
- new_freq=lbl.YEARLY,
619
- min_count=min_count,
620
- max_gap=max_gap,
621
- min_span=min_span)
622
-
623
- return t
624
-
625
- @property
626
- def depths(self) -> NDArray[np.number]:
627
- """ Return the depth values in the profile
628
-
629
- Returns
630
- -------
631
- numpy.ndarray
632
- The depths in the profile
633
- """
634
- return self._depths
635
-
636
- @depths.setter
637
- def depths(self, value):
638
- depths = np.atleast_1d(value)
639
-
640
- if not len(depths) == len(self._depths):
641
- raise ValueError(f"List of depths must have length of {len(self._depths)}.")
642
-
643
- self._depths = depths
644
-
645
- TSP.wide.fget.cache_clear()
646
- TSP.long.fget.cache_clear()
647
-
648
- @property
649
- def times(self):
650
- """ Return the timestamps in the time series
651
-
652
- Returns
653
- -------
654
- pandas.DatetimeIndex
655
- The timestamps in the time series
656
- """
657
- if self.utc_offset is None:
658
- return self._times
659
-
660
- elif self._output_utc_offset == self.utc_offset:
661
- return self._times
662
-
663
- else:
664
- return self._times.tz_convert(self.output_utc_offset)
665
-
666
- @property
667
- def values(self):
668
- return self._values
669
-
670
- def to_gtnp(self, filename: str) -> None:
671
- """ Write the data in GTN-P format
672
-
673
- Parameters
674
- ----------
675
- filename : str
676
- Path to the file to write to
677
- """
678
- df = self.wide.round(self._export_precision).rename(columns={'time': 'Date/Depth'})
679
- df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
680
-
681
- df.to_csv(filename, index=False, na_rep="-999")
682
-
683
- def to_ntgs(self, filename:str, project_name:Optional[str]="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
684
- """ Write the data in NTGS template format
685
-
686
- Parameters
687
- ----------
688
- filename : str
689
- Path to the file to write to
690
- project_name : str, optional
691
- The project name, by default ""
692
- site_id : str, optional
693
- The name of the site , by default None
694
- latitude : float, optional
695
- WGS84 latitude at which the observations were recorded, by default None
696
- longitude : float, optional
697
- WGS84 longitude at which the observations were recorded, by default None
698
- """
699
- if latitude is None:
700
- latitude = self.latitude if self.latitude is not None else ""
701
-
702
- if longitude is None:
703
- longitude = self.longitude if self.longitude is not None else ""
704
-
705
- if site_id is None:
706
- site_id = self.site_id if self.site_id is not None else ""
707
-
708
- if project_name is None:
709
- project_name = self.metadata.get("project_name", "")
710
-
711
- data = self.values
712
-
713
- df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
714
- 'site_id': pd.Series(dtype='str'),
715
- 'latitude': pd.Series(dtype='float'),
716
- 'longitude': pd.Series(dtype='float')
717
- })
718
-
719
- df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
720
- df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
721
-
722
- df["project_name"] = project_name
723
- df["site_id"] = site_id
724
- df["latitude"] = latitude
725
- df["longitude"] = longitude
726
-
727
- headers = [str(d) + "_m" for d in self.depths]
728
-
729
- for i, h in enumerate(headers):
730
- df[h] = data[:, i].round(self._export_precision)
731
-
732
- df.to_csv(filename, index=False)
733
-
734
- def to_netcdf(self, file: str, only_use_cf_metadata=True, calculate_extent_metadata=True) -> None:
735
- """ Write the data as a netcdf"""
736
- try:
737
- ncf = make_temperature_base(file, ndepth=len(self.depths), ntime=len(self.times), strings_as_strings=True)
738
- except NameError:
739
- warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
740
- return
741
-
742
- with nc.Dataset(ncf, 'a') as ncd:
743
- pytime = self.times.to_pydatetime()
744
-
745
- ncd['depth_below_ground_surface'][:] = self.depths
746
-
747
-
748
- ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
749
- ncd['ground_temperature'][:] = self.values
750
-
751
- if self.latitude:
752
- ncd['latitude'][:] = self.latitude
753
- if self.longitude:
754
- ncd['longitude'][:] = self.longitude
755
- if self.site_id:
756
- if ncd['site_name'].dtype == str:
757
- ncd['site_name'][0] = self.site_id
758
- else:
759
- strlen = ncd['site_name'].shape[0]
760
- ncd['site_name'][:] = nc.stringtochar(np.array([self.site_id], f"S{strlen}"))
761
-
762
- if "_elevation" in self.metadata:
763
- ncd['surface_elevation'][:] = self.metadata.get("_elevation")
764
-
765
- if only_use_cf_metadata:
766
- metadata = self.metadata.get('CF', {})
767
- else:
768
- metadata = self.metadata
769
-
770
- for key, value in metadata.items():
771
- try:
772
- if isinstance(value, str):
773
- ncd.setncattr_string(key, value)
774
- else:
775
- ncd.setncattr(key, value)
776
- except Exception:
777
- warnings.warn(f"Could not set metadata item: {key} : {value}", stacklevel=2)
778
-
779
- if calculate_extent_metadata:
780
- calc_ext_meta(ncd)
781
-
782
- def to_json(self, file: str) -> None:
783
- """ Write the data to a serialized json file """
784
- with open(file, 'w') as f:
785
- f.write(self._to_json())
786
-
787
- def _to_json(self) -> str:
788
- return self.wide.round(self._export_precision).to_json()
789
-
790
- def plot_profiles(self, P:int=100, n:int=10) -> Figure:
791
- """ Create a plot of the temperature profiles at different times
792
-
793
- Parameters
794
- ----------
795
- P : int
796
- Percentage of time range to plot
797
- n : int
798
- Number of evenly-spaced profiles to plot
799
-
800
- Returns
801
- -------
802
- Figure
803
- matplotlib `Figure` object
804
- """
805
- fig = profile_evolution(depths=self.depths, times=self.times, values=self._values, P=P, n=n)
806
- fig.show()
807
- return fig
808
-
809
- def plot_trumpet(self,
810
- year: Optional[int]=None,
811
- begin: Optional[datetime]=None,
812
- end: Optional[datetime]=None,
813
- min_completeness: Optional[float]=None,
814
- **kwargs) -> Figure:
815
- """ Create a trumpet plot from the data
816
-
817
- Parameters
818
- ----------
819
- year : int, optional
820
- Which year to plot
821
- begin : datetime, optional
822
- If 'end' also provided, the earliest measurement to include in the averaging for the plot
823
- end : datetime, optional
824
- If 'begin' also provided, the latest measurement to include in the averaging for the plot
825
- min_completeness : float, optional
826
- If provided, the minimum completeness (fractional, 0 to 1) required to include
827
- in temperature envelope, otherwise
828
- the point is plotted as an unconnected, slightly transparent dot, by default None
829
- **kwargs : dict, optional
830
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
831
- list of all possible arguments.
832
-
833
- Returns
834
- -------
835
- Figure
836
- a matplotlib `Figure` object
837
- """
838
- df = self.long.dropna()
839
-
840
- if year is not None:
841
- df = df[df['time'].dt.year == year]
842
-
843
- elif begin is not None or end is not None:
844
- raise NotImplementedError
845
-
846
- else:
847
- raise ValueError("One of 'year', 'begin', 'end' must be provided.")
848
-
849
- grouped = df.groupby('depth')
850
-
851
- max_t = grouped.max().get('temperature_in_ground').values
852
- min_t = grouped.min().get('temperature_in_ground').values
853
- mean_t = grouped.mean().get('temperature_in_ground').values
854
- depth = np.array([d for d in grouped.groups.keys()])
855
-
856
- # Calculate completeness
857
- c = self.yearly(None, None, None).completeness
858
-
859
- if min_completeness is not None and c is not None:
860
- C = c[c.index.year == year]
861
- C = C[depth].iloc[0,:].values
862
-
863
- else:
864
- C = None
865
-
866
- fig = trumpet_curve(depth=depth,
867
- t_max=max_t,
868
- t_min=min_t,
869
- t_mean=mean_t,
870
- min_completeness=min_completeness,
871
- data_completeness=C,
872
- **kwargs)
873
- fig.show()
874
-
875
- return fig
876
-
877
- def plot_contour(self, **kwargs) -> Figure:
878
- """ Create a contour plot
879
-
880
- Parameters
881
- ----------
882
- **kwargs : dict, optional
883
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
884
- list of all possible arguments.
885
-
886
- Returns
887
- -------
888
- Figure
889
- matplotlib `Figure` object
890
- """
891
- fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
892
-
893
- if self.output_utc_offset is not None:
894
- label = format_utc_offset(self.output_utc_offset)
895
- if label != "UTC":
896
- label = f"UTC{label}"
897
- fig.axes[0].set_xlabel(f"Time [{label}]")
898
-
899
- fig.show()
900
-
901
- return fig
902
-
903
- def plot_timeseries(self, depths: list=[], **kwargs) -> Figure:
904
- """Create a time series T(t) plot
905
-
906
- Parameters
907
- ----------
908
- depths : list, optional
909
- If non-empty, restricts the depths to include in the plot, by default []
910
- **kwargs : dict, optional
911
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
912
- list of all possible arguments.
913
-
914
- Returns
915
- -------
916
- Figure
917
- matplotlib `Figure` object
918
- """
919
- if depths == []:
920
- depths = self.depths
921
-
922
- d_mask = np.isin(self.depths, depths)
923
-
924
- fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)
925
-
926
-
927
- if self.output_utc_offset is not None:
928
- label = format_utc_offset(self.output_utc_offset)
929
- if label != "UTC":
930
- label = f"UTC{label}"
931
- fig.axes[0].set_xlabel(f"Time [{label}]")
932
- fig.autofmt_xdate()
933
- fig.show()
934
-
935
- return fig
936
-
937
-
938
- class AggregatedTSP(TSP):
939
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
940
-
941
- Used in situations when depths are unknown (such as when reading datlogger exports
942
- that don't have depth measurements.)
943
-
944
- Parameters
945
- ----------
946
- times : list-like
947
- t-length array of datetime objects
948
- values : numpy.ndarray
949
- array with shape (t,d) containing values at (t)emperatures and (d)epths
950
- **kwargs : dict
951
- Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
952
- list of all possible arguments.
953
- """
954
-
955
-
956
- class IndexedTSP(TSP):
957
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
958
-
959
- Used in situations when depths are unknown (such as when reading datlogger exports
960
- that don't have depth measurements.)
961
-
962
- Parameters
963
- ----------
964
- times : list-like
965
- t-length array of datetime objects
966
- values : numpy.ndarray
967
- array with shape (t,d) containing values at (t)emperatures and (d)epths
968
- **kwargs : dict
969
- Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
970
- list of all possible arguments.
971
- """
972
-
973
- def __init__(self, times, values, **kwargs):
974
- depths = np.arange(0, values.shape[1]) + 1
975
- super().__init__(times=times, depths=depths, values=values, **kwargs)
976
-
977
- @property
978
- def depths(self) -> np.ndarray:
979
- """Depth indices
980
-
981
- Returns
982
- -------
983
- numpy.ndarray
984
- An array of depth indices
985
- """
986
- warnings.warn("This TSP uses indices (1,2,3,...) instad of depths. Use set_depths() to use measured depths.", stacklevel=2)
987
- return self._depths
988
-
989
- @depths.setter
990
- def depths(self, value):
991
- TSP.depths.__set__(self, value)
992
-
993
- def set_depths(self, depths: np.ndarray):
994
- """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`
995
-
996
- Parameters
997
- ----------
998
- depths : np.ndarray
999
- An array or list of depth values equal in lenth to the depth indices
1000
- """
1001
- self.depths = depths
1002
- self.__class__ = TSP
1003
-
1004
-
1005
-
1006
- def span(S: pd.Series) -> float:
1007
- first = S.first_valid_index() # type: pd.Timestamp
1008
- last = S.last_valid_index() # type: pd.Timestamp
1009
- if first is None or last is None:
1010
- return 0
1011
-
1012
- return (last - first).total_seconds()
1013
-
1014
- def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
1015
- threshold: float) -> "pd.DataFrame":
1016
- s = R.apply(lambda x: span(x))
1017
- return s < threshold
1018
-
1019
-
1020
- def gap(S: pd.Series) -> float:
1021
-
1022
- d = np.diff(S.dropna().index)
1023
- if len(d) == 0:
1024
- return 0
1025
- elif len(d) == 1:
1026
- return 0
1027
- elif len(d) > 1:
1028
- gap = max(d).astype('timedelta64[s]').astype(float)
1029
- return gap
1030
-
1031
-
1032
- def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
1033
- threshold: float) -> "pd.DataFrame":
1034
- g = R.apply(lambda x: gap(x))
1035
- return (g > threshold) | (g == 0)
1036
-
1037
-
1038
-
1039
-
1040
- def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
1041
- """ Mask out observational groups in which there is more than a certain size temporal gap
1042
-
1043
- Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
1044
- or end of the aggregation group (using min_span).
1045
-
1046
- Parameters
1047
- ----------
1048
- grouped : pandas.core.groupby.DataFrameGroupBy
1049
- groupby with 'time' and 'depth' columns
1050
- max_gap : int
1051
- maximum gap in seconds to tolerate between observations in a group
1052
- min_span : int
1053
- minimum data range (beginning to end) in seconds.
1054
-
1055
- Returns
1056
- -------
1057
- numpy.ndarray
1058
- boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
1059
- """
1060
- if max_gap is not None:
1061
- max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
1062
- max_diff = max_diff.unstack().to_numpy()
1063
- diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
1064
- else:
1065
- diff_mask = np.zeros_like(grouped, dtype=bool)
1066
-
1067
- if min_span is not None:
1068
- total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
1069
- span_mask = np.where(total_span < min_span, True, False)
1070
- else:
1071
- span_mask = np.zeros_like(grouped, dtype=bool)
1072
-
1073
- mask = diff_mask * span_mask
1074
-
1075
- return mask
1076
-
1077
-
1078
- def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
1079
- """ Create a mask array for an
1080
-
1081
- Parameters
1082
- ----------
1083
- number_of_observations : numpy.ndarray
1084
- Array of how many data points are in aggregation
1085
- min_count : int
1086
- Minimum number of data points for aggregation to be 'valid'
1087
-
1088
- Returns
1089
- -------
1090
- np.ndarray
1091
- a mask, True where data should be masked
1092
- """
1093
- valid = np.less(number_of_observations, min_count) # type: np.ndarray
1094
- return valid
1095
-
1096
-
1097
- def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
1098
- """Convert a list of times to a pandas DatetimeIndex object"""
1099
- invalid_msg = "Times must be a list, numpy array, pandas DatetimeIndex, or pandas Series"
1100
-
1101
- try:
1102
- if not len(times):
1103
- raise ValueError(invalid_msg)
1104
- except TypeError:
1105
- raise ValueError(invalid_msg)
1106
-
1107
- if isinstance(times, pd.DatetimeIndex):
1108
- return times
1109
-
1110
- if isinstance(times, pd.Series):
1111
- try:
1112
- times = pd.DatetimeIndex(times)
1113
- except Exception:
1114
- raise ValueError("Series must be convertible to DatetimeIndex")
1115
- times.name = 'time'
1116
-
1117
- return times
1118
-
1119
- elif isinstance(times, np.ndarray):
1120
- times = pd.to_datetime(times)
1121
- times.name = 'time'
1122
- return times
1123
-
1124
- elif isinstance(times, list):
1125
- return pd.to_datetime(times)
1126
-
1127
- else:
1128
- raise ValueError(invalid_msg)
1129
-
1130
- def tsp_concat(tsp_list, on_conflict='error', metadata='first') -> TSP:
1131
- """Combine multiple TSPs into a single TSP.
1132
-
1133
- Parameters
1134
- ----------
1135
- tsp_list : list[TSP]
1136
- List of TSPs to combine. They must have the same depths
1137
- on_conflict : str, optional
1138
- Method to resolve duplicate times with different values. Chosen from "error", "keep", by default "error"
1139
- - "error": Raise an error if duplicate times with different values are found.
1140
- - "keep": Keep the first occurrence of the duplicate time.
1141
- metadata : str, optional
1142
- Method to select metadata from the TSPs. Chosen from "first", "identical", or "none", by default "first"
1143
- - "first": Use the metadata from the first TSP in the list.
1144
- - "identical": Only keep metadata records that are identical across TSPs.
1145
- - "none": Ignore metadata and set it to None.
1146
- Returns
1147
- -------
1148
- TSP
1149
- Combined TSP.
1150
-
1151
- Description
1152
- -----------
1153
- This function combines multiple TSPs into a single TSP. The TSPs must have the same depths.
1154
- """
1155
- tsp_dict = _tsp_concat(tsp_list=tsp_list, on_conflict=on_conflict, metadata=metadata)
1156
- times = tsp_dict.pop('times')
1157
- depths = tsp_dict.pop('depths')
1158
- values = tsp_dict.pop('values')
1159
-
1160
- t = TSP(times, depths, values, **tsp_dict)
1161
-
1162
- return t
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+ import re
5
+ import inspect
6
+ import numpy as np
7
+ import functools
8
+ import warnings
9
+ import unicodedata
10
+
11
+ try:
12
+ import netCDF4 as nc
13
+
14
+ try:
15
+ from pfit.pfnet_standard import make_temperature_base, calculate_extent_metadata as calc_ext_meta
16
+ except ModuleNotFoundError:
17
+ warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
18
+
19
+ except ModuleNotFoundError:
20
+ warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
21
+
22
+ from typing import Union, Optional
23
+ from numpy.typing import NDArray
24
+ from datetime import datetime, tzinfo, timezone, timedelta
25
+
26
+ import tsp
27
+ import tsp.labels as lbl
28
+ import tsp.tspwarnings as tw
29
+
30
+ from tsp.physics import analytical_fourier
31
+ from tsp.plots.static import trumpet_curve, colour_contour, time_series, profile_evolution, _plot_overlay
32
+ from tsp.time import format_utc_offset
33
+ from tsp.time import get_utc_offset
34
+ from tsp.misc import completeness
35
+ from tsp.standardization import metadata as mdf
36
+ from tsp.concatenation import _tsp_concat
37
+
38
+ from matplotlib.figure import Figure
39
+
40
+
41
+ class TSP:
42
+ """ A Time Series Profile (a collection of time series data at different depths)
43
+
44
+ A TSP can also be:
45
+ Thermal State of Permafrost
46
+ Temperature du Sol en Profondeur
47
+ Temperatures, Secondes, Profondeurs
48
+
49
+ Parameters
50
+ ----------
51
+ times : pandas.DatetimeIndex
52
+ DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
53
+ but will be converted to a DatetimeIndex with no UTC offset.
54
+ depths : list-like
55
+ d-length array of depths
56
+ values : numpy.ndarray
57
+ array with shape (t,d) containing values at (t)emperatures and (d)epths
58
+ longitude : float, optional
59
+ Longitude at which data were collected
60
+ latitude : float, optional
61
+ Latitude at which data were collected
62
+ site_id : str, optional
63
+ Name of location at which data were collected
64
+ metadata : dict
65
+ Additional metadata
66
+
67
+ Attributes
68
+ ----------
69
+ values
70
+ latitude : float
71
+ Latitude at which data were collected
72
+ longitude : float
73
+ Longitude at which data were collected
74
+ metadata : dict
75
+ Additional metadata provided at instantiation or by other methods
76
+ """
77
+
78
+ def __repr__(self) -> str:
79
+ return repr(self.wide)
80
+
81
+ def __str__(self) -> str:
82
+ return str(self.wide)
83
+
84
+ def __add__(self, other: TSP) -> TSP:
85
+ """ Concatenate two TSP objects along the time axis.
86
+ The two TSP objects must have the same depths and the same UTC offset.
87
+
88
+ Parameters
89
+ ----------
90
+ other : TSP
91
+ Another TSP object to concatenate with this one
92
+
93
+ Returns
94
+ -------
95
+ TSP
96
+ A new TSP object with the concatenated data
97
+ """
98
+ if not isinstance(other, TSP):
99
+ raise TypeError("Can only concatenate TSP objects.")
100
+
101
+ if self.utc_offset != other.utc_offset:
102
+ raise ValueError("UTC offsets must be the same to concatenate.")
103
+
104
+ return tsp_concat([self, other])
105
+
106
+ def __init__(self, times, depths, values,
107
+ latitude: Optional[float]=None,
108
+ longitude: Optional[float]=None,
109
+ site_id: Optional[str]=None,
110
+ metadata: dict={}):
111
+
112
+ self._times = handle_incoming_times(times)
113
+ if self._times.duplicated().any():
114
+ warnings.warn(tw.DuplicateTimesWarning(self._times), stacklevel=2)
115
+
116
+ if self.utc_offset:
117
+ self._output_utc_offset = self.utc_offset
118
+ else:
119
+ self._output_utc_offset = None
120
+
121
+ self._depths = np.atleast_1d(depths)
122
+ self._values = np.atleast_2d(values)
123
+ self._times, self._values = self.__enforce_increasing_times(self._times, self._values)
124
+ self.__number_of_observations = np.ones_like(values, dtype=int)
125
+ self.__number_of_observations[np.isnan(values)] = 0
126
+ self.metadata = metadata
127
+ self.latitude = latitude
128
+ self.longitude = longitude
129
+ self.site_id = site_id
130
+ self._freq = None
131
+ self._completeness = None
132
+
133
+ self._export_precision = 3
134
+
135
+ @property
136
+ def site_id(self):
137
+ return self._site_id
138
+
139
+ @site_id.setter
140
+ def site_id(self, value):
141
+ if value is not None:
142
+ if is_valid_site_name_unicode(value):
143
+ self._site_id = str(value)
144
+ self.metadata['_site_id'] = self._site_id
145
+ else:
146
+ raise ValueError("site_id is not valid.")
147
+ else:
148
+ self._site_id = None
149
+
150
+
151
+ @property
152
+ def latitude(self):
153
+ """ Latitude at which data were collected """
154
+ return self._latitude
155
+
156
+ @latitude.setter
157
+ def latitude(self, value: Optional[float]):
158
+ if value is not None:
159
+ try:
160
+ self._latitude = float(value)
161
+ self.metadata['_latitude'] = self._latitude
162
+ except ValueError:
163
+ raise ValueError("Latitude must be a float or None.")
164
+ else:
165
+ self._latitude = None
166
+
167
+
168
+ @property
169
+ def longitude(self):
170
+ """ Longitude at which data were collected """
171
+ return self._longitude
172
+
173
+ @longitude.setter
174
+ def longitude(self, value: Optional[float]):
175
+ if value is not None:
176
+ try:
177
+ self._longitude = float(value)
178
+ self.metadata['_longitude'] = self._longitude
179
+ except ValueError:
180
+ raise ValueError("Longitude must be a float or None.")
181
+ else:
182
+ self._longitude = None
183
+
184
+
185
+ @property
186
+ def freq(self) -> Optional[int]:
187
+ """ Measurement frequency [s] """
188
+ return self._freq
189
+
190
+ @freq.setter
191
+ def freq(self, value: int):
192
+ if not isinstance(value, int):
193
+ raise TypeError("Must be string, e.g. '1D', '3600s'")
194
+ self._freq = value
195
+
196
+ def __enforce_increasing_times(self, times, values):
197
+ """ Ensure times are strictly increasing, reordering if necessary """
198
+ diffs = times.diff()
199
+ non_increasing = np.where(diffs <= np.timedelta64(0, 'ns'))[0]
200
+ if len(non_increasing) > 0:
201
+ warnings.warn(tw.NonIncreasingTimesWarning(times), stacklevel=2)
202
+ warnings.warn(UserWarning("Attempting to reorder times."), stacklevel=2)
203
+ order = np.argsort(times)
204
+ times = times[order]
205
+ values = values[order, :]
206
+ return times, values
207
+
208
+ @property
209
+ def completeness(self) -> Optional[pd.DataFrame]:
210
+ """ Data completeness """
211
+ return self._completeness
212
+
213
+ @completeness.setter
214
+ def completeness(self, value):
215
+ raise ValueError("You can't assign this variable.")
216
+
217
+ @classmethod
218
+ def from_tidy_format(cls, times, depths, values,
219
+ number_of_observations=None,
220
+ latitude: Optional[float]=None,
221
+ longitude: Optional[float]=None,
222
+ site_id: Optional[str]=None,
223
+ metadata:dict={}):
224
+ """ Create a TSP from data in a 'tidy' or 'long' format
225
+
226
+ Parameters
227
+ ----------
228
+ times : list-like
229
+ n-length array of datetime objects
230
+ depths : list-like
231
+ n-length array of depths
232
+ values : numpy.ndarray
233
+ n-length array of (temperaure) values at associated time and depth
234
+ number_of_observations : numpy.ndarray, optional
235
+ n-length array of number of observations at associated time and
236
+ depth for aggregated values (default: 1)
237
+ longitude : float, optional
238
+ Longitude at which data were collected
239
+ latitude : float, optional
240
+ Latitude at which data were collected
241
+ site_id : str, optional
242
+ Name of location at which data were collected
243
+ metadata : dict
244
+ Additional metadata
245
+ """
246
+ times = np.atleast_1d(times)
247
+ depths = np.atleast_1d(depths)
248
+ values = np.atleast_1d(values)
249
+
250
+ number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
251
+ df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
252
+ df.set_index(["times", "depths"], inplace=True)
253
+
254
+ try:
255
+ unstacked = df.unstack()
256
+ except ValueError as e:
257
+ if np.any(df.index.duplicated()):
258
+ print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
259
+ raise e
260
+
261
+ temps = unstacked.get('temperature_in_ground')
262
+
263
+ this = cls(times=temps.index.values,
264
+ depths=temps.columns.values,
265
+ values=temps.values,
266
+ latitude=latitude,
267
+ longitude=longitude,
268
+ site_id=site_id,
269
+ metadata=metadata)
270
+
271
+ number_of_observations = unstacked.get('number_of_observations').values
272
+
273
+ number_of_observations[np.isnan(number_of_observations)] = 0
274
+ this.__number_of_observations = number_of_observations
275
+ return this
276
+
277
+ @classmethod
278
+ def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
279
+ """ Use an existing TSP object as a template, """
280
+ kw = {}
281
+ for arg in inspect.getfullargspec(TSP).args[1:]:
282
+ if kwargs.get(arg) is not None:
283
+ kw[arg] = kwargs.get(arg)
284
+ else:
285
+ kw[arg] = getattr(t, arg)
286
+
287
+ t = TSP(**kw)
288
+
289
+ return t
290
+
291
+ @classmethod
292
+ def from_json(cls, json_file) -> "TSP":
293
+ """ Read data from a json file
294
+
295
+ Parameters
296
+ ----------
297
+ json_file : str
298
+ Path to a json file from which to read
299
+ """
300
+ df = pd.read_json(json_file)
301
+ depth_pattern = r"^(-?[0-9\.]+)$"
302
+
303
+ times = pd.to_datetime(df['time']).values
304
+ depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
305
+ values = df.loc[:, depths].to_numpy()
306
+
307
+ t = cls(times=times, depths=depths, values=values)
308
+
309
+ return t
310
+
+ @classmethod
+ def synthetic(cls, depths: NDArray[np.number],
+ start:str ="2000-01-01",
+ end:str ="2003-01-01",
+ freq: "str"="D",
+ Q:float=0.2,
+ c:float=1.6e6,
+ k:float=2.5,
+ A:float=6,
+ MAGST:float=-0.5,
+ **kwargs) -> "TSP":
+ """
+ Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
+ Suitable for testing
+
+ Parameters
+ ----------
+ depths : np.ndarray
+ array of depths in metres
+ start : str
+ start date for the time series, in the format "YYYY-MM-DD"
+ end : str
+ end date for the time series, in the format "YYYY-MM-DD"
+ freq : str
+ pandas frequency string, e.g. "D" for daily, "H" for hourly, etc.
+ Q : Optional[float], optional
+ Ground heat flux [W m-2], by default 0.2
+ c : Optional[float], optional
+ heat capacity [J m-3 K-1], by default 1.6e6
+ k : Optional[float], optional
+ thermal conductivity [W m-1 K-1], by default 2.5
+ A : Optional[float], optional
+ Amplitude of temperature fluctuation [C], by default 6
+ MAGST : Optional[float], optional
+ Mean annual ground surface temperature [C], by default -0.5
+
+ Returns
+ -------
+ TSP
+ A timeseries profile (TSP) object
+ """
+ times = pd.date_range(start=start, end=end, freq=freq).to_pydatetime()
+ t_sec = np.array([(t-times[0]).total_seconds() for t in times])
+
+ values = analytical_fourier(depths=depths,
+ times=t_sec,
+ Q=Q,
+ c=c,
+ k=k,
+ A=A,
+ MAGST=MAGST)
+
+ this = cls(depths=depths, times=times, values=values, **kwargs)
+
+ return this
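+
+ # Usage sketch, assuming numpy is imported as np: generate three years of
+ # daily synthetic temperatures at four depths for testing.
+ #
+ # >>> t = TSP.synthetic(depths=np.array([0.5, 1.0, 2.0, 5.0]))
+ # >>> t.values.shape[1] # one column of values per depth
+ # 4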
+
+ @property
+ @functools.lru_cache()
+ def long(self) -> "pd.DataFrame":
+ """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
+
+ Returns
+ -------
+ pandas.DataFrame
+ Time series profile data with columns:
+ - **time**: time
+ - **depth**: depth
+ - **temperature_in_ground**: temperature
+ - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
+ """
+ values = self.wide.melt(id_vars='time',
+ var_name="depth",
+ value_name="temperature_in_ground")
+
+ number_of_observations = self.number_of_observations.melt(id_vars='time',
+ var_name="depth",
+ value_name="number_of_observations")
+
+ values['number_of_observations'] = number_of_observations['number_of_observations']
+
+ return values
+
+ @property
+ @functools.lru_cache()
+ def wide(self) -> "pd.DataFrame":
+ """ Return the data in a 'wide' format (one column per depth)
+
+ Returns
+ -------
+ pandas.DataFrame
+ Time series profile data
+ """
+ tabular = pd.DataFrame(self._values)
+ tabular.columns = self._depths
+ tabular.index = self.times
+ tabular.insert(0, "time", self.times)
+
+ return tabular
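+
+ # Sketch of the two layouts above for the same data: 'wide' keeps a 'time'
+ # column plus one column per depth, while 'long' has one row per observation.
+ #
+ # >>> list(t.long.columns)
+ # ['time', 'depth', 'temperature_in_ground', 'number_of_observations']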
+
+ @property
+ @functools.lru_cache()
+ def number_of_observations(self) -> "pd.DataFrame":
+ """ The number of observations for an average at a particular depth or time.
+
+ For pure observational data, the number of observations will always be '1'. When data are aggregated
+ (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`), these numbers
+ will be greater than 1.
+
+ Returns
+ -------
+ DataFrame
+ Number of observations
+ """
+ tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
+ tabular.columns = self._depths
+ tabular.index = self._times
+ tabular.insert(0, "time", self._times)
+
+ return tabular
+
+ @number_of_observations.setter
+ def number_of_observations(self, value):
+ raise ValueError(f"You can't assign {value} to this variable; use the 'counts' property to set observation counts.")
+
+ def reset_counts(self):
+ """ Set observation count to 1 if data exists, 0 otherwise """
+ counts = ~self.wide.drop("time", axis=1).isna() # True where a value exists
+ self.__number_of_observations = counts.to_numpy(dtype=int)
+
+ def set_utc_offset(self, offset:"Union[int,str]") -> None:
+ """ Set the time zone of the data by providing a UTC offset
+
+ Parameters
+ ----------
+ offset : int, str
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
+ """
+ if self.utc_offset is not None:
+ raise ValueError("You can only set the UTC offset once.")
+
+ utc_offset = get_utc_offset(offset)
+
+ tz = timezone(timedelta(seconds = utc_offset))
+ self._times = self._times.tz_localize(tz)
+ self._output_utc_offset = tz
+
+ TSP.wide.fget.cache_clear()
+ TSP.long.fget.cache_clear()
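+
+ # Usage sketch: localize naive timestamps to UTC-07:00 once, then choose a
+ # different zone for display with set_output_utc_offset() (defined below).
+ #
+ # >>> t.set_utc_offset("-07:00")
+ # >>> t.set_output_utc_offset(0) # display in UTC (an offset of 0 seconds)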
+
+ @property
+ def utc_offset(self) -> "Optional[tzinfo]":
+ """ The time zone of the data, set via a UTC offset
+
+ Returns
+ -------
+ datetime.tzinfo
+ A timezone object, or None if the time zone has not been set
+ """
+ return self._times.tz # None when no offset has been set
+
+ @utc_offset.setter
+ def utc_offset(self, value):
+ self.set_utc_offset(value)
+
+ @property
+ def output_utc_offset(self) -> "Optional[tzinfo]":
+ """ The time zone in which to output or display the data
+
+ Returns
+ -------
+ datetime.tzinfo
+ A timezone object, or None if the time zone has not been set
+ """
+ return self._output_utc_offset
+
+ @output_utc_offset.setter
+ def output_utc_offset(self, offset:"Union[int,str]") -> None:
+ self.set_output_utc_offset(offset)
+
+ def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
+ """ Set the time zone in which to display or output the data by providing a UTC offset
+
+ Parameters
+ ----------
+ offset : int, str
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
+ """
+ utc_offset = get_utc_offset(offset)
+ tz = timezone(timedelta(seconds = utc_offset))
+ self._output_utc_offset = tz
+
+ TSP.wide.fget.cache_clear()
+ TSP.long.fget.cache_clear()
+
+ def reset_output_utc_offset(self) -> None:
+ """ Reset the output time zone to the default (the one set by set_utc_offset) """
+ if self.utc_offset is None:
+ raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
+ else:
+ self._output_utc_offset = self.utc_offset
+
+ def __nly(self,
+ freq_fmt:str,
+ new_freq,
+ min_count:Optional[int],
+ max_gap:Optional[int],
+ min_span:Optional[int]) -> TSP:
+ """
+ Temporal aggregation by resampling to a new frequency
+
+ Parameters
+ ----------
+ freq_fmt : str
+ pandas offset alias (e.g. "D", "ME", "YE") used to resample the data
+ new_freq
+ label for the new frequency (e.g. lbl.DAILY)
+ min_count : int, optional
+ minimum number of observations for a valid aggregate
+ max_gap : int, optional
+ maximum gap (in seconds) between observations for a valid aggregate
+ min_span : int, optional
+ minimum data range (in seconds) for a valid aggregate
+
+ Returns
+ -------
+ TSP
+ A TSP containing the aggregated data, with invalid aggregates masked (NaN)
+ """
+ R = self.wide.drop("time", axis=1).resample(freq_fmt)
+ cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
+ total_obs = R.count()
+ values = R.mean()
+
+ # Calculate masks
+ mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
+
+ if min_count is not None:
+ mc_mask = (cumulative_obs < min_count)
+ if max_gap is not None:
+ Mg_mask = max_gap_mask(R, max_gap)
+ if min_span is not None:
+ ms_mask = min_span_mask(R, min_span)
+
+ mask = (mc_mask | Mg_mask | ms_mask)
+ values[mask] = np.nan
+
+ # Construct TSP
+ t = TSP.__from_tsp(self, times=values.index,
+ depths=values.columns,
+ values=values.values)
+ t.__number_of_observations = cumulative_obs
+ t.freq = new_freq
+
+ # Calculate data completeness
+ if self.freq is not None:
+ f1 = self.freq
+ f2 = new_freq
+ t._completeness = completeness(total_obs, f1, f2)
+
+ return t
+
+ def monthly(self,
+ min_count:Optional[int]=24,
+ max_gap:Optional[int]=3600*24*8,
+ min_span:Optional[int]=3600*24*21) -> "TSP":
+ """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
+
+ Parameters
+ ----------
+ min_count : int
+ Minimum number of observations in a month to be considered a valid average,
+ defaults to 24
+ max_gap : int
+ Maximum gap (in seconds) between data points to be considered a valid average,
+ defaults to 691200 (8 days)
+ min_span : int
+ Minimum total data range (in seconds) to be considered a valid average,
+ defaults to 1814400 (21 days)
+
+ Returns
+ -------
+ TSP
+ A TSP object with data aggregated to monthly averages
+ """
+ t = self.__nly(freq_fmt="ME",
+ new_freq=lbl.MONTHLY,
+ min_count=min_count,
+ max_gap=max_gap,
+ min_span=min_span)
+
+ return t
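+
+ # Usage sketch: the defaults above require at least 24 observations per month,
+ # no gap longer than 8 days (3600*24*8 s), and data spanning at least 21 days
+ # (3600*24*21 s); months failing any test come back as NaN.
+ #
+ # >>> m = t.monthly()
+ # >>> strict = t.monthly(min_count=672) # e.g. hourly data: ~28 days of readings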
+
+ def daily(self,
+ min_count:Optional[int]=None,
+ max_gap:Optional[int]=None,
+ min_span:Optional[int]=None) -> "TSP":
+ """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
+
+ Parameters
+ ----------
+ min_count : int
+ Minimum number of observations in a day to be considered a valid average,
+ defaults to None
+ max_gap : int
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
+ min_span : int
+ Minimum total data range (in seconds) to be considered a valid average, defaults to None
+
+ Returns
+ -------
+ TSP
+ A TSP object with data aggregated to daily averages
+ """
+ t = self.__nly(freq_fmt="D",
+ new_freq=lbl.DAILY,
+ min_count=min_count,
+ max_gap=max_gap,
+ min_span=min_span)
+
+ return t
+
+ def yearly(self,
+ min_count:Optional[int]=None,
+ max_gap:Optional[int]=None,
+ min_span:Optional[int]=None) -> "TSP":
+ """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
+
+ Parameters
+ ----------
+ min_count : int
+ Minimum number of observations in a year to be considered a valid average, defaults to None
+ max_gap : int
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
+ min_span : int
+ Minimum total data range (in seconds) to be considered a valid average, defaults to None
+
+ Returns
+ -------
+ TSP
+ A TSP object with data aggregated to yearly averages
+ """
+ t = self.__nly(freq_fmt="YE",
+ new_freq=lbl.YEARLY,
+ min_count=min_count,
+ max_gap=max_gap,
+ min_span=min_span)
+
+ return t
+
+ @property
+ def counts(self) -> NDArray[np.number]:
+ """ Return the number of observations at each time and depth in the profile
+
+ Returns
+ -------
+ numpy.ndarray
+ The number of observations at each time and depth in the profile
+ """
+ return self.__number_of_observations
+
+ @counts.setter
+ def counts(self, value):
+ counts = np.atleast_2d(value)
+
+ if counts.shape != self.__number_of_observations.shape:
+ raise ValueError(f"Array of counts must have shape of {self.__number_of_observations.shape}.")
+
+ self.__number_of_observations = counts
+
+ @property
+ def depths(self) -> NDArray[np.number]:
+ """ Return the depth values in the profile
+
+ Returns
+ -------
+ numpy.ndarray
+ The depths in the profile
+ """
+ return self._depths
+
+ @depths.setter
+ def depths(self, value):
+ depths = np.atleast_1d(value)
+
+ if len(depths) != len(self._depths):
+ raise ValueError(f"List of depths must have length of {len(self._depths)}.")
+
+ self._depths = depths
+
+ TSP.wide.fget.cache_clear()
+ TSP.long.fget.cache_clear()
+
+ @property
+ def times(self):
+ """ Return the timestamps in the time series
+
+ Returns
+ -------
+ pandas.DatetimeIndex
+ The timestamps in the time series
+ """
+ if self.utc_offset is None:
+ return self._times
+
+ elif self._output_utc_offset == self.utc_offset:
+ return self._times
+
+ else:
+ return self._times.tz_convert(self.output_utc_offset)
+
+ @property
+ def values(self):
+ """ Return the temperature values as an array with shape (t,d) for (t)imes and (d)epths """
+ return self._values
+
+ def counts_df(self) -> pd.DataFrame:
+ """ Return the number of observations as a DataFrame
+
+ Returns
+ -------
+ pandas.DataFrame
+ DataFrame of number of observations at each time and depth in the profile
+ """
+ df = pd.DataFrame(data=self.__number_of_observations, index=self.wide.index, columns=self.depths)
+ return df
+
+ def to_gtnp(self, filename: str) -> None:
+ """ Write the data in GTN-P format
+
+ Parameters
+ ----------
+ filename : str
+ Path to the file to write to
+ """
+ df = self.wide.round(self._export_precision).rename(columns={'time': 'Date/Depth'})
+ df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
+
+ df.to_csv(filename, index=False, na_rep="-999")
+
+ def to_ntgs(self, filename:str, project_name:Optional[str]="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
+ """ Write the data in NTGS template format
+
+ Parameters
+ ----------
+ filename : str
+ Path to the file to write to
+ project_name : str, optional
+ The project name, by default ""
+ site_id : str, optional
+ The name of the site, by default None
+ latitude : float, optional
+ WGS84 latitude at which the observations were recorded, by default None
+ longitude : float, optional
+ WGS84 longitude at which the observations were recorded, by default None
+ """
+ if latitude is None:
+ latitude = self.latitude if self.latitude is not None else ""
+
+ if longitude is None:
+ longitude = self.longitude if self.longitude is not None else ""
+
+ if site_id is None:
+ site_id = self.site_id if self.site_id is not None else ""
+
+ if project_name is None:
+ project_name = self.metadata.get("project_name", "")
+
+ data = self.values
+
+ df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
+ 'site_id': pd.Series(dtype='str'),
+ 'latitude': pd.Series(dtype='float'),
+ 'longitude': pd.Series(dtype='float')
+ })
+
+ df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
+ df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
+
+ df["project_name"] = project_name
+ df["site_id"] = site_id
+ df["latitude"] = latitude
+ df["longitude"] = longitude
+
+ headers = [str(d) + "_m" for d in self.depths]
+
+ for i, h in enumerate(headers):
+ df[h] = data[:, i].round(self._export_precision)
+
+ df.to_csv(filename, index=False)
+
+ def to_netcdf(self, file: str, only_use_cf_metadata=True,
+ calculate_extent_metadata=True, zlib=True, complevel=4) -> None:
+ """ Write the data to a netCDF file """
+ try:
+ ncf = make_temperature_base(file, ndepth=len(self.depths), ntime=len(self.times), strings_as_strings=True, zlib=zlib, complevel=complevel)
+ except NameError:
+ warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
+ return
+
+ with nc.Dataset(ncf, 'a') as ncd:
+ pytime = self.times.to_pydatetime()
+
+ ncd['depth_below_ground_surface'][:] = self.depths
+ ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
+ ncd['ground_temperature'][:] = self.values
+
+ if self.latitude:
+ ncd['latitude'][:] = self.latitude
+ if self.longitude:
+ ncd['longitude'][:] = self.longitude
+ if self.site_id:
+ if ncd['site_name'].dtype == str:
+ ncd['site_name'][0] = self.site_id
+ else:
+ strlen = ncd['site_name'].shape[0]
+ ncd['site_name'][:] = nc.stringtochar(np.array([self.site_id], f"S{strlen}"))
+
+ if "_elevation" in self.metadata:
+ ncd['surface_elevation'][:] = self.metadata.get("_elevation")
+
+ if only_use_cf_metadata:
+ metadata = self.metadata.get('CF', {})
+ else:
+ metadata = self.metadata
+
+ for key, value in metadata.items():
+ try:
+ if isinstance(value, str):
+ ncd.setncattr_string(key, value)
+ else:
+ ncd.setncattr(key, value)
+ except Exception:
+ warnings.warn(f"Could not set metadata item: {key} : {value}", stacklevel=2)
+
+ if calculate_extent_metadata:
+ calc_ext_meta(ncd)
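+
+ # Usage sketch (hypothetical filename): netCDF export depends on optional
+ # packages, per the warning above (`pip install tsp[nc]`).
+ #
+ # >>> t.to_netcdf("site_A.nc")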
+
+ def to_json(self, file: str) -> None:
+ """ Write the data to a serialized json file """
+ with open(file, 'w') as f:
+ f.write(self._to_json())
+
+ def to_csv(self, file: str, include_metadata=False, long=False) -> None:
+ """ Write the data to a tsp-style csv file
+
+ Parameters
+ ----------
+ file : str
+ Path to the file to write to
+ include_metadata : bool | str
+ If True, include all metadata as commented lines at the top of the file.
+ If 'standard', include only the standardized metadata keys.
+ long : bool
+ If True, write the data in long format, otherwise wide format
+ """
+ with open(file, 'w', encoding='utf-8') as f:
+ if include_metadata == 'standard':
+ md = {}
+ for key in mdf.standardized_keys.keys():
+ if self.metadata.get(key):
+ md[key[1:]] = self.metadata.get(key)
+
+ elif include_metadata is True:
+ md = {}
+ for key, value in self.metadata.items():
+ if key in mdf.standardized_keys.keys():
+ md[key[1:]] = value
+ else:
+ md[key] = value
+ else:
+ md = {}
+
+ md_lines = mdf.dict_to_metadata(md)
+ for line in md_lines:
+ f.write(f"{line}\n")
+
+ if long:
+ df = self.long.round(self._export_precision)
+ df.rename(columns={"time": "timestamp",
+ "temperature_in_ground": "temperature"}, inplace=True)
+ f.write(df.to_csv(index=False, lineterminator='\n'))
+ else:
+ df = self.wide.round(self._export_precision)
+ df.rename(columns={"time": "timestamp"}, inplace=True)
+ f.write(df.to_csv(index=False, lineterminator='\n'))
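+
+ # Usage sketch of the text exports (filenames are illustrative):
+ #
+ # >>> t.to_csv("site_A_wide.csv", include_metadata=True) # wide layout with metadata header
+ # >>> t.to_csv("site_A_long.csv", long=True) # long ('tidy') layout
+ # >>> t.to_gtnp("site_A_gtnp.csv") # GTN-P format, NaN written as -999
+ # >>> t.to_ntgs("site_A_ntgs.csv", project_name="example") # NTGS template format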
+
+ def _to_json(self) -> str:
+ return self.wide.round(self._export_precision).to_json()
+
+ def plot_profiles(self, P:int=100, n:int=10, metadata=False) -> Figure:
+ """ Create a plot of the temperature profiles at different times
+
+ Parameters
+ ----------
+ P : int
+ Percentage of time range to plot
+ n : int
+ Number of evenly-spaced profiles to plot
+ metadata : bool, optional
+ If True, overlay metadata on the figure, by default False
+
+ Returns
+ -------
+ Figure
+ matplotlib `Figure` object
+ """
+ fig = profile_evolution(depths=self.depths, times=self.times, values=self._values, P=P, n=n)
+ if metadata:
+ fig = _plot_overlay(fig, self)
+ fig.show()
+ return fig
+
+ def plot_trumpet(self,
+ year: Optional[int]=None,
+ begin: Optional[datetime]=None,
+ end: Optional[datetime]=None,
+ min_completeness: Optional[float]=None,
+ metadata=False,
+ **kwargs) -> Figure:
+ """ Create a trumpet plot from the data
+
+ Parameters
+ ----------
+ year : int, optional
+ Which year to plot
+ begin : datetime, optional
+ If 'end' also provided, the earliest measurement to include in the averaging for the plot
+ end : datetime, optional
+ If 'begin' also provided, the latest measurement to include in the averaging for the plot
+ min_completeness : float, optional
+ If provided, the minimum fractional data completeness (0 to 1) required for a depth to be
+ included in the temperature envelope; less complete points are plotted as unconnected,
+ slightly transparent dots. By default None
+ metadata : bool, optional
+ If True, overlay metadata on the figure, by default False
+ **kwargs : dict, optional
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
+ list of all possible arguments.
+
+ Returns
+ -------
+ Figure
+ a matplotlib `Figure` object
+ """
+ df = self.long.dropna()
+
+ if year is not None:
+ df = df[df['time'].dt.year == year]
+
+ elif begin is not None or end is not None:
+ raise NotImplementedError
+
+ else:
+ raise ValueError("One of 'year', 'begin', 'end' must be provided.")
+
+ grouped = df.groupby('depth')
+
+ max_t = grouped.max().get('temperature_in_ground').values
+ min_t = grouped.min().get('temperature_in_ground').values
+ mean_t = grouped.mean().get('temperature_in_ground').values
+ depth = np.array([d for d in grouped.groups.keys()])
+
+ # Calculate completeness
+ c = self.yearly(None, None, None).completeness
+
+ if min_completeness is not None and c is not None:
+ C = c[c.index.year == year]
+ C = C[depth].iloc[0,:].values
+
+ else:
+ C = None
+
+ fig = trumpet_curve(depth=depth,
+ t_max=max_t,
+ t_min=min_t,
+ t_mean=mean_t,
+ min_completeness=min_completeness,
+ data_completeness=C,
+ **kwargs)
+ if metadata:
+ fig = _plot_overlay(fig, self)
+
+ fig.show()
+
+ return fig
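+
+ # Usage sketch (the year is illustrative): plot a trumpet curve for one year,
+ # flagging envelope points with less than 90 % data completeness.
+ #
+ # >>> fig = t.plot_trumpet(year=2021, min_completeness=0.9)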
+
+ def plot_contour(self, metadata=False, **kwargs) -> Figure:
+ """ Create a contour plot
+
+ Parameters
+ ----------
+ metadata : bool, optional
+ If True, overlay metadata on the figure, by default False
+ **kwargs : dict, optional
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
+ list of all possible arguments.
+
+ Returns
+ -------
+ Figure
+ matplotlib `Figure` object
+ """
+ fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
+
+ if self.output_utc_offset is not None:
+ label = format_utc_offset(self.output_utc_offset)
+ if label != "UTC":
+ label = f"UTC{label}"
+ fig.axes[0].set_xlabel(f"Time [{label}]")
+
+ if metadata:
+ fig = _plot_overlay(fig, self)
+
+ fig.show()
+
+ return fig
+
+ def plot_timeseries(self, depths: list=[], metadata=False, **kwargs) -> Figure:
+ """Create a time series T(t) plot
+
+ Parameters
+ ----------
+ depths : list, optional
+ If non-empty, restricts the depths to include in the plot, by default []
+ metadata : bool, optional
+ If True, overlay metadata on the figure, by default False
+ **kwargs : dict, optional
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
+ list of all possible arguments.
+
+ Returns
+ -------
+ Figure
+ matplotlib `Figure` object
+ """
+ if depths == []:
+ depths = self.depths
+
+ d_mask = np.isin(self.depths, depths)
+
+ fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)
+
+ if self.output_utc_offset is not None:
+ label = format_utc_offset(self.output_utc_offset)
+ if label != "UTC":
+ label = f"UTC{label}"
+ fig.axes[0].set_xlabel(f"Time [{label}]")
+ fig.autofmt_xdate()
+
+ if metadata:
+ fig = _plot_overlay(fig, self)
+
+ fig.show()
+
+ return fig
+
+
+ class AggregatedTSP(TSP):
+ """ A Time Series Profile holding temporally aggregated data (e.g. daily or monthly averages).
+
+ Parameters
+ ----------
+ times : list-like
+ t-length array of datetime objects
+ values : numpy.ndarray
+ array with shape (t,d) containing values at (t)imes and (d)epths
+ **kwargs : dict
+ Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
+ list of all possible arguments.
+ """
+
+
+ class IndexedTSP(TSP):
+ """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
+
+ Used in situations when depths are unknown (such as when reading datalogger exports
+ that don't have depth measurements.)
+
+ Parameters
+ ----------
+ times : list-like
+ t-length array of datetime objects
+ values : numpy.ndarray
+ array with shape (t,d) containing values at (t)imes and (d)epths
+ **kwargs : dict
+ Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
+ list of all possible arguments.
+ """
+
+ def __init__(self, times, values, **kwargs):
+ depths = np.arange(0, values.shape[1]) + 1
+ super().__init__(times=times, depths=depths, values=values, **kwargs)
+
+ @property
+ def depths(self) -> np.ndarray:
+ """Depth indices
+
+ Returns
+ -------
+ numpy.ndarray
+ An array of depth indices
+ """
+ warnings.warn("This TSP uses indices (1,2,3,...) instead of depths. Use set_depths() to use measured depths.", stacklevel=2)
+ return self._depths
+
+ @depths.setter
+ def depths(self, value):
+ TSP.depths.__set__(self, value)
+
+ def set_depths(self, depths: np.ndarray):
+ """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`
+
+ Parameters
+ ----------
+ depths : np.ndarray
+ An array or list of depth values equal in length to the depth indices
+ """
+ self.depths = depths
+ self.__class__ = TSP
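+
+ # Usage sketch: a logger export read without known sensor depths gets indices
+ # 1..d; assigning measured depths converts the object to a plain TSP (the
+ # depth values below are illustrative).
+ #
+ # >>> it = IndexedTSP(times=times, values=values)
+ # >>> it.set_depths([0.5, 1.0, 2.0])
+ # >>> type(it) is TSP
+ # True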
+
+
+ def span(S: pd.Series) -> float:
+ """ Return the range (in seconds) between the first and last valid observations in a series """
+ first = S.first_valid_index() # type: pd.Timestamp
+ last = S.last_valid_index() # type: pd.Timestamp
+ if first is None or last is None:
+ return 0
+
+ return (last - first).total_seconds()
+
+
+ def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
+ threshold: float) -> "pd.DataFrame":
+ """ True where the data in a resampling group span less than `threshold` seconds """
+ s = R.apply(span)
+ return s < threshold
+
+
+ def gap(S: pd.Series) -> float:
+ """ Return the largest time gap (in seconds) between valid observations in a series """
+ d = np.diff(S.dropna().index)
+ if len(d) <= 1: # zero or one interval: return 0 (masked upstream, since max_gap_mask also masks g == 0)
+ return 0
+
+ return max(d).astype('timedelta64[s]').astype(float)
+
+
+ def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
+ threshold: float) -> "pd.DataFrame":
+ """ True where a resampling group has a gap longer than `threshold` seconds, or too few points to measure one """
+ g = R.apply(gap)
+ return (g > threshold) | (g == 0)
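+
+ # Worked sketch of the two helpers, assuming pandas is imported as pd: for
+ # valid points on Jan 1, Jan 2 and Jan 31, the largest gap is 29 days and the
+ # span is 30 days, so a monthly group would be masked by max_gap_mask for any
+ # threshold under 29 days and by min_span_mask for any threshold over 30 days.
+ #
+ # >>> s = pd.Series([1.0, 2.0, 3.0],
+ # ... index=pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-31"]))
+ # >>> gap(s) / 86400, span(s) / 86400
+ # (29.0, 30.0)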
+
+
+ def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
+ """ Mask out observational groups in which there is more than a certain size temporal gap
+
+ Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
+ or end of the aggregation group (using min_span).
+
+ Parameters
+ ----------
+ grouped : pandas.core.groupby.DataFrameGroupBy
+ groupby with 'time' and 'depth' columns
+ max_gap : int
+ maximum gap in seconds to tolerate between observations in a group
+ min_span : int
+ minimum data range (beginning to end) in seconds.
+
+ Returns
+ -------
+ numpy.ndarray
+ boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
+ """
+ if max_gap is not None:
+ max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
+ max_diff = max_diff.unstack().to_numpy()
+ diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
+ else:
+ diff_mask = np.zeros_like(grouped, dtype=bool)
+
+ if min_span is not None:
+ total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
+ span_mask = np.where(total_span < min_span, True, False)
+ else:
+ span_mask = np.zeros_like(grouped, dtype=bool)
+
+ mask = diff_mask | span_mask # mask where either tolerance is violated
+
+ return mask
+
+
+ def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
+ """ Create a mask array for aggregated values with too few observations
+
+ Parameters
+ ----------
+ number_of_observations : numpy.ndarray
+ Array of how many data points are in aggregation
+ min_count : int
+ Minimum number of data points for aggregation to be 'valid'
+
+ Returns
+ -------
+ np.ndarray
+ a mask, True where data should be masked
+ """
+ mask = np.less(number_of_observations, min_count) # type: np.ndarray
+ return mask
+
+
+ def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
+ """Convert a list of times to a pandas DatetimeIndex object"""
+ invalid_msg = "Times must be a non-empty list, numpy array, pandas DatetimeIndex, or pandas Series"
+
+ try:
+ if not len(times):
+ raise ValueError(invalid_msg)
+ except TypeError:
+ raise ValueError(invalid_msg)
+
+ if isinstance(times, pd.DatetimeIndex):
+ return times
+
+ elif isinstance(times, pd.Series):
+ try:
+ times = pd.DatetimeIndex(times)
+ except Exception:
+ raise ValueError("Series must be convertible to DatetimeIndex")
+ times.name = 'time'
+
+ return times
+
+ elif isinstance(times, np.ndarray):
+ times = pd.to_datetime(times)
+ times.name = 'time'
+ return times
+
+ elif isinstance(times, list):
+ times = pd.to_datetime(times)
+ times.name = 'time' # set the name for consistency with the other branches
+ return times
+
+ else:
+ raise ValueError(invalid_msg)
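+
+ # Usage sketch: each accepted container type is normalized to a DatetimeIndex.
+ #
+ # >>> idx = handle_incoming_times(["2021-01-01", "2021-01-02"])
+ # >>> isinstance(idx, pd.DatetimeIndex)
+ # True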
+
+ def tsp_concat(tsp_list, on_conflict='error', metadata='first') -> TSP:
+ """Combine multiple TSPs into a single TSP.
+
+ Parameters
+ ----------
+ tsp_list : list[TSP]
+ List of TSPs to combine. They must have the same depths
+ on_conflict : str, optional
+ Method to resolve duplicate times with different values. Chosen from "error", "keep", by default "error"
+ - "error": Raise an error if duplicate times with different values are found.
+ - "keep": Keep the first occurrence of the duplicate time.
+ metadata : str, optional
+ Method to select metadata from the TSPs. Chosen from "first", "identical", or "none", by default "first"
+ - "first": Use the metadata from the first TSP in the list.
+ - "identical": Only keep metadata records that are identical across TSPs.
+ - "none": Ignore metadata and set it to None.
+
+ Returns
+ -------
+ TSP
+ Combined TSP.
+ """
+ tsp_dict = _tsp_concat(tsp_list=tsp_list, on_conflict=on_conflict, metadata=metadata)
+ times = tsp_dict.pop('times')
+ depths = tsp_dict.pop('depths')
+ values = tsp_dict.pop('values')
+ counts = tsp_dict.pop('counts')
+
+ t = TSP(times, depths, values, **tsp_dict)
+ t.counts = counts
+
+ return t
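+
+ # Usage sketch, assuming t1 and t2 are TSPs for the same borehole with
+ # identical depths (e.g. two sequential logger downloads):
+ #
+ # >>> combined = tsp_concat([t1, t2], on_conflict="keep", metadata="first")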
+
+
+ def is_single_line(s: str) -> bool:
+ return "\n" not in s and "\r" not in s
+
+
+ def is_valid_site_name_unicode(s: str) -> bool:
+ if not is_single_line(s):
+ return False
+ try:
+ s.encode("utf-8")
+ except UnicodeEncodeError:
+ return False
+
+ for ch in s:
+ cat = unicodedata.category(ch)
+ if cat.startswith("C"): # control chars, surrogates, etc.
+ return False
+ return True
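+
+ # Sketch of the validation rules above: a site name must be a single line,
+ # UTF-8 encodable, and free of control-category characters.
+ #
+ # >>> is_valid_site_name_unicode("Borehole 12")
+ # True
+ # >>> is_valid_site_name_unicode("bad\nname")
+ # False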