tsp 1.7.1__py3-none-any.whl → 1.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tsp might be problematic. Click here for more details.

Files changed (91) hide show
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/core.py +1035 -1010
  4. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  5. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  6. tsp/data/NTGS_example_csv.csv +6 -0
  7. tsp/data/NTGS_example_slash_dates.csv +6 -0
  8. tsp/data/example_geotop.csv +5240 -5240
  9. tsp/data/example_gtnp.csv +1298 -1298
  10. tsp/data/example_permos.csv +8 -0
  11. tsp/data/test_geotop_has_space.txt +5 -0
  12. tsp/dataloggers/AbstractReader.py +43 -43
  13. tsp/dataloggers/FG2.py +110 -110
  14. tsp/dataloggers/GP5W.py +114 -114
  15. tsp/dataloggers/Geoprecision.py +34 -34
  16. tsp/dataloggers/HOBO.py +914 -914
  17. tsp/dataloggers/RBRXL800.py +190 -190
  18. tsp/dataloggers/RBRXR420.py +308 -307
  19. tsp/dataloggers/__init__.py +15 -15
  20. tsp/dataloggers/logr.py +115 -115
  21. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  22. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  23. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  24. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  25. tsp/dataloggers/test_files/010252.dat +1731 -1731
  26. tsp/dataloggers/test_files/010252.hex +1739 -1739
  27. tsp/dataloggers/test_files/010274.hex +1291 -1291
  28. tsp/dataloggers/test_files/010278.hex +3544 -3544
  29. tsp/dataloggers/test_files/012064.dat +1286 -1286
  30. tsp/dataloggers/test_files/012064.hex +1294 -1294
  31. tsp/dataloggers/test_files/012081.hex +3532 -3532
  32. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  33. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  34. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  35. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  36. tsp/dataloggers/test_files/CSc_CR1000_1.dat +295 -0
  37. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  38. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  39. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  40. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  41. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  42. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  43. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  44. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  45. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  46. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  47. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  48. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  49. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  50. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  51. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  52. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  53. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  54. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  55. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  56. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  57. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  58. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  59. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  60. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  61. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  62. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  63. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  64. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  65. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  66. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  67. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  68. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  69. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  70. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  71. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  72. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  73. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  74. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  75. tsp/gtnp.py +148 -141
  76. tsp/labels.py +3 -3
  77. tsp/misc.py +90 -90
  78. tsp/physics.py +101 -101
  79. tsp/plots/static.py +374 -305
  80. tsp/readers.py +548 -536
  81. tsp/scratch.py +6 -0
  82. tsp/time.py +45 -45
  83. tsp/tspwarnings.py +15 -0
  84. tsp/utils.py +101 -101
  85. tsp/version.py +1 -1
  86. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/LICENSE +674 -674
  87. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/METADATA +10 -6
  88. tsp-1.7.7.dist-info/RECORD +95 -0
  89. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/WHEEL +5 -5
  90. tsp-1.7.1.dist-info/RECORD +0 -88
  91. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/top_level.txt +0 -0
tsp/core.py CHANGED
@@ -1,1010 +1,1035 @@
1
- from __future__ import annotations
2
-
3
- import pandas as pd
4
- import re
5
- import inspect
6
- import numpy as np
7
- import functools
8
- import warnings
9
-
10
- try:
11
- import netCDF4 as nc
12
-
13
- try:
14
- from pfit.pfnet_standard import make_temperature_base
15
- except ModuleNotFoundError:
16
- warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
17
-
18
- except ModuleNotFoundError:
19
- warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
20
-
21
- from typing import Union, Optional
22
- from datetime import datetime, tzinfo, timezone, timedelta
23
-
24
- import tsp
25
- import tsp.labels as lbl
26
- from tsp.physics import analytical_fourier
27
- from tsp.plots.static import trumpet_curve, colour_contour, time_series
28
- from tsp.time import format_utc_offset
29
- from tsp.time import get_utc_offset
30
- from tsp.misc import completeness
31
-
32
- from matplotlib.figure import Figure
33
-
34
-
35
- class TSP:
36
- """ A Time Series Profile (a collection of time series data at different depths)
37
-
38
- A TSP can also be:
39
- Thermal State of Permafrost
40
- Temperature du Sol en Profondeur
41
- Temperatures, Secondes, Profondeurs
42
-
43
- Parameters
44
- ----------
45
- times : pandas.DatetimeIndex
46
- DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
47
- but will be converted to a DatetimeIndex with no UTC offset.
48
- depths : list-like
49
- d-length array of depths
50
- values : numpy.ndarray
51
- array with shape (t,d) containing values at (t)emperatures and (d)epths
52
- longitude : float, optional
53
- Longitude at which data were collected
54
- latitude : float, optional
55
- Latitude at which data were collected
56
- site_id : str, optional
57
- Name of location at which data were collected
58
- metadata : dict
59
- Additional metadata
60
-
61
- Attributes
62
- ----------
63
- values
64
- latitude : float
65
- Latitude at which data were collected
66
- longitude : float
67
- Longitude at which data were collected
68
- metadata : dict
69
- Additional metadata provided at instantiation or by other methods
70
- """
71
-
72
- def __repr__(self) -> str:
73
- return repr(self.wide)
74
-
75
- def __str__(self) -> str:
76
- return str(self.wide)
77
-
78
- def __init__(self, times, depths, values,
79
- latitude: Optional[float]=None,
80
- longitude: Optional[float]=None,
81
- site_id: Optional[str]=None,
82
- metadata: dict={}):
83
-
84
- self._times = handle_incoming_times(times)
85
- if self._times.duplicated().any():
86
- warnings.warn(f"Duplicate timestamps found: {self._times[np.where(self._times.duplicated())[0]]}. That's bad.", stacklevel=2)
87
-
88
- if self.utc_offset:
89
- self._output_utc_offset = self.utc_offset
90
- else:
91
- self._output_utc_offset = None
92
-
93
- self._depths = np.atleast_1d(depths)
94
- self._values = np.atleast_2d(values)
95
- self.__number_of_observations = np.ones_like(values, dtype=int)
96
- self.__number_of_observations[np.isnan(values)] = 0
97
- self.metadata = metadata
98
- self.latitude = latitude
99
- self.longitude = longitude
100
- self.site_id = site_id
101
- self._freq = None
102
- self._completeness = None
103
-
104
- @property
105
- def freq(self) -> Optional[int]:
106
- """ Measurement frequency [s] """
107
- return self._freq
108
-
109
- @freq.setter
110
- def freq(self, value: int):
111
- if not isinstance(value, int):
112
- raise TypeError("Must be string, e.g. '1D', '3600s'")
113
- self._freq = value
114
-
115
- @property
116
- def completeness(self) -> Optional[pd.DataFrame]:
117
- """ Data completeness """
118
- return self._completeness
119
-
120
- @completeness.setter
121
- def completeness(self, value):
122
- raise ValueError("You can't assign this variable.")
123
-
124
- @classmethod
125
- def from_tidy_format(cls, times, depths, values,
126
- number_of_observations=None,
127
- latitude: Optional[float]=None,
128
- longitude: Optional[float]=None,
129
- site_id: Optional[str]=None,
130
- metadata:dict={}):
131
- """ Create a TSP from data in a 'tidy' or 'long' format
132
-
133
- Parameters
134
- ----------
135
- times : list-like
136
- n-length array of datetime objects
137
- depths : list-like
138
- n-length array of depths
139
- values : numpy.ndarray
140
- n-length array of (temperaure) values at associated time and depth
141
- number_of_observations : numpy.ndarray, optional
142
- n-length array of number of observations at associated time and
143
- depth for aggregated values (default: 1)
144
- longitude : float, optional
145
- Longitude at which data were collected
146
- latitude : float, optional
147
- Latitude at which data were collected
148
- site_id : str, optional
149
- Name of location at which data were collected
150
- metadata : dict
151
- Additional metadata
152
- """
153
- times = np.atleast_1d(times)
154
- depths = np.atleast_1d(depths)
155
- values = np.atleast_1d(values)
156
-
157
- number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
158
- df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
159
- df.set_index(["times", "depths"], inplace=True)
160
-
161
- try:
162
- unstacked = df.unstack()
163
- except ValueError as e:
164
- if np.any(df.index.duplicated()):
165
- print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
166
- raise e
167
-
168
- temps = unstacked.get('temperature_in_ground')
169
-
170
- this = cls(times=temps.index.values,
171
- depths=temps.columns.values,
172
- values=temps.values,
173
- latitude=latitude,
174
- longitude=longitude,
175
- site_id=site_id,
176
- metadata=metadata)
177
-
178
- number_of_observations = unstacked.get('number_of_observations').values
179
-
180
- number_of_observations[np.isnan(number_of_observations)] = 0
181
- this.__number_of_observations = number_of_observations
182
- return this
183
-
184
- @classmethod
185
- def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
186
- """ Use an existing TSP object as a template, """
187
- kw = {}
188
- for arg in inspect.getfullargspec(TSP).args[1:]:
189
- if kwargs.get(arg) is not None:
190
- kw[arg] = kwargs.get(arg)
191
- else:
192
- kw[arg] = getattr(t, arg)
193
-
194
- t = TSP(**kw)
195
-
196
- return t
197
-
198
- @classmethod
199
- def from_json(cls, json_file) -> "TSP":
200
- """ Read data from a json file
201
-
202
- Parameters
203
- ----------
204
- json_file : str
205
- Path to a json file from which to read
206
- """
207
- df = pd.read_json(json_file)
208
- depth_pattern = r"^(-?[0-9\.]+)$"
209
-
210
- times = pd.to_datetime(df['time']).values
211
- depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
212
- values = df.loc[:, depths].to_numpy()
213
-
214
- t = cls(times=times, depths=depths, values=values)
215
-
216
- return t
217
-
218
- @classmethod
219
- def synthetic(cls, depths: "np.ndarray", start="2000-01-01", end="2003-01-01",
220
- Q:"Optional[float]"=0.2,
221
- c:"Optional[float]"=1.6e6,
222
- k:"Optional[float]"=2.5,
223
- A:"Optional[float]"=6,
224
- MAGST:"Optional[float]"=-0.5) -> "TSP":
225
- """
226
- Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
227
- Suitable for testing
228
-
229
- Parameters
230
- ----------
231
- depths : np.ndarray
232
- array of depths in m
233
- start : str
234
- array of times in seconds
235
- Q : Optional[float], optional
236
- Ground heat flux [W m-2], by default 0.2
237
- c : Optional[float], optional
238
- heat capacity [J m-3 K-1], by default 1.6e6
239
- k : Optional[float], optional
240
- thermal conductivity [W m-1 K-1], by default 2.5
241
- A : Optional[float], optional
242
- Amplitude of temperature fluctuation [C], by default 6
243
- MAGST : Optional[float], optional
244
- Mean annual ground surface temperature [C], by default -0.5
245
-
246
- Returns
247
- -------
248
- TSP
249
- A timeseries profile (TSP) object
250
- """
251
- times = pd.date_range(start=start, end=end).to_pydatetime()
252
- t_sec = np.array([(t-times[0]).total_seconds() for t in times])
253
-
254
- values = analytical_fourier(depths=depths, times=t_sec, Q=Q, c=c, k=k, A=A, MAGST=MAGST)
255
-
256
- this = cls(depths=depths, times=times, values=values)
257
-
258
- return this
259
-
260
- @property
261
- @functools.lru_cache()
262
- def long(self) -> "pd.DataFrame":
263
- """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
264
-
265
- Returns
266
- -------
267
- pandas.DataFrame
268
- Time series profile data with columns:
269
- - **time**: time
270
- - **depth**: depth
271
- - **temperature_in_ground**: temperature
272
- - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
273
- """
274
- values = self.wide.melt(id_vars='time',
275
- var_name="depth",
276
- value_name="temperature_in_ground")
277
-
278
- number_of_observations = self.number_of_observations.melt(id_vars='time',
279
- var_name="depth",
280
- value_name="number_of_observations")
281
-
282
- values['number_of_observations'] = number_of_observations['number_of_observations']
283
-
284
- return values
285
-
286
- @property
287
- @functools.lru_cache()
288
- def wide(self) -> "pd.DataFrame":
289
- """ Return the data in a 'wide' format (one column per depth)
290
-
291
- Returns
292
- -------
293
- pandas.DataFrame
294
- Time series profile data
295
- """
296
- tabular = pd.DataFrame(self._values)
297
- tabular.columns = self._depths
298
- tabular.index = self.times
299
- tabular.insert(0, "time", self.times)
300
-
301
- return tabular
302
-
303
- @property
304
- @functools.lru_cache()
305
- def number_of_observations(self) -> "pd.DataFrame":
306
- """ The number of observations for an average at a particular depth or time.
307
-
308
- For pure observational data, the number of observations will always be '1'. When data are aggregated,
309
- (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`) these numbers
310
- will be greater than 1.
311
-
312
- Returns
313
- -------
314
- DataFrame
315
- Number of observations
316
- """
317
- tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
318
- tabular.columns = self._depths
319
- tabular.index = self._times
320
- tabular.insert(0, "time", self._times)
321
-
322
- return tabular
323
-
324
- @number_of_observations.setter
325
- def number_of_observations(self, value):
326
- raise ValueError(f"You can't assign {value} to this variable (no assignment allowed).")
327
-
328
- def reset_counts(self):
329
- """ Set observation count to 1 if data exists, 0 otherwise """
330
- self.__number_of_observations = (~self.wide.isna()).astype('boolean')
331
-
332
- def set_utc_offset(self, offset:"Union[int,str]") -> None:
333
- """ Set the time zone of the data by providing a UTC offset
334
-
335
- Parameters
336
- ----------
337
- offset : int, str
338
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
339
- """
340
- if self.utc_offset is not None:
341
- raise ValueError("You can only set the UTC offset once.")
342
-
343
- utc_offset = get_utc_offset(offset)
344
-
345
- tz = timezone(timedelta(seconds = utc_offset))
346
- self._times = self._times.tz_localize(tz)
347
- self._output_utc_offset = timezone(timedelta(seconds = utc_offset))
348
-
349
- TSP.wide.fget.cache_clear()
350
- TSP.long.fget.cache_clear()
351
-
352
- @property
353
- def utc_offset(self) -> "Optional[tzinfo]":
354
- """ Get the time zone of the data by providing a UTC offset
355
-
356
- Returns
357
- -------
358
- datetime.tzinfo
359
- A timezone object
360
- """
361
- if self._times.tz is None:
362
- return None
363
- else:
364
- return self._times.tz
365
-
366
- @utc_offset.setter
367
- def utc_offset(self, value):
368
- self.set_utc_offset(value)
369
-
370
- @property
371
- def output_utc_offset(self) -> "Optional[tzinfo]":
372
- """ Get the time zone in which to output or display the data by providing a UTC offset
373
-
374
- Returns
375
- -------
376
- datetime.tzinfo
377
- A timezone object
378
- """
379
- if self._output_utc_offset is None:
380
- return None
381
- else:
382
- return self._output_utc_offset
383
-
384
- @output_utc_offset.setter
385
- def output_utc_offset(self, offset:"Union[int,str]") -> None:
386
- self.set_output_utc_offset(offset)
387
-
388
- def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
389
- """ Set the time zone in which to display the output or data by providing a UTC offset
390
- Parameters
391
- ----------
392
- offset : int, str
393
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
394
- """
395
- utc_offset = get_utc_offset(offset)
396
- tz = timezone(timedelta(seconds = utc_offset))
397
- self._output_utc_offset = tz
398
-
399
- TSP.wide.fget.cache_clear()
400
- TSP.long.fget.cache_clear()
401
-
402
- def reset_output_utc_offset(self) -> None:
403
- """ Reset the time zone in which to output or display the data to the default (the one set by set_utc_offset)
404
-
405
- """
406
- if self.utc_offset is None:
407
- raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
408
- else:
409
- self._output_utc_offset = self.utc_offset
410
-
411
- def __nly(self,
412
- freq_fmt:str,
413
- new_freq,
414
- min_count:Optional[int],
415
- max_gap:Optional[int],
416
- min_span:Optional[int]) -> TSP:
417
- """
418
- Temporal aggregation by grouping according to a string-ified time
419
-
420
- Parameters
421
- ----------
422
- freq_fmt : str
423
- Python date format string used to aggregate and recover time
424
-
425
- Returns
426
- -------
427
- tuple[pd.DataFrame, pd.DataFrame]
428
- A tuple of dataframes, the first containing the aggregated data, the second containing the number of observations
429
- """
430
- R = self.wide.drop("time", axis=1).resample(freq_fmt)
431
- cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
432
- total_obs = R.count()
433
- values = R.mean()
434
-
435
- # Calculate masks
436
- mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
437
-
438
- if min_count is not None:
439
- mc_mask = (cumulative_obs < min_count)
440
- if max_gap is not None:
441
- Mg_mask = max_gap_mask(R, max_gap)
442
- if min_span is not None:
443
- ms_mask = min_span_mask(R, min_span)
444
-
445
- mask = (mc_mask | Mg_mask | ms_mask)
446
- values[mask] = np.nan
447
-
448
- # Construct TSP
449
- t = TSP.__from_tsp(self, times=values.index,
450
- depths=values.columns,
451
- values=values.values)
452
- t.__number_of_observations = cumulative_obs
453
- t.freq = new_freq
454
-
455
- # Calculate data completeness
456
- if self.freq is not None:
457
- f1 = self.freq
458
- f2 = new_freq
459
- t._completeness = completeness(total_obs, f1, f2)
460
-
461
- return t
462
-
463
- def monthly(self,
464
- min_count:Optional[int]=24,
465
- max_gap:Optional[int]=3600*24*8,
466
- min_span:Optional[int]=3600*24*21) -> "TSP":
467
- """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
468
-
469
- Parameters
470
- ----------
471
- min_count : int
472
- Minimum number of observations in a month to be considered a valid average,
473
- defaults to None
474
- max_gap : int
475
- Maximum gap (in seconds) between data points to be considered a valid average,
476
- defaults to None
477
- min_span : int
478
- Minimum total data range (in seconds) to be consiered a valid average,
479
- defaults to None
480
-
481
- Returns
482
- -------
483
- TSP
484
- A TSP object with data aggregated to monthly averages
485
- """
486
- t = self.__nly(freq_fmt="M",
487
- new_freq=lbl.MONTHLY,
488
- min_count=min_count,
489
- max_gap=max_gap,
490
- min_span=min_span)
491
-
492
- return t
493
-
494
- def daily(self,
495
- min_count:Optional[int]=None,
496
- max_gap:Optional[int]=None,
497
- min_span:Optional[int]=None) -> "TSP":
498
- """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
499
-
500
- Parameters
501
- ----------
502
- min_count : int
503
- Minimum number of observations in a day to be considered a valid average,
504
- defaults to None
505
- max_gap : int
506
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
507
- min_span : int
508
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
509
-
510
- Returns
511
- -------
512
- TSP
513
- A TSP object with data aggregated to daily averages
514
- """
515
- # if the data is already daily +/- 1min , just return it
516
- t = self.__nly(freq_fmt="D",
517
- new_freq=lbl.DAILY,
518
- min_count=min_count,
519
- max_gap=max_gap,
520
- min_span=min_span)
521
-
522
- return t
523
-
524
- def yearly(self,
525
- min_count:Optional[int]=None,
526
- max_gap:Optional[int]=None,
527
- min_span:Optional[int]=None) -> "TSP":
528
- """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
529
-
530
- Parameters
531
- ----------
532
- min_count : int
533
- Minimum number of observations in a month to be considered a valid average, defaults to None
534
- max_gap : int
535
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
536
- min_span : int
537
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
538
-
539
- Returns
540
- -------
541
- TSP
542
- A TSP object with data aggregated to yearly averages
543
- """
544
- t = self.__nly(freq_fmt="Y",
545
- new_freq=lbl.YEARLY,
546
- min_count=min_count,
547
- max_gap=max_gap,
548
- min_span=min_span)
549
-
550
- return t
551
-
552
- @property
553
- def depths(self) -> "np.ndarray":
554
- """ Return the depth values in the profile
555
-
556
- Returns
557
- -------
558
- numpy.ndarray
559
- The depths in the profile
560
- """
561
- return self._depths
562
-
563
- @depths.setter
564
- def depths(self, value):
565
- depths = np.atleast_1d(value)
566
-
567
- if not len(depths) == len(self._depths):
568
- raise ValueError(f"List of depths must have length of {len(self._depths)}.")
569
-
570
- self._depths = depths
571
-
572
- TSP.wide.fget.cache_clear()
573
- TSP.long.fget.cache_clear()
574
-
575
- @property
576
- def times(self):
577
- """ Return the timestamps in the time series
578
-
579
- Returns
580
- -------
581
- pandas.DatetimeIndex
582
- The timestamps in the time series
583
- """
584
- if self.utc_offset is None:
585
- return self._times
586
-
587
- elif self._output_utc_offset == self.utc_offset:
588
- return self._times
589
-
590
- else:
591
- return self._times.tz_convert(self.output_utc_offset)
592
-
593
- @property
594
- def values(self):
595
- return self._values
596
-
597
- def to_gtnp(self, filename: str) -> None:
598
- """ Write the data in GTN-P format
599
-
600
- Parameters
601
- ----------
602
- filename : str
603
- Path to the file to write to
604
- """
605
- df = self.wide.rename(columns={'time': 'Date/Depth'})
606
- df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
607
-
608
- df.to_csv(filename, index=False, na_rep="-999")
609
-
610
- def to_ntgs(self, filename:str, project_name:str="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
611
- """ Write the data in NTGS template format
612
-
613
- Parameters
614
- ----------
615
- filename : str
616
- Path to the file to write to
617
- project_name : str, optional
618
- The project name, by default ""
619
- site_id : str, optional
620
- The name of the site , by default None
621
- latitude : float, optional
622
- WGS84 latitude at which the observations were recorded, by default None
623
- longitude : float, optional
624
- WGS84 longitude at which the observations were recorded, by default None
625
- """
626
- if latitude is None:
627
- latitude = self.latitude if self.latitude is not None else ""
628
-
629
- if longitude is None:
630
- longitude = self.longitude if self.longitude is not None else ""
631
-
632
- if site_id is None:
633
- site_id = self.site_id if self.site_id is not None else ""
634
- data = self.values
635
-
636
- df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
637
- 'site_id': pd.Series(dtype='str'),
638
- 'latitude': pd.Series(dtype='float'),
639
- 'longitude': pd.Series(dtype='float')
640
- })
641
-
642
- df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
643
- df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
644
-
645
- df["project_name"] = project_name
646
- df["site_id"] = site_id
647
- df["latitude"] = latitude
648
- df["longitude"] = longitude
649
-
650
- headers = [str(d) + "_m" for d in self.depths]
651
-
652
- for i, h in enumerate(headers):
653
- df[h] = data[:, i]
654
-
655
- df.to_csv(filename, index=False)
656
-
657
- def to_netcdf(self, file: str) -> None:
658
- """ Write the data as a netcdf"""
659
- try:
660
- ncf = make_temperature_base(file, len(self.depths))
661
- except NameError:
662
- warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
663
- return
664
-
665
- with nc.Dataset(ncf, 'a') as ncd:
666
- pytime = self.times.to_pydatetime()
667
-
668
- ncd['depth_below_ground_surface'][:] = self.depths
669
-
670
-
671
- ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
672
- ncd['ground_temperature'][:] = self.values
673
-
674
- if self.latitude:
675
- ncd['latitude'][:] = self.latitude
676
- if self.longitude:
677
- ncd['longitude'][:] = self.longitude
678
- if self.site_id:
679
- ncd['site_name'] = self.site_id
680
-
681
- for key, value in self.metadata:
682
- try:
683
- ncd.setncattr(key, value)
684
- except Exception:
685
- warnings.warn(f"Could not set metadata item: {key}", stacklevel=2)
686
-
687
- def to_json(self, file: str) -> None:
688
- """ Write the data to a serialized json file """
689
- with open(file, 'w') as f:
690
- f.write(self._to_json())
691
-
692
- def _to_json(self) -> str:
693
- return self.wide.to_json()
694
-
695
- def plot_trumpet(self,
696
- year: Optional[int]=None,
697
- begin: Optional[datetime]=None,
698
- end: Optional[datetime]=None,
699
- min_completeness: Optional[float]=None,
700
- **kwargs) -> Figure:
701
- """ Create a trumpet plot from the data
702
-
703
- Parameters
704
- ----------
705
- year : int, optional
706
- Which year to plot
707
- begin : datetime, optional
708
- If 'end' also provided, the earliest measurement to include in the averaging for the plot
709
- end : datetime, optional
710
- If 'begin' also provided, the latest measurement to include in the averaging for the plot
711
- **kwargs : dict, optional
712
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
713
- list of all possible arguments.
714
-
715
- Returns
716
- -------
717
- Figure
718
- a matplotlib `Figure` object
719
- """
720
- df = self.long.dropna()
721
-
722
- if year is not None:
723
- df = df[df['time'].dt.year == year]
724
-
725
- elif begin is not None or end is not None:
726
- raise NotImplementedError
727
-
728
- else:
729
- raise ValueError("One of 'year', 'begin', 'end' must be provided.")
730
-
731
- grouped = df.groupby('depth')
732
-
733
- max_t = grouped.max().get('temperature_in_ground').values
734
- min_t = grouped.min().get('temperature_in_ground').values
735
- mean_t = grouped.mean().get('temperature_in_ground').values
736
- depth = np.array([d for d in grouped.groups.keys()])
737
-
738
- # Calculate completeness
739
- c = self.yearly(None, None, None).completeness
740
-
741
- if min_completeness is not None and c is not None:
742
- C = c[c.index.year == year]
743
- C = C[depth].iloc[0,:].values
744
-
745
- else:
746
- C = None
747
-
748
- fig = trumpet_curve(depth=depth,
749
- t_max=max_t,
750
- t_min=min_t,
751
- t_mean=mean_t,
752
- min_completeness=min_completeness,
753
- data_completeness=C,
754
- **kwargs)
755
- fig.show()
756
-
757
- return fig
758
-
759
- def plot_contour(self, **kwargs) -> Figure:
760
- """ Create a contour plot
761
-
762
- Parameters
763
- ----------
764
- **kwargs : dict, optional
765
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
766
- list of all possible arguments.
767
-
768
- Returns
769
- -------
770
- Figure
771
- matplotlib `Figure` object
772
- """
773
- fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
774
-
775
- if self.output_utc_offset is not None:
776
- label = format_utc_offset(self.output_utc_offset)
777
- if label != "UTC":
778
- label = f"UTC{label}"
779
- fig.axes[0].set_xlabel(f"Time [{label}]")
780
-
781
- fig.show()
782
-
783
- return fig
784
-
785
- def plot_timeseries(self, depths: list=[], **kwargs) -> Figure:
786
- """Create a time series T(t) plot
787
-
788
- Parameters
789
- ----------
790
- depths : list, optional
791
- If non-empty, restricts the depths to include in the plot, by default []
792
- **kwargs : dict, optional
793
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
794
- list of all possible arguments.
795
-
796
- Returns
797
- -------
798
- Figure
799
- matplotlib `Figure` object
800
- """
801
- if depths == []:
802
- depths = self.depths
803
-
804
- d_mask = np.isin(self.depths, depths)
805
-
806
- fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)
807
-
808
-
809
- if self.output_utc_offset is not None:
810
- label = format_utc_offset(self.output_utc_offset)
811
- if label != "UTC":
812
- label = f"UTC{label}"
813
- fig.axes[0].set_xlabel(f"Time [{label}]")
814
- fig.autofmt_xdate()
815
- fig.show()
816
-
817
- return fig
818
-
819
-
820
- class AggregatedTSP(TSP):
821
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
822
-
823
- Used in situations when depths are unknown (such as when reading datlogger exports
824
- that don't have depth measurements.)
825
-
826
- Parameters
827
- ----------
828
- times : list-like
829
- t-length array of datetime objects
830
- values : numpy.ndarray
831
- array with shape (t,d) containing values at (t)emperatures and (d)epths
832
- **kwargs : dict
833
- Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
834
- list of all possible arguments.
835
- """
836
-
837
-
838
- class IndexedTSP(TSP):
839
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
840
-
841
- Used in situations when depths are unknown (such as when reading datlogger exports
842
- that don't have depth measurements.)
843
-
844
- Parameters
845
- ----------
846
- times : list-like
847
- t-length array of datetime objects
848
- values : numpy.ndarray
849
- array with shape (t,d) containing values at (t)emperatures and (d)epths
850
- **kwargs : dict
851
- Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
852
- list of all possible arguments.
853
- """
854
-
855
- def __init__(self, times, values, **kwargs):
856
- depths = np.arange(0, values.shape[1]) + 1
857
- super().__init__(times=times, depths=depths, values=values, **kwargs)
858
-
859
- @property
860
- def depths(self) -> np.ndarray:
861
- """Depth indices
862
-
863
- Returns
864
- -------
865
- numpy.ndarray
866
- An array of depth indices
867
- """
868
- warnings.warn("This TSP uses indices (1,2,3,...) instad of depths. Use set_depths() to use measured depths.", stacklevel=2)
869
- return self._depths
870
-
871
- @depths.setter
872
- def depths(self, value):
873
- TSP.depths.__set__(self, value)
874
-
875
- def set_depths(self, depths: np.ndarray):
876
- """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`
877
-
878
- Parameters
879
- ----------
880
- depths : np.ndarray
881
- An array or list of depth values equal in lenth to the depth indices
882
- """
883
- self.depths = depths
884
- self.__class__ = TSP
885
-
886
-
887
-
888
- def span(S: pd.Series) -> float:
889
- first = S.first_valid_index() # type: pd.Timestamp
890
- last = S.last_valid_index() # type: pd.Timestamp
891
- if first is None or last is None:
892
- return 0
893
-
894
- return (last - first).total_seconds()
895
-
896
- def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
897
- threshold: float) -> "pd.DataFrame":
898
- s = R.apply(lambda x: span(x))
899
- return s < threshold
900
-
901
-
902
- def gap(S: pd.Series) -> float:
903
-
904
- d = np.diff(S.dropna().index)
905
- if len(d) == 0:
906
- return 0
907
- elif len(d) == 1:
908
- return 0
909
- elif len(d) > 1:
910
- gap = max(d).astype('timedelta64[s]').astype(float)
911
- return gap
912
-
913
-
914
- def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
915
- threshold: float) -> "pd.DataFrame":
916
- g = R.apply(lambda x: gap(x))
917
- return (g > threshold) | (g == 0)
918
-
919
-
920
-
921
-
922
- def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
923
- """ Mask out observational groups in which there is more than a certain size temporal gap
924
-
925
- Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
926
- or end of the aggregation group (using min_span).
927
-
928
- Parameters
929
- ----------
930
- grouped : pandas.core.groupby.DataFrameGroupBy
931
- groupby with 'time' and 'depth' columns
932
- max_gap : int
933
- maximum gap in seconds to tolerate between observations in a group
934
- min_span : int
935
- minimum data range (beginning to end) in seconds.
936
-
937
- Returns
938
- -------
939
- numpy.ndarray
940
- boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
941
- """
942
- if max_gap is not None:
943
- max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
944
- max_diff = max_diff.unstack().to_numpy()
945
- diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
946
- else:
947
- diff_mask = np.zeros_like(grouped, dtype=bool)
948
-
949
- if min_span is not None:
950
- total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
951
- span_mask = np.where(total_span < min_span, True, False)
952
- else:
953
- span_mask = np.zeros_like(grouped, dtype=bool)
954
-
955
- mask = diff_mask * span_mask
956
-
957
- return mask
958
-
959
-
960
- def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
961
- """ Create a mask array for an
962
-
963
- Parameters
964
- ----------
965
- number_of_observations : numpy.ndarray
966
- Array of how many data points are in aggregation
967
- min_count : int
968
- Minimum number of data points for aggregation to be 'valid'
969
-
970
- Returns
971
- -------
972
- np.ndarray
973
- a mask, True where data should be masked
974
- """
975
- valid = np.less(number_of_observations, min_count) # type: np.ndarray
976
- return valid
977
-
978
-
979
- def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
980
- """Convert a list of times to a pandas DatetimeIndex object"""
981
- invalid_msg = "Times must be a list, numpy array, pandas DatetimeIndex, or pandas Series"
982
-
983
- try:
984
- if not len(times):
985
- raise ValueError(invalid_msg)
986
- except TypeError:
987
- raise ValueError(invalid_msg)
988
-
989
- if isinstance(times, pd.DatetimeIndex):
990
- return times
991
-
992
- if isinstance(times, pd.Series):
993
- try:
994
- times = pd.DatetimeIndex(times)
995
- except Exception:
996
- raise ValueError("Series must be convertible to DatetimeIndex")
997
- times.name = 'time'
998
-
999
- return times
1000
-
1001
- elif isinstance(times, np.ndarray):
1002
- times = pd.to_datetime(times)
1003
- times.name = 'time'
1004
- return times
1005
-
1006
- elif isinstance(times, list):
1007
- return pd.to_datetime(times)
1008
-
1009
- else:
1010
- raise ValueError(invalid_msg)
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+ import re
5
+ import inspect
6
+ import numpy as np
7
+ import functools
8
+ import warnings
9
+
10
+ try:
11
+ import netCDF4 as nc
12
+
13
+ try:
14
+ from pfit.pfnet_standard import make_temperature_base
15
+ except ModuleNotFoundError:
16
+ warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
17
+
18
+ except ModuleNotFoundError:
19
+ warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
20
+
21
+ from typing import Union, Optional
22
+ from datetime import datetime, tzinfo, timezone, timedelta
23
+
24
+ import tsp
25
+ import tsp.labels as lbl
26
+ import tsp.tspwarnings as tw
27
+
28
+ from tsp.physics import analytical_fourier
29
+ from tsp.plots.static import trumpet_curve, colour_contour, time_series, profile_evolution
30
+ from tsp.time import format_utc_offset
31
+ from tsp.time import get_utc_offset
32
+ from tsp.misc import completeness
33
+
34
+ from matplotlib.figure import Figure
35
+
36
+
37
+ class TSP:
38
+ """ A Time Series Profile (a collection of time series data at different depths)
39
+
40
+ A TSP can also be:
41
+ Thermal State of Permafrost
42
+ Temperature du Sol en Profondeur
43
+ Temperatures, Secondes, Profondeurs
44
+
45
+ Parameters
46
+ ----------
47
+ times : pandas.DatetimeIndex
48
+ DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
49
+ but will be converted to a DatetimeIndex with no UTC offset.
50
+ depths : list-like
51
+ d-length array of depths
52
+ values : numpy.ndarray
53
+ array with shape (t,d) containing values at (t)emperatures and (d)epths
54
+ longitude : float, optional
55
+ Longitude at which data were collected
56
+ latitude : float, optional
57
+ Latitude at which data were collected
58
+ site_id : str, optional
59
+ Name of location at which data were collected
60
+ metadata : dict
61
+ Additional metadata
62
+
63
+ Attributes
64
+ ----------
65
+ values
66
+ latitude : float
67
+ Latitude at which data were collected
68
+ longitude : float
69
+ Longitude at which data were collected
70
+ metadata : dict
71
+ Additional metadata provided at instantiation or by other methods
72
+ """
73
+
74
+ def __repr__(self) -> str:
75
+ return repr(self.wide)
76
+
77
+ def __str__(self) -> str:
78
+ return str(self.wide)
79
+
80
+ def __init__(self, times, depths, values,
81
+ latitude: Optional[float]=None,
82
+ longitude: Optional[float]=None,
83
+ site_id: Optional[str]=None,
84
+ metadata: dict={}):
85
+
86
+ self._times = handle_incoming_times(times)
87
+ if self._times.duplicated().any():
88
+ warnings.warn(tw.DuplicateTimesWarning(self._times), stacklevel=2)
89
+
90
+ if self.utc_offset:
91
+ self._output_utc_offset = self.utc_offset
92
+ else:
93
+ self._output_utc_offset = None
94
+
95
+ self._depths = np.atleast_1d(depths)
96
+ self._values = np.atleast_2d(values)
97
+ self.__number_of_observations = np.ones_like(values, dtype=int)
98
+ self.__number_of_observations[np.isnan(values)] = 0
99
+ self.metadata = metadata
100
+ self.latitude = latitude
101
+ self.longitude = longitude
102
+ self.site_id = site_id
103
+ self._freq = None
104
+ self._completeness = None
105
+
106
+ @property
107
+ def freq(self) -> Optional[int]:
108
+ """ Measurement frequency [s] """
109
+ return self._freq
110
+
111
+ @freq.setter
112
+ def freq(self, value: int):
113
+ if not isinstance(value, int):
114
+ raise TypeError("Must be string, e.g. '1D', '3600s'")
115
+ self._freq = value
116
+
117
+ @property
118
+ def completeness(self) -> Optional[pd.DataFrame]:
119
+ """ Data completeness """
120
+ return self._completeness
121
+
122
+ @completeness.setter
123
+ def completeness(self, value):
124
+ raise ValueError("You can't assign this variable.")
125
+
126
+ @classmethod
127
+ def from_tidy_format(cls, times, depths, values,
128
+ number_of_observations=None,
129
+ latitude: Optional[float]=None,
130
+ longitude: Optional[float]=None,
131
+ site_id: Optional[str]=None,
132
+ metadata:dict={}):
133
+ """ Create a TSP from data in a 'tidy' or 'long' format
134
+
135
+ Parameters
136
+ ----------
137
+ times : list-like
138
+ n-length array of datetime objects
139
+ depths : list-like
140
+ n-length array of depths
141
+ values : numpy.ndarray
142
+ n-length array of (temperaure) values at associated time and depth
143
+ number_of_observations : numpy.ndarray, optional
144
+ n-length array of number of observations at associated time and
145
+ depth for aggregated values (default: 1)
146
+ longitude : float, optional
147
+ Longitude at which data were collected
148
+ latitude : float, optional
149
+ Latitude at which data were collected
150
+ site_id : str, optional
151
+ Name of location at which data were collected
152
+ metadata : dict
153
+ Additional metadata
154
+ """
155
+ times = np.atleast_1d(times)
156
+ depths = np.atleast_1d(depths)
157
+ values = np.atleast_1d(values)
158
+
159
+ number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
160
+ df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
161
+ df.set_index(["times", "depths"], inplace=True)
162
+
163
+ try:
164
+ unstacked = df.unstack()
165
+ except ValueError as e:
166
+ if np.any(df.index.duplicated()):
167
+ print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
168
+ raise e
169
+
170
+ temps = unstacked.get('temperature_in_ground')
171
+
172
+ this = cls(times=temps.index.values,
173
+ depths=temps.columns.values,
174
+ values=temps.values,
175
+ latitude=latitude,
176
+ longitude=longitude,
177
+ site_id=site_id,
178
+ metadata=metadata)
179
+
180
+ number_of_observations = unstacked.get('number_of_observations').values
181
+
182
+ number_of_observations[np.isnan(number_of_observations)] = 0
183
+ this.__number_of_observations = number_of_observations
184
+ return this
185
+
186
+ @classmethod
187
+ def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
188
+ """ Use an existing TSP object as a template, """
189
+ kw = {}
190
+ for arg in inspect.getfullargspec(TSP).args[1:]:
191
+ if kwargs.get(arg) is not None:
192
+ kw[arg] = kwargs.get(arg)
193
+ else:
194
+ kw[arg] = getattr(t, arg)
195
+
196
+ t = TSP(**kw)
197
+
198
+ return t
199
+
200
+ @classmethod
201
+ def from_json(cls, json_file) -> "TSP":
202
+ """ Read data from a json file
203
+
204
+ Parameters
205
+ ----------
206
+ json_file : str
207
+ Path to a json file from which to read
208
+ """
209
+ df = pd.read_json(json_file)
210
+ depth_pattern = r"^(-?[0-9\.]+)$"
211
+
212
+ times = pd.to_datetime(df['time']).values
213
+ depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
214
+ values = df.loc[:, depths].to_numpy()
215
+
216
+ t = cls(times=times, depths=depths, values=values)
217
+
218
+ return t
219
+
220
+ @classmethod
221
+ def synthetic(cls, depths: "np.ndarray", start="2000-01-01", end="2003-01-01",
222
+ Q:"Optional[float]"=0.2,
223
+ c:"Optional[float]"=1.6e6,
224
+ k:"Optional[float]"=2.5,
225
+ A:"Optional[float]"=6,
226
+ MAGST:"Optional[float]"=-0.5) -> "TSP":
227
+ """
228
+ Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
229
+ Suitable for testing
230
+
231
+ Parameters
232
+ ----------
233
+ depths : np.ndarray
234
+ array of depths in m
235
+ start : str
236
+ array of times in seconds
237
+ Q : Optional[float], optional
238
+ Ground heat flux [W m-2], by default 0.2
239
+ c : Optional[float], optional
240
+ heat capacity [J m-3 K-1], by default 1.6e6
241
+ k : Optional[float], optional
242
+ thermal conductivity [W m-1 K-1], by default 2.5
243
+ A : Optional[float], optional
244
+ Amplitude of temperature fluctuation [C], by default 6
245
+ MAGST : Optional[float], optional
246
+ Mean annual ground surface temperature [C], by default -0.5
247
+
248
+ Returns
249
+ -------
250
+ TSP
251
+ A timeseries profile (TSP) object
252
+ """
253
+ times = pd.date_range(start=start, end=end).to_pydatetime()
254
+ t_sec = np.array([(t-times[0]).total_seconds() for t in times])
255
+
256
+ values = analytical_fourier(depths=depths, times=t_sec, Q=Q, c=c, k=k, A=A, MAGST=MAGST)
257
+
258
+ this = cls(depths=depths, times=times, values=values)
259
+
260
+ return this
261
+
262
+ @property
263
+ @functools.lru_cache()
264
+ def long(self) -> "pd.DataFrame":
265
+ """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
266
+
267
+ Returns
268
+ -------
269
+ pandas.DataFrame
270
+ Time series profile data with columns:
271
+ - **time**: time
272
+ - **depth**: depth
273
+ - **temperature_in_ground**: temperature
274
+ - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
275
+ """
276
+ values = self.wide.melt(id_vars='time',
277
+ var_name="depth",
278
+ value_name="temperature_in_ground")
279
+
280
+ number_of_observations = self.number_of_observations.melt(id_vars='time',
281
+ var_name="depth",
282
+ value_name="number_of_observations")
283
+
284
+ values['number_of_observations'] = number_of_observations['number_of_observations']
285
+
286
+ return values
287
+
288
+ @property
289
+ @functools.lru_cache()
290
+ def wide(self) -> "pd.DataFrame":
291
+ """ Return the data in a 'wide' format (one column per depth)
292
+
293
+ Returns
294
+ -------
295
+ pandas.DataFrame
296
+ Time series profile data
297
+ """
298
+ tabular = pd.DataFrame(self._values)
299
+ tabular.columns = self._depths
300
+ tabular.index = self.times
301
+ tabular.insert(0, "time", self.times)
302
+
303
+ return tabular
304
+
305
+ @property
306
+ @functools.lru_cache()
307
+ def number_of_observations(self) -> "pd.DataFrame":
308
+ """ The number of observations for an average at a particular depth or time.
309
+
310
+ For pure observational data, the number of observations will always be '1'. When data are aggregated,
311
+ (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`) these numbers
312
+ will be greater than 1.
313
+
314
+ Returns
315
+ -------
316
+ DataFrame
317
+ Number of observations
318
+ """
319
+ tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
320
+ tabular.columns = self._depths
321
+ tabular.index = self._times
322
+ tabular.insert(0, "time", self._times)
323
+
324
+ return tabular
325
+
326
+ @number_of_observations.setter
327
+ def number_of_observations(self, value):
328
+ raise ValueError(f"You can't assign {value} to this variable (no assignment allowed).")
329
+
330
+ def reset_counts(self):
331
+ """ Set observation count to 1 if data exists, 0 otherwise """
332
+ self.__number_of_observations = (~self.wide.isna()).astype('boolean')
333
+
334
+ def set_utc_offset(self, offset:"Union[int,str]") -> None:
335
+ """ Set the time zone of the data by providing a UTC offset
336
+
337
+ Parameters
338
+ ----------
339
+ offset : int, str
340
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
341
+ """
342
+ if self.utc_offset is not None:
343
+ raise ValueError("You can only set the UTC offset once.")
344
+
345
+ utc_offset = get_utc_offset(offset)
346
+
347
+ tz = timezone(timedelta(seconds = utc_offset))
348
+ self._times = self._times.tz_localize(tz)
349
+ self._output_utc_offset = timezone(timedelta(seconds = utc_offset))
350
+
351
+ TSP.wide.fget.cache_clear()
352
+ TSP.long.fget.cache_clear()
353
+
354
+ @property
355
+ def utc_offset(self) -> "Optional[tzinfo]":
356
+ """ Get the time zone of the data by providing a UTC offset
357
+
358
+ Returns
359
+ -------
360
+ datetime.tzinfo
361
+ A timezone object
362
+ """
363
+ if self._times.tz is None:
364
+ return None
365
+ else:
366
+ return self._times.tz
367
+
368
+ @utc_offset.setter
369
+ def utc_offset(self, value):
370
+ self.set_utc_offset(value)
371
+
372
+ @property
373
+ def output_utc_offset(self) -> "Optional[tzinfo]":
374
+ """ Get the time zone in which to output or display the data by providing a UTC offset
375
+
376
+ Returns
377
+ -------
378
+ datetime.tzinfo
379
+ A timezone object
380
+ """
381
+ if self._output_utc_offset is None:
382
+ return None
383
+ else:
384
+ return self._output_utc_offset
385
+
386
+ @output_utc_offset.setter
387
+ def output_utc_offset(self, offset:"Union[int,str]") -> None:
388
+ self.set_output_utc_offset(offset)
389
+
390
+ def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
391
+ """ Set the time zone in which to display the output or data by providing a UTC offset
392
+ Parameters
393
+ ----------
394
+ offset : int, str
395
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
396
+ """
397
+ utc_offset = get_utc_offset(offset)
398
+ tz = timezone(timedelta(seconds = utc_offset))
399
+ self._output_utc_offset = tz
400
+
401
+ TSP.wide.fget.cache_clear()
402
+ TSP.long.fget.cache_clear()
403
+
404
+ def reset_output_utc_offset(self) -> None:
405
+ """ Reset the time zone in which to output or display the data to the default (the one set by set_utc_offset)
406
+
407
+ """
408
+ if self.utc_offset is None:
409
+ raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
410
+ else:
411
+ self._output_utc_offset = self.utc_offset
412
+
413
+ def __nly(self,
414
+ freq_fmt:str,
415
+ new_freq,
416
+ min_count:Optional[int],
417
+ max_gap:Optional[int],
418
+ min_span:Optional[int]) -> TSP:
419
+ """
420
+ Temporal aggregation by grouping according to a string-ified time
421
+
422
+ Parameters
423
+ ----------
424
+ freq_fmt : str
425
+ Python date format string used to aggregate and recover time
426
+
427
+ Returns
428
+ -------
429
+ tuple[pd.DataFrame, pd.DataFrame]
430
+ A tuple of dataframes, the first containing the aggregated data, the second containing the number of observations
431
+ """
432
+ R = self.wide.drop("time", axis=1).resample(freq_fmt)
433
+ cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
434
+ total_obs = R.count()
435
+ values = R.mean()
436
+
437
+ # Calculate masks
438
+ mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
439
+
440
+ if min_count is not None:
441
+ mc_mask = (cumulative_obs < min_count)
442
+ if max_gap is not None:
443
+ Mg_mask = max_gap_mask(R, max_gap)
444
+ if min_span is not None:
445
+ ms_mask = min_span_mask(R, min_span)
446
+
447
+ mask = (mc_mask | Mg_mask | ms_mask)
448
+ values[mask] = np.nan
449
+
450
+ # Construct TSP
451
+ t = TSP.__from_tsp(self, times=values.index,
452
+ depths=values.columns,
453
+ values=values.values)
454
+ t.__number_of_observations = cumulative_obs
455
+ t.freq = new_freq
456
+
457
+ # Calculate data completeness
458
+ if self.freq is not None:
459
+ f1 = self.freq
460
+ f2 = new_freq
461
+ t._completeness = completeness(total_obs, f1, f2)
462
+
463
+ return t
464
+
465
+ def monthly(self,
466
+ min_count:Optional[int]=24,
467
+ max_gap:Optional[int]=3600*24*8,
468
+ min_span:Optional[int]=3600*24*21) -> "TSP":
469
+ """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
470
+
471
+ Parameters
472
+ ----------
473
+ min_count : int
474
+ Minimum number of observations in a month to be considered a valid average,
475
+ defaults to None
476
+ max_gap : int
477
+ Maximum gap (in seconds) between data points to be considered a valid average,
478
+ defaults to None
479
+ min_span : int
480
+ Minimum total data range (in seconds) to be consiered a valid average,
481
+ defaults to None
482
+
483
+ Returns
484
+ -------
485
+ TSP
486
+ A TSP object with data aggregated to monthly averages
487
+ """
488
+ t = self.__nly(freq_fmt="M",
489
+ new_freq=lbl.MONTHLY,
490
+ min_count=min_count,
491
+ max_gap=max_gap,
492
+ min_span=min_span)
493
+
494
+ return t
495
+
496
+ def daily(self,
497
+ min_count:Optional[int]=None,
498
+ max_gap:Optional[int]=None,
499
+ min_span:Optional[int]=None) -> "TSP":
500
+ """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
501
+
502
+ Parameters
503
+ ----------
504
+ min_count : int
505
+ Minimum number of observations in a day to be considered a valid average,
506
+ defaults to None
507
+ max_gap : int
508
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
509
+ min_span : int
510
+ Minimum total data range (in seconds) to be consiered a valid average, defaults to None
511
+
512
+ Returns
513
+ -------
514
+ TSP
515
+ A TSP object with data aggregated to daily averages
516
+ """
517
+ # if the data is already daily +/- 1min , just return it
518
+ t = self.__nly(freq_fmt="D",
519
+ new_freq=lbl.DAILY,
520
+ min_count=min_count,
521
+ max_gap=max_gap,
522
+ min_span=min_span)
523
+
524
+ return t
525
+
526
+ def yearly(self,
527
+ min_count:Optional[int]=None,
528
+ max_gap:Optional[int]=None,
529
+ min_span:Optional[int]=None) -> "TSP":
530
+ """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
531
+
532
+ Parameters
533
+ ----------
534
+ min_count : int
535
+ Minimum number of observations in a month to be considered a valid average, defaults to None
536
+ max_gap : int
537
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
538
+ min_span : int
539
+ Minimum total data range (in seconds) to be consiered a valid average, defaults to None
540
+
541
+ Returns
542
+ -------
543
+ TSP
544
+ A TSP object with data aggregated to yearly averages
545
+ """
546
+ t = self.__nly(freq_fmt="Y",
547
+ new_freq=lbl.YEARLY,
548
+ min_count=min_count,
549
+ max_gap=max_gap,
550
+ min_span=min_span)
551
+
552
+ return t
553
+
554
+ @property
555
+ def depths(self) -> "np.ndarray":
556
+ """ Return the depth values in the profile
557
+
558
+ Returns
559
+ -------
560
+ numpy.ndarray
561
+ The depths in the profile
562
+ """
563
+ return self._depths
564
+
565
+ @depths.setter
566
+ def depths(self, value):
567
+ depths = np.atleast_1d(value)
568
+
569
+ if not len(depths) == len(self._depths):
570
+ raise ValueError(f"List of depths must have length of {len(self._depths)}.")
571
+
572
+ self._depths = depths
573
+
574
+ TSP.wide.fget.cache_clear()
575
+ TSP.long.fget.cache_clear()
576
+
577
+ @property
578
+ def times(self):
579
+ """ Return the timestamps in the time series
580
+
581
+ Returns
582
+ -------
583
+ pandas.DatetimeIndex
584
+ The timestamps in the time series
585
+ """
586
+ if self.utc_offset is None:
587
+ return self._times
588
+
589
+ elif self._output_utc_offset == self.utc_offset:
590
+ return self._times
591
+
592
+ else:
593
+ return self._times.tz_convert(self.output_utc_offset)
594
+
595
+ @property
596
+ def values(self):
597
+ return self._values
598
+
599
+ def to_gtnp(self, filename: str) -> None:
600
+ """ Write the data in GTN-P format
601
+
602
+ Parameters
603
+ ----------
604
+ filename : str
605
+ Path to the file to write to
606
+ """
607
+ df = self.wide.rename(columns={'time': 'Date/Depth'})
608
+ df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
609
+
610
+ df.to_csv(filename, index=False, na_rep="-999")
611
+
612
+ def to_ntgs(self, filename:str, project_name:str="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
613
+ """ Write the data in NTGS template format
614
+
615
+ Parameters
616
+ ----------
617
+ filename : str
618
+ Path to the file to write to
619
+ project_name : str, optional
620
+ The project name, by default ""
621
+ site_id : str, optional
622
+ The name of the site , by default None
623
+ latitude : float, optional
624
+ WGS84 latitude at which the observations were recorded, by default None
625
+ longitude : float, optional
626
+ WGS84 longitude at which the observations were recorded, by default None
627
+ """
628
+ if latitude is None:
629
+ latitude = self.latitude if self.latitude is not None else ""
630
+
631
+ if longitude is None:
632
+ longitude = self.longitude if self.longitude is not None else ""
633
+
634
+ if site_id is None:
635
+ site_id = self.site_id if self.site_id is not None else ""
636
+ data = self.values
637
+
638
+ df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
639
+ 'site_id': pd.Series(dtype='str'),
640
+ 'latitude': pd.Series(dtype='float'),
641
+ 'longitude': pd.Series(dtype='float')
642
+ })
643
+
644
+ df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
645
+ df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
646
+
647
+ df["project_name"] = project_name
648
+ df["site_id"] = site_id
649
+ df["latitude"] = latitude
650
+ df["longitude"] = longitude
651
+
652
+ headers = [str(d) + "_m" for d in self.depths]
653
+
654
+ for i, h in enumerate(headers):
655
+ df[h] = data[:, i]
656
+
657
+ df.to_csv(filename, index=False)
658
+
659
+ def to_netcdf(self, file: str) -> None:
660
+ """ Write the data as a netcdf"""
661
+ try:
662
+ ncf = make_temperature_base(file, len(self.depths))
663
+ except NameError:
664
+ warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
665
+ return
666
+
667
+ with nc.Dataset(ncf, 'a') as ncd:
668
+ pytime = self.times.to_pydatetime()
669
+
670
+ ncd['depth_below_ground_surface'][:] = self.depths
671
+
672
+
673
+ ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
674
+ ncd['ground_temperature'][:] = self.values
675
+
676
+ if self.latitude:
677
+ ncd['latitude'][:] = self.latitude
678
+ if self.longitude:
679
+ ncd['longitude'][:] = self.longitude
680
+ if self.site_id:
681
+ ncd['site_name'] = self.site_id
682
+
683
+ for key, value in self.metadata:
684
+ try:
685
+ ncd.setncattr(key, value)
686
+ except Exception:
687
+ warnings.warn(f"Could not set metadata item: {key}", stacklevel=2)
688
+
689
+ def to_json(self, file: str) -> None:
690
+ """ Write the data to a serialized json file """
691
+ with open(file, 'w') as f:
692
+ f.write(self._to_json())
693
+
694
+ def _to_json(self) -> str:
695
+ return self.wide.to_json()
696
+
697
+ def plot_profiles(self, P:int=100, n:int=10) -> Figure:
698
+ """ Create a plot of the temperature profiles at different times
699
+
700
+ Parameters
701
+ ----------
702
+ P : int
703
+ Percentage of time range to plot
704
+ n : int
705
+ Number of evenly-spaced profiles to plot
706
+
707
+ Returns
708
+ -------
709
+ Figure
710
+ matplotlib `Figure` object
711
+ """
712
+ fig = profile_evolution(depths=self.depths, times=self.times, values=self._values, P=P, n=n)
713
+ fig.show()
714
+ return fig
715
+
716
+ def plot_trumpet(self,
717
+ year: Optional[int]=None,
718
+ begin: Optional[datetime]=None,
719
+ end: Optional[datetime]=None,
720
+ min_completeness: Optional[float]=None,
721
+ **kwargs) -> Figure:
722
+ """ Create a trumpet plot from the data
723
+
724
+ Parameters
725
+ ----------
726
+ year : int, optional
727
+ Which year to plot
728
+ begin : datetime, optional
729
+ If 'end' also provided, the earliest measurement to include in the averaging for the plot
730
+ end : datetime, optional
731
+ If 'begin' also provided, the latest measurement to include in the averaging for the plot
732
+ min_completeness : float, optional
733
+ If provided, the minimum completeness (fractional, 0 to 1) required to include
734
+ in temperature envelope, otherwise
735
+ the point is plotted as an unconnected, slightly transparent dot, by default None
736
+ **kwargs : dict, optional
737
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
738
+ list of all possible arguments.
739
+
740
+ Returns
741
+ -------
742
+ Figure
743
+ a matplotlib `Figure` object
744
+ """
745
+ df = self.long.dropna()
746
+
747
+ if year is not None:
748
+ df = df[df['time'].dt.year == year]
749
+
750
+ elif begin is not None or end is not None:
751
+ raise NotImplementedError
752
+
753
+ else:
754
+ raise ValueError("One of 'year', 'begin', 'end' must be provided.")
755
+
756
+ grouped = df.groupby('depth')
757
+
758
+ max_t = grouped.max().get('temperature_in_ground').values
759
+ min_t = grouped.min().get('temperature_in_ground').values
760
+ mean_t = grouped.mean().get('temperature_in_ground').values
761
+ depth = np.array([d for d in grouped.groups.keys()])
762
+
763
+ # Calculate completeness
764
+ c = self.yearly(None, None, None).completeness
765
+
766
+ if min_completeness is not None and c is not None:
767
+ C = c[c.index.year == year]
768
+ C = C[depth].iloc[0,:].values
769
+
770
+ else:
771
+ C = None
772
+
773
+ fig = trumpet_curve(depth=depth,
774
+ t_max=max_t,
775
+ t_min=min_t,
776
+ t_mean=mean_t,
777
+ min_completeness=min_completeness,
778
+ data_completeness=C,
779
+ **kwargs)
780
+ fig.show()
781
+
782
+ return fig
783
+
784
+ def plot_contour(self, **kwargs) -> Figure:
785
+ """ Create a contour plot
786
+
787
+ Parameters
788
+ ----------
789
+ **kwargs : dict, optional
790
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
791
+ list of all possible arguments.
792
+
793
+ Returns
794
+ -------
795
+ Figure
796
+ matplotlib `Figure` object
797
+ """
798
+ fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
799
+
800
+ if self.output_utc_offset is not None:
801
+ label = format_utc_offset(self.output_utc_offset)
802
+ if label != "UTC":
803
+ label = f"UTC{label}"
804
+ fig.axes[0].set_xlabel(f"Time [{label}]")
805
+
806
+ fig.show()
807
+
808
+ return fig
809
+
810
def plot_timeseries(self, depths: Optional[list] = None, **kwargs) -> Figure:
    """Create a time series T(t) plot

    Parameters
    ----------
    depths : list, optional
        If non-empty, restricts the depths to include in the plot. By default
        ``None`` (all depths are plotted); an empty list is treated the same
        as ``None`` for backwards compatibility.
    **kwargs : dict, optional
        Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
        list of all possible arguments.

    Returns
    -------
    Figure
        matplotlib `Figure` object
    """
    # BUG FIX: the previous default was a mutable list literal (depths=[]),
    # and `depths == []` misbehaves for array-like inputs (elementwise
    # comparison). Use a None sentinel and an explicit emptiness check.
    if depths is None or len(depths) == 0:
        depths = self.depths

    # Boolean mask selecting the requested depths out of self.depths.
    d_mask = np.isin(self.depths, depths)

    fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)

    # Annotate the x-axis with the configured UTC offset, if any.
    if self.output_utc_offset is not None:
        label = format_utc_offset(self.output_utc_offset)
        if label != "UTC":
            label = f"UTC{label}"
        fig.axes[0].set_xlabel(f"Time [{label}]")
    fig.autofmt_xdate()
    fig.show()

    return fig
845
class AggregatedTSP(TSP):
    """ A Time Series Profile produced from another TSP (e.g. by temporal aggregation).

    NOTE(review): the previous docstring was a copy of :py:class:`~tsp.core.IndexedTSP`'s
    ("uses indices (1,2,3,...) instead of depth values"), which does not match
    this class's name. The class adds no behaviour beyond :py:class:`~tsp.core.TSP`;
    presumably it only marks that the data have been aggregated (e.g. daily or
    monthly summaries) — confirm against the code that constructs it.

    Parameters
    ----------
    times : list-like
        t-length array of datetime objects
    values : numpy.ndarray
        array with shape (t,d) containing values at (t)imes and (d)epths
    **kwargs : dict
        Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
        list of all possible arguments.
    """
863
class IndexedTSP(TSP):
    """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.

    Used in situations when depths are unknown (such as when reading datalogger exports
    that don't have depth measurements.)

    Parameters
    ----------
    times : list-like
        t-length array of datetime objects
    values : numpy.ndarray
        array with shape (t,d) containing values at (t)imes and (d)epths
    **kwargs : dict
        Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
        list of all possible arguments.
    """

    def __init__(self, times, values, **kwargs):
        # Stand-in 'depths' are the 1-based column indices (1,2,3,...).
        depths = np.arange(0, values.shape[1]) + 1
        super().__init__(times=times, depths=depths, values=values, **kwargs)

    @property
    def depths(self) -> np.ndarray:
        """Depth indices

        Returns
        -------
        numpy.ndarray
            An array of depth indices
        """
        # Warn on every access: these are placeholders, not physical depths.
        # (typo fix: "instad" -> "instead")
        warnings.warn("This TSP uses indices (1,2,3,...) instead of depths. Use set_depths() to use measured depths.", stacklevel=2)
        return self._depths

    @depths.setter
    def depths(self, value):
        # Delegate to the parent class's property setter so any validation
        # defined there stays in one place.
        TSP.depths.__set__(self, value)

    def set_depths(self, depths: np.ndarray):
        """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`

        Parameters
        ----------
        depths : np.ndarray
            An array or list of depth values equal in length to the depth indices
        """
        self.depths = depths
        # Rebrand the instance as a plain TSP now that real depths are known;
        # the index-warning property above no longer applies.
        self.__class__ = TSP
913
def span(S: pd.Series) -> float:
    """Seconds between the first and last valid (non-NA) entries of ``S``.

    Returns 0 when the series contains no valid observations.
    """
    start = S.first_valid_index()  # type: pd.Timestamp
    stop = S.last_valid_index()  # type: pd.Timestamp

    if start is None or stop is None:
        return 0

    return (stop - start).total_seconds()
921
def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
                  threshold: float) -> "pd.DataFrame":
    """True for each resampling bin whose first-to-last valid data span
    (per :func:`span`) is shorter than ``threshold`` seconds."""
    spans = R.apply(span)
    return spans < threshold
927
def gap(S: pd.Series) -> float:
    """Largest spacing, in seconds, between consecutive valid (non-NA)
    observations of ``S``.

    Returns 0 when there are fewer than three valid observations
    (i.e. fewer than two spacings), matching the original behaviour.
    """
    deltas = np.diff(S.dropna().index)

    if len(deltas) <= 1:
        return 0

    return max(deltas).astype('timedelta64[s]').astype(float)
939
def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
                 threshold: float) -> "pd.DataFrame":
    """True for each resampling bin whose largest internal gap (per
    :func:`gap`) exceeds ``threshold`` seconds, or where :func:`gap`
    reported 0 (too few observations to measure a gap)."""
    gaps = R.apply(gap)
    return (gaps > threshold) | (gaps == 0)
947
+ def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
948
+ """ Mask out observational groups in which there is more than a certain size temporal gap
949
+
950
+ Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
951
+ or end of the aggregation group (using min_span).
952
+
953
+ Parameters
954
+ ----------
955
+ grouped : pandas.core.groupby.DataFrameGroupBy
956
+ groupby with 'time' and 'depth' columns
957
+ max_gap : int
958
+ maximum gap in seconds to tolerate between observations in a group
959
+ min_span : int
960
+ minimum data range (beginning to end) in seconds.
961
+
962
+ Returns
963
+ -------
964
+ numpy.ndarray
965
+ boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
966
+ """
967
+ if max_gap is not None:
968
+ max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
969
+ max_diff = max_diff.unstack().to_numpy()
970
+ diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
971
+ else:
972
+ diff_mask = np.zeros_like(grouped, dtype=bool)
973
+
974
+ if min_span is not None:
975
+ total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
976
+ span_mask = np.where(total_span < min_span, True, False)
977
+ else:
978
+ span_mask = np.zeros_like(grouped, dtype=bool)
979
+
980
+ mask = diff_mask * span_mask
981
+
982
+ return mask
983
+
984
+
985
+ def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
986
+ """ Create a mask array for an
987
+
988
+ Parameters
989
+ ----------
990
+ number_of_observations : numpy.ndarray
991
+ Array of how many data points are in aggregation
992
+ min_count : int
993
+ Minimum number of data points for aggregation to be 'valid'
994
+
995
+ Returns
996
+ -------
997
+ np.ndarray
998
+ a mask, True where data should be masked
999
+ """
1000
+ valid = np.less(number_of_observations, min_count) # type: np.ndarray
1001
+ return valid
1002
+
1003
+
1004
def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
    """Convert a list of times to a pandas DatetimeIndex object"""
    invalid_msg = "Times must be a list, numpy array, pandas DatetimeIndex, or pandas Series"

    # Reject empty inputs and anything without a length (e.g. a scalar).
    try:
        length = len(times)
    except TypeError:
        raise ValueError(invalid_msg)
    if not length:
        raise ValueError(invalid_msg)

    # Already a DatetimeIndex: pass through unchanged (same object).
    if isinstance(times, pd.DatetimeIndex):
        return times

    if isinstance(times, pd.Series):
        try:
            converted = pd.DatetimeIndex(times)
        except Exception:
            raise ValueError("Series must be convertible to DatetimeIndex")
        converted.name = 'time'
        return converted

    if isinstance(times, np.ndarray):
        converted = pd.to_datetime(times)
        converted.name = 'time'
        return converted

    if isinstance(times, list):
        # NOTE: list inputs keep pandas' default name, matching the
        # original behaviour (only Series/ndarray get name='time').
        return pd.to_datetime(times)

    raise ValueError(invalid_msg)