tsp 1.7.7__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tsp might be problematic. Click here for more details.

Files changed (92) hide show
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +153 -0
  4. tsp/core.py +1108 -1035
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/example_geotop.csv +5240 -5240
  10. tsp/data/example_gtnp.csv +1298 -1298
  11. tsp/data/example_permos.csv +7 -7
  12. tsp/data/test_geotop_has_space.txt +5 -5
  13. tsp/dataloggers/AbstractReader.py +43 -43
  14. tsp/dataloggers/FG2.py +110 -110
  15. tsp/dataloggers/GP5W.py +114 -114
  16. tsp/dataloggers/Geoprecision.py +34 -34
  17. tsp/dataloggers/HOBO.py +914 -914
  18. tsp/dataloggers/RBRXL800.py +190 -190
  19. tsp/dataloggers/RBRXR420.py +308 -308
  20. tsp/dataloggers/__init__.py +15 -15
  21. tsp/dataloggers/logr.py +115 -115
  22. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  23. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  24. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  25. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  26. tsp/dataloggers/test_files/010252.dat +1731 -1731
  27. tsp/dataloggers/test_files/010252.hex +1739 -1739
  28. tsp/dataloggers/test_files/010274.hex +1291 -1291
  29. tsp/dataloggers/test_files/010278.hex +3544 -3544
  30. tsp/dataloggers/test_files/012064.dat +1286 -1286
  31. tsp/dataloggers/test_files/012064.hex +1294 -1294
  32. tsp/dataloggers/test_files/012081.hex +3532 -3532
  33. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  34. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  35. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  36. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  37. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  38. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  39. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  40. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  41. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  42. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  43. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  44. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  45. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  46. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  47. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  48. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  49. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  50. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  51. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  52. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  53. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  54. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  55. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  56. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  57. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  58. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  59. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  60. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  61. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  62. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  63. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  64. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  65. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  66. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  67. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  68. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  69. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  70. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  71. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  72. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  73. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  74. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  75. tsp/gtnp.py +148 -148
  76. tsp/labels.py +3 -3
  77. tsp/misc.py +90 -90
  78. tsp/physics.py +101 -101
  79. tsp/plots/static.py +373 -373
  80. tsp/readers.py +548 -548
  81. tsp/time.py +45 -45
  82. tsp/tspwarnings.py +14 -14
  83. tsp/utils.py +101 -101
  84. tsp/version.py +1 -1
  85. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/METADATA +30 -23
  86. tsp-1.8.0.dist-info/RECORD +94 -0
  87. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/WHEEL +5 -5
  88. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info/licenses}/LICENSE +674 -674
  89. tsp/dataloggers/test_files/CSc_CR1000_1.dat +0 -295
  90. tsp/scratch.py +0 -6
  91. tsp-1.7.7.dist-info/RECORD +0 -95
  92. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/top_level.txt +0 -0
tsp/core.py CHANGED
@@ -1,1035 +1,1108 @@
1
- from __future__ import annotations
2
-
3
- import pandas as pd
4
- import re
5
- import inspect
6
- import numpy as np
7
- import functools
8
- import warnings
9
-
10
- try:
11
- import netCDF4 as nc
12
-
13
- try:
14
- from pfit.pfnet_standard import make_temperature_base
15
- except ModuleNotFoundError:
16
- warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
17
-
18
- except ModuleNotFoundError:
19
- warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
20
-
21
- from typing import Union, Optional
22
- from datetime import datetime, tzinfo, timezone, timedelta
23
-
24
- import tsp
25
- import tsp.labels as lbl
26
- import tsp.tspwarnings as tw
27
-
28
- from tsp.physics import analytical_fourier
29
- from tsp.plots.static import trumpet_curve, colour_contour, time_series, profile_evolution
30
- from tsp.time import format_utc_offset
31
- from tsp.time import get_utc_offset
32
- from tsp.misc import completeness
33
-
34
- from matplotlib.figure import Figure
35
-
36
-
37
- class TSP:
38
- """ A Time Series Profile (a collection of time series data at different depths)
39
-
40
- A TSP can also be:
41
- Thermal State of Permafrost
42
- Temperature du Sol en Profondeur
43
- Temperatures, Secondes, Profondeurs
44
-
45
- Parameters
46
- ----------
47
- times : pandas.DatetimeIndex
48
- DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
49
- but will be converted to a DatetimeIndex with no UTC offset.
50
- depths : list-like
51
- d-length array of depths
52
- values : numpy.ndarray
53
- array with shape (t,d) containing values at (t)emperatures and (d)epths
54
- longitude : float, optional
55
- Longitude at which data were collected
56
- latitude : float, optional
57
- Latitude at which data were collected
58
- site_id : str, optional
59
- Name of location at which data were collected
60
- metadata : dict
61
- Additional metadata
62
-
63
- Attributes
64
- ----------
65
- values
66
- latitude : float
67
- Latitude at which data were collected
68
- longitude : float
69
- Longitude at which data were collected
70
- metadata : dict
71
- Additional metadata provided at instantiation or by other methods
72
- """
73
-
74
- def __repr__(self) -> str:
75
- return repr(self.wide)
76
-
77
- def __str__(self) -> str:
78
- return str(self.wide)
79
-
80
- def __init__(self, times, depths, values,
81
- latitude: Optional[float]=None,
82
- longitude: Optional[float]=None,
83
- site_id: Optional[str]=None,
84
- metadata: dict={}):
85
-
86
- self._times = handle_incoming_times(times)
87
- if self._times.duplicated().any():
88
- warnings.warn(tw.DuplicateTimesWarning(self._times), stacklevel=2)
89
-
90
- if self.utc_offset:
91
- self._output_utc_offset = self.utc_offset
92
- else:
93
- self._output_utc_offset = None
94
-
95
- self._depths = np.atleast_1d(depths)
96
- self._values = np.atleast_2d(values)
97
- self.__number_of_observations = np.ones_like(values, dtype=int)
98
- self.__number_of_observations[np.isnan(values)] = 0
99
- self.metadata = metadata
100
- self.latitude = latitude
101
- self.longitude = longitude
102
- self.site_id = site_id
103
- self._freq = None
104
- self._completeness = None
105
-
106
- @property
107
- def freq(self) -> Optional[int]:
108
- """ Measurement frequency [s] """
109
- return self._freq
110
-
111
- @freq.setter
112
- def freq(self, value: int):
113
- if not isinstance(value, int):
114
- raise TypeError("Must be string, e.g. '1D', '3600s'")
115
- self._freq = value
116
-
117
- @property
118
- def completeness(self) -> Optional[pd.DataFrame]:
119
- """ Data completeness """
120
- return self._completeness
121
-
122
- @completeness.setter
123
- def completeness(self, value):
124
- raise ValueError("You can't assign this variable.")
125
-
126
- @classmethod
127
- def from_tidy_format(cls, times, depths, values,
128
- number_of_observations=None,
129
- latitude: Optional[float]=None,
130
- longitude: Optional[float]=None,
131
- site_id: Optional[str]=None,
132
- metadata:dict={}):
133
- """ Create a TSP from data in a 'tidy' or 'long' format
134
-
135
- Parameters
136
- ----------
137
- times : list-like
138
- n-length array of datetime objects
139
- depths : list-like
140
- n-length array of depths
141
- values : numpy.ndarray
142
- n-length array of (temperaure) values at associated time and depth
143
- number_of_observations : numpy.ndarray, optional
144
- n-length array of number of observations at associated time and
145
- depth for aggregated values (default: 1)
146
- longitude : float, optional
147
- Longitude at which data were collected
148
- latitude : float, optional
149
- Latitude at which data were collected
150
- site_id : str, optional
151
- Name of location at which data were collected
152
- metadata : dict
153
- Additional metadata
154
- """
155
- times = np.atleast_1d(times)
156
- depths = np.atleast_1d(depths)
157
- values = np.atleast_1d(values)
158
-
159
- number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
160
- df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
161
- df.set_index(["times", "depths"], inplace=True)
162
-
163
- try:
164
- unstacked = df.unstack()
165
- except ValueError as e:
166
- if np.any(df.index.duplicated()):
167
- print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
168
- raise e
169
-
170
- temps = unstacked.get('temperature_in_ground')
171
-
172
- this = cls(times=temps.index.values,
173
- depths=temps.columns.values,
174
- values=temps.values,
175
- latitude=latitude,
176
- longitude=longitude,
177
- site_id=site_id,
178
- metadata=metadata)
179
-
180
- number_of_observations = unstacked.get('number_of_observations').values
181
-
182
- number_of_observations[np.isnan(number_of_observations)] = 0
183
- this.__number_of_observations = number_of_observations
184
- return this
185
-
186
- @classmethod
187
- def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
188
- """ Use an existing TSP object as a template, """
189
- kw = {}
190
- for arg in inspect.getfullargspec(TSP).args[1:]:
191
- if kwargs.get(arg) is not None:
192
- kw[arg] = kwargs.get(arg)
193
- else:
194
- kw[arg] = getattr(t, arg)
195
-
196
- t = TSP(**kw)
197
-
198
- return t
199
-
200
- @classmethod
201
- def from_json(cls, json_file) -> "TSP":
202
- """ Read data from a json file
203
-
204
- Parameters
205
- ----------
206
- json_file : str
207
- Path to a json file from which to read
208
- """
209
- df = pd.read_json(json_file)
210
- depth_pattern = r"^(-?[0-9\.]+)$"
211
-
212
- times = pd.to_datetime(df['time']).values
213
- depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
214
- values = df.loc[:, depths].to_numpy()
215
-
216
- t = cls(times=times, depths=depths, values=values)
217
-
218
- return t
219
-
220
- @classmethod
221
- def synthetic(cls, depths: "np.ndarray", start="2000-01-01", end="2003-01-01",
222
- Q:"Optional[float]"=0.2,
223
- c:"Optional[float]"=1.6e6,
224
- k:"Optional[float]"=2.5,
225
- A:"Optional[float]"=6,
226
- MAGST:"Optional[float]"=-0.5) -> "TSP":
227
- """
228
- Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
229
- Suitable for testing
230
-
231
- Parameters
232
- ----------
233
- depths : np.ndarray
234
- array of depths in m
235
- start : str
236
- array of times in seconds
237
- Q : Optional[float], optional
238
- Ground heat flux [W m-2], by default 0.2
239
- c : Optional[float], optional
240
- heat capacity [J m-3 K-1], by default 1.6e6
241
- k : Optional[float], optional
242
- thermal conductivity [W m-1 K-1], by default 2.5
243
- A : Optional[float], optional
244
- Amplitude of temperature fluctuation [C], by default 6
245
- MAGST : Optional[float], optional
246
- Mean annual ground surface temperature [C], by default -0.5
247
-
248
- Returns
249
- -------
250
- TSP
251
- A timeseries profile (TSP) object
252
- """
253
- times = pd.date_range(start=start, end=end).to_pydatetime()
254
- t_sec = np.array([(t-times[0]).total_seconds() for t in times])
255
-
256
- values = analytical_fourier(depths=depths, times=t_sec, Q=Q, c=c, k=k, A=A, MAGST=MAGST)
257
-
258
- this = cls(depths=depths, times=times, values=values)
259
-
260
- return this
261
-
262
- @property
263
- @functools.lru_cache()
264
- def long(self) -> "pd.DataFrame":
265
- """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
266
-
267
- Returns
268
- -------
269
- pandas.DataFrame
270
- Time series profile data with columns:
271
- - **time**: time
272
- - **depth**: depth
273
- - **temperature_in_ground**: temperature
274
- - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
275
- """
276
- values = self.wide.melt(id_vars='time',
277
- var_name="depth",
278
- value_name="temperature_in_ground")
279
-
280
- number_of_observations = self.number_of_observations.melt(id_vars='time',
281
- var_name="depth",
282
- value_name="number_of_observations")
283
-
284
- values['number_of_observations'] = number_of_observations['number_of_observations']
285
-
286
- return values
287
-
288
- @property
289
- @functools.lru_cache()
290
- def wide(self) -> "pd.DataFrame":
291
- """ Return the data in a 'wide' format (one column per depth)
292
-
293
- Returns
294
- -------
295
- pandas.DataFrame
296
- Time series profile data
297
- """
298
- tabular = pd.DataFrame(self._values)
299
- tabular.columns = self._depths
300
- tabular.index = self.times
301
- tabular.insert(0, "time", self.times)
302
-
303
- return tabular
304
-
305
- @property
306
- @functools.lru_cache()
307
- def number_of_observations(self) -> "pd.DataFrame":
308
- """ The number of observations for an average at a particular depth or time.
309
-
310
- For pure observational data, the number of observations will always be '1'. When data are aggregated,
311
- (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`) these numbers
312
- will be greater than 1.
313
-
314
- Returns
315
- -------
316
- DataFrame
317
- Number of observations
318
- """
319
- tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
320
- tabular.columns = self._depths
321
- tabular.index = self._times
322
- tabular.insert(0, "time", self._times)
323
-
324
- return tabular
325
-
326
- @number_of_observations.setter
327
- def number_of_observations(self, value):
328
- raise ValueError(f"You can't assign {value} to this variable (no assignment allowed).")
329
-
330
- def reset_counts(self):
331
- """ Set observation count to 1 if data exists, 0 otherwise """
332
- self.__number_of_observations = (~self.wide.isna()).astype('boolean')
333
-
334
- def set_utc_offset(self, offset:"Union[int,str]") -> None:
335
- """ Set the time zone of the data by providing a UTC offset
336
-
337
- Parameters
338
- ----------
339
- offset : int, str
340
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
341
- """
342
- if self.utc_offset is not None:
343
- raise ValueError("You can only set the UTC offset once.")
344
-
345
- utc_offset = get_utc_offset(offset)
346
-
347
- tz = timezone(timedelta(seconds = utc_offset))
348
- self._times = self._times.tz_localize(tz)
349
- self._output_utc_offset = timezone(timedelta(seconds = utc_offset))
350
-
351
- TSP.wide.fget.cache_clear()
352
- TSP.long.fget.cache_clear()
353
-
354
- @property
355
- def utc_offset(self) -> "Optional[tzinfo]":
356
- """ Get the time zone of the data by providing a UTC offset
357
-
358
- Returns
359
- -------
360
- datetime.tzinfo
361
- A timezone object
362
- """
363
- if self._times.tz is None:
364
- return None
365
- else:
366
- return self._times.tz
367
-
368
- @utc_offset.setter
369
- def utc_offset(self, value):
370
- self.set_utc_offset(value)
371
-
372
- @property
373
- def output_utc_offset(self) -> "Optional[tzinfo]":
374
- """ Get the time zone in which to output or display the data by providing a UTC offset
375
-
376
- Returns
377
- -------
378
- datetime.tzinfo
379
- A timezone object
380
- """
381
- if self._output_utc_offset is None:
382
- return None
383
- else:
384
- return self._output_utc_offset
385
-
386
- @output_utc_offset.setter
387
- def output_utc_offset(self, offset:"Union[int,str]") -> None:
388
- self.set_output_utc_offset(offset)
389
-
390
- def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
391
- """ Set the time zone in which to display the output or data by providing a UTC offset
392
- Parameters
393
- ----------
394
- offset : int, str
395
- If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
396
- """
397
- utc_offset = get_utc_offset(offset)
398
- tz = timezone(timedelta(seconds = utc_offset))
399
- self._output_utc_offset = tz
400
-
401
- TSP.wide.fget.cache_clear()
402
- TSP.long.fget.cache_clear()
403
-
404
- def reset_output_utc_offset(self) -> None:
405
- """ Reset the time zone in which to output or display the data to the default (the one set by set_utc_offset)
406
-
407
- """
408
- if self.utc_offset is None:
409
- raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
410
- else:
411
- self._output_utc_offset = self.utc_offset
412
-
413
- def __nly(self,
414
- freq_fmt:str,
415
- new_freq,
416
- min_count:Optional[int],
417
- max_gap:Optional[int],
418
- min_span:Optional[int]) -> TSP:
419
- """
420
- Temporal aggregation by grouping according to a string-ified time
421
-
422
- Parameters
423
- ----------
424
- freq_fmt : str
425
- Python date format string used to aggregate and recover time
426
-
427
- Returns
428
- -------
429
- tuple[pd.DataFrame, pd.DataFrame]
430
- A tuple of dataframes, the first containing the aggregated data, the second containing the number of observations
431
- """
432
- R = self.wide.drop("time", axis=1).resample(freq_fmt)
433
- cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
434
- total_obs = R.count()
435
- values = R.mean()
436
-
437
- # Calculate masks
438
- mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
439
-
440
- if min_count is not None:
441
- mc_mask = (cumulative_obs < min_count)
442
- if max_gap is not None:
443
- Mg_mask = max_gap_mask(R, max_gap)
444
- if min_span is not None:
445
- ms_mask = min_span_mask(R, min_span)
446
-
447
- mask = (mc_mask | Mg_mask | ms_mask)
448
- values[mask] = np.nan
449
-
450
- # Construct TSP
451
- t = TSP.__from_tsp(self, times=values.index,
452
- depths=values.columns,
453
- values=values.values)
454
- t.__number_of_observations = cumulative_obs
455
- t.freq = new_freq
456
-
457
- # Calculate data completeness
458
- if self.freq is not None:
459
- f1 = self.freq
460
- f2 = new_freq
461
- t._completeness = completeness(total_obs, f1, f2)
462
-
463
- return t
464
-
465
- def monthly(self,
466
- min_count:Optional[int]=24,
467
- max_gap:Optional[int]=3600*24*8,
468
- min_span:Optional[int]=3600*24*21) -> "TSP":
469
- """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
470
-
471
- Parameters
472
- ----------
473
- min_count : int
474
- Minimum number of observations in a month to be considered a valid average,
475
- defaults to None
476
- max_gap : int
477
- Maximum gap (in seconds) between data points to be considered a valid average,
478
- defaults to None
479
- min_span : int
480
- Minimum total data range (in seconds) to be consiered a valid average,
481
- defaults to None
482
-
483
- Returns
484
- -------
485
- TSP
486
- A TSP object with data aggregated to monthly averages
487
- """
488
- t = self.__nly(freq_fmt="M",
489
- new_freq=lbl.MONTHLY,
490
- min_count=min_count,
491
- max_gap=max_gap,
492
- min_span=min_span)
493
-
494
- return t
495
-
496
- def daily(self,
497
- min_count:Optional[int]=None,
498
- max_gap:Optional[int]=None,
499
- min_span:Optional[int]=None) -> "TSP":
500
- """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
501
-
502
- Parameters
503
- ----------
504
- min_count : int
505
- Minimum number of observations in a day to be considered a valid average,
506
- defaults to None
507
- max_gap : int
508
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
509
- min_span : int
510
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
511
-
512
- Returns
513
- -------
514
- TSP
515
- A TSP object with data aggregated to daily averages
516
- """
517
- # if the data is already daily +/- 1min , just return it
518
- t = self.__nly(freq_fmt="D",
519
- new_freq=lbl.DAILY,
520
- min_count=min_count,
521
- max_gap=max_gap,
522
- min_span=min_span)
523
-
524
- return t
525
-
526
- def yearly(self,
527
- min_count:Optional[int]=None,
528
- max_gap:Optional[int]=None,
529
- min_span:Optional[int]=None) -> "TSP":
530
- """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
531
-
532
- Parameters
533
- ----------
534
- min_count : int
535
- Minimum number of observations in a month to be considered a valid average, defaults to None
536
- max_gap : int
537
- Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
538
- min_span : int
539
- Minimum total data range (in seconds) to be consiered a valid average, defaults to None
540
-
541
- Returns
542
- -------
543
- TSP
544
- A TSP object with data aggregated to yearly averages
545
- """
546
- t = self.__nly(freq_fmt="Y",
547
- new_freq=lbl.YEARLY,
548
- min_count=min_count,
549
- max_gap=max_gap,
550
- min_span=min_span)
551
-
552
- return t
553
-
554
- @property
555
- def depths(self) -> "np.ndarray":
556
- """ Return the depth values in the profile
557
-
558
- Returns
559
- -------
560
- numpy.ndarray
561
- The depths in the profile
562
- """
563
- return self._depths
564
-
565
- @depths.setter
566
- def depths(self, value):
567
- depths = np.atleast_1d(value)
568
-
569
- if not len(depths) == len(self._depths):
570
- raise ValueError(f"List of depths must have length of {len(self._depths)}.")
571
-
572
- self._depths = depths
573
-
574
- TSP.wide.fget.cache_clear()
575
- TSP.long.fget.cache_clear()
576
-
577
- @property
578
- def times(self):
579
- """ Return the timestamps in the time series
580
-
581
- Returns
582
- -------
583
- pandas.DatetimeIndex
584
- The timestamps in the time series
585
- """
586
- if self.utc_offset is None:
587
- return self._times
588
-
589
- elif self._output_utc_offset == self.utc_offset:
590
- return self._times
591
-
592
- else:
593
- return self._times.tz_convert(self.output_utc_offset)
594
-
595
- @property
596
- def values(self):
597
- return self._values
598
-
599
- def to_gtnp(self, filename: str) -> None:
600
- """ Write the data in GTN-P format
601
-
602
- Parameters
603
- ----------
604
- filename : str
605
- Path to the file to write to
606
- """
607
- df = self.wide.rename(columns={'time': 'Date/Depth'})
608
- df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
609
-
610
- df.to_csv(filename, index=False, na_rep="-999")
611
-
612
- def to_ntgs(self, filename:str, project_name:str="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
613
- """ Write the data in NTGS template format
614
-
615
- Parameters
616
- ----------
617
- filename : str
618
- Path to the file to write to
619
- project_name : str, optional
620
- The project name, by default ""
621
- site_id : str, optional
622
- The name of the site , by default None
623
- latitude : float, optional
624
- WGS84 latitude at which the observations were recorded, by default None
625
- longitude : float, optional
626
- WGS84 longitude at which the observations were recorded, by default None
627
- """
628
- if latitude is None:
629
- latitude = self.latitude if self.latitude is not None else ""
630
-
631
- if longitude is None:
632
- longitude = self.longitude if self.longitude is not None else ""
633
-
634
- if site_id is None:
635
- site_id = self.site_id if self.site_id is not None else ""
636
- data = self.values
637
-
638
- df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
639
- 'site_id': pd.Series(dtype='str'),
640
- 'latitude': pd.Series(dtype='float'),
641
- 'longitude': pd.Series(dtype='float')
642
- })
643
-
644
- df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
645
- df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
646
-
647
- df["project_name"] = project_name
648
- df["site_id"] = site_id
649
- df["latitude"] = latitude
650
- df["longitude"] = longitude
651
-
652
- headers = [str(d) + "_m" for d in self.depths]
653
-
654
- for i, h in enumerate(headers):
655
- df[h] = data[:, i]
656
-
657
- df.to_csv(filename, index=False)
658
-
659
- def to_netcdf(self, file: str) -> None:
660
- """ Write the data as a netcdf"""
661
- try:
662
- ncf = make_temperature_base(file, len(self.depths))
663
- except NameError:
664
- warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
665
- return
666
-
667
- with nc.Dataset(ncf, 'a') as ncd:
668
- pytime = self.times.to_pydatetime()
669
-
670
- ncd['depth_below_ground_surface'][:] = self.depths
671
-
672
-
673
- ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
674
- ncd['ground_temperature'][:] = self.values
675
-
676
- if self.latitude:
677
- ncd['latitude'][:] = self.latitude
678
- if self.longitude:
679
- ncd['longitude'][:] = self.longitude
680
- if self.site_id:
681
- ncd['site_name'] = self.site_id
682
-
683
- for key, value in self.metadata:
684
- try:
685
- ncd.setncattr(key, value)
686
- except Exception:
687
- warnings.warn(f"Could not set metadata item: {key}", stacklevel=2)
688
-
689
- def to_json(self, file: str) -> None:
690
- """ Write the data to a serialized json file """
691
- with open(file, 'w') as f:
692
- f.write(self._to_json())
693
-
694
- def _to_json(self) -> str:
695
- return self.wide.to_json()
696
-
697
- def plot_profiles(self, P:int=100, n:int=10) -> Figure:
698
- """ Create a plot of the temperature profiles at different times
699
-
700
- Parameters
701
- ----------
702
- P : int
703
- Percentage of time range to plot
704
- n : int
705
- Number of evenly-spaced profiles to plot
706
-
707
- Returns
708
- -------
709
- Figure
710
- matplotlib `Figure` object
711
- """
712
- fig = profile_evolution(depths=self.depths, times=self.times, values=self._values, P=P, n=n)
713
- fig.show()
714
- return fig
715
-
716
- def plot_trumpet(self,
717
- year: Optional[int]=None,
718
- begin: Optional[datetime]=None,
719
- end: Optional[datetime]=None,
720
- min_completeness: Optional[float]=None,
721
- **kwargs) -> Figure:
722
- """ Create a trumpet plot from the data
723
-
724
- Parameters
725
- ----------
726
- year : int, optional
727
- Which year to plot
728
- begin : datetime, optional
729
- If 'end' also provided, the earliest measurement to include in the averaging for the plot
730
- end : datetime, optional
731
- If 'begin' also provided, the latest measurement to include in the averaging for the plot
732
- min_completeness : float, optional
733
- If provided, the minimum completeness (fractional, 0 to 1) required to include
734
- in temperature envelope, otherwise
735
- the point is plotted as an unconnected, slightly transparent dot, by default None
736
- **kwargs : dict, optional
737
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
738
- list of all possible arguments.
739
-
740
- Returns
741
- -------
742
- Figure
743
- a matplotlib `Figure` object
744
- """
745
- df = self.long.dropna()
746
-
747
- if year is not None:
748
- df = df[df['time'].dt.year == year]
749
-
750
- elif begin is not None or end is not None:
751
- raise NotImplementedError
752
-
753
- else:
754
- raise ValueError("One of 'year', 'begin', 'end' must be provided.")
755
-
756
- grouped = df.groupby('depth')
757
-
758
- max_t = grouped.max().get('temperature_in_ground').values
759
- min_t = grouped.min().get('temperature_in_ground').values
760
- mean_t = grouped.mean().get('temperature_in_ground').values
761
- depth = np.array([d for d in grouped.groups.keys()])
762
-
763
- # Calculate completeness
764
- c = self.yearly(None, None, None).completeness
765
-
766
- if min_completeness is not None and c is not None:
767
- C = c[c.index.year == year]
768
- C = C[depth].iloc[0,:].values
769
-
770
- else:
771
- C = None
772
-
773
- fig = trumpet_curve(depth=depth,
774
- t_max=max_t,
775
- t_min=min_t,
776
- t_mean=mean_t,
777
- min_completeness=min_completeness,
778
- data_completeness=C,
779
- **kwargs)
780
- fig.show()
781
-
782
- return fig
783
-
784
- def plot_contour(self, **kwargs) -> Figure:
785
- """ Create a contour plot
786
-
787
- Parameters
788
- ----------
789
- **kwargs : dict, optional
790
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
791
- list of all possible arguments.
792
-
793
- Returns
794
- -------
795
- Figure
796
- matplotlib `Figure` object
797
- """
798
- fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
799
-
800
- if self.output_utc_offset is not None:
801
- label = format_utc_offset(self.output_utc_offset)
802
- if label != "UTC":
803
- label = f"UTC{label}"
804
- fig.axes[0].set_xlabel(f"Time [{label}]")
805
-
806
- fig.show()
807
-
808
- return fig
809
-
810
- def plot_timeseries(self, depths: list=[], **kwargs) -> Figure:
811
- """Create a time series T(t) plot
812
-
813
- Parameters
814
- ----------
815
- depths : list, optional
816
- If non-empty, restricts the depths to include in the plot, by default []
817
- **kwargs : dict, optional
818
- Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
819
- list of all possible arguments.
820
-
821
- Returns
822
- -------
823
- Figure
824
- matplotlib `Figure` object
825
- """
826
- if depths == []:
827
- depths = self.depths
828
-
829
- d_mask = np.isin(self.depths, depths)
830
-
831
- fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)
832
-
833
-
834
- if self.output_utc_offset is not None:
835
- label = format_utc_offset(self.output_utc_offset)
836
- if label != "UTC":
837
- label = f"UTC{label}"
838
- fig.axes[0].set_xlabel(f"Time [{label}]")
839
- fig.autofmt_xdate()
840
- fig.show()
841
-
842
- return fig
843
-
844
-
845
- class AggregatedTSP(TSP):
846
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
847
-
848
- Used in situations when depths are unknown (such as when reading datlogger exports
849
- that don't have depth measurements.)
850
-
851
- Parameters
852
- ----------
853
- times : list-like
854
- t-length array of datetime objects
855
- values : numpy.ndarray
856
- array with shape (t,d) containing values at (t)emperatures and (d)epths
857
- **kwargs : dict
858
- Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
859
- list of all possible arguments.
860
- """
861
-
862
-
863
- class IndexedTSP(TSP):
864
- """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.
865
-
866
- Used in situations when depths are unknown (such as when reading datlogger exports
867
- that don't have depth measurements.)
868
-
869
- Parameters
870
- ----------
871
- times : list-like
872
- t-length array of datetime objects
873
- values : numpy.ndarray
874
- array with shape (t,d) containing values at (t)emperatures and (d)epths
875
- **kwargs : dict
876
- Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
877
- list of all possible arguments.
878
- """
879
-
880
- def __init__(self, times, values, **kwargs):
881
- depths = np.arange(0, values.shape[1]) + 1
882
- super().__init__(times=times, depths=depths, values=values, **kwargs)
883
-
884
- @property
885
- def depths(self) -> np.ndarray:
886
- """Depth indices
887
-
888
- Returns
889
- -------
890
- numpy.ndarray
891
- An array of depth indices
892
- """
893
- warnings.warn("This TSP uses indices (1,2,3,...) instad of depths. Use set_depths() to use measured depths.", stacklevel=2)
894
- return self._depths
895
-
896
- @depths.setter
897
- def depths(self, value):
898
- TSP.depths.__set__(self, value)
899
-
900
- def set_depths(self, depths: np.ndarray):
901
- """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`
902
-
903
- Parameters
904
- ----------
905
- depths : np.ndarray
906
- An array or list of depth values equal in lenth to the depth indices
907
- """
908
- self.depths = depths
909
- self.__class__ = TSP
910
-
911
-
912
-
913
- def span(S: pd.Series) -> float:
914
- first = S.first_valid_index() # type: pd.Timestamp
915
- last = S.last_valid_index() # type: pd.Timestamp
916
- if first is None or last is None:
917
- return 0
918
-
919
- return (last - first).total_seconds()
920
-
921
- def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
922
- threshold: float) -> "pd.DataFrame":
923
- s = R.apply(lambda x: span(x))
924
- return s < threshold
925
-
926
-
927
- def gap(S: pd.Series) -> float:
928
-
929
- d = np.diff(S.dropna().index)
930
- if len(d) == 0:
931
- return 0
932
- elif len(d) == 1:
933
- return 0
934
- elif len(d) > 1:
935
- gap = max(d).astype('timedelta64[s]').astype(float)
936
- return gap
937
-
938
-
939
- def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
940
- threshold: float) -> "pd.DataFrame":
941
- g = R.apply(lambda x: gap(x))
942
- return (g > threshold) | (g == 0)
943
-
944
-
945
-
946
-
947
- def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
948
- """ Mask out observational groups in which there is more than a certain size temporal gap
949
-
950
- Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
951
- or end of the aggregation group (using min_span).
952
-
953
- Parameters
954
- ----------
955
- grouped : pandas.core.groupby.DataFrameGroupBy
956
- groupby with 'time' and 'depth' columns
957
- max_gap : int
958
- maximum gap in seconds to tolerate between observations in a group
959
- min_span : int
960
- minimum data range (beginning to end) in seconds.
961
-
962
- Returns
963
- -------
964
- numpy.ndarray
965
- boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
966
- """
967
- if max_gap is not None:
968
- max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
969
- max_diff = max_diff.unstack().to_numpy()
970
- diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
971
- else:
972
- diff_mask = np.zeros_like(grouped, dtype=bool)
973
-
974
- if min_span is not None:
975
- total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
976
- span_mask = np.where(total_span < min_span, True, False)
977
- else:
978
- span_mask = np.zeros_like(grouped, dtype=bool)
979
-
980
- mask = diff_mask * span_mask
981
-
982
- return mask
983
-
984
-
985
- def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
986
- """ Create a mask array for an
987
-
988
- Parameters
989
- ----------
990
- number_of_observations : numpy.ndarray
991
- Array of how many data points are in aggregation
992
- min_count : int
993
- Minimum number of data points for aggregation to be 'valid'
994
-
995
- Returns
996
- -------
997
- np.ndarray
998
- a mask, True where data should be masked
999
- """
1000
- valid = np.less(number_of_observations, min_count) # type: np.ndarray
1001
- return valid
1002
-
1003
-
1004
- def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
1005
- """Convert a list of times to a pandas DatetimeIndex object"""
1006
- invalid_msg = "Times must be a list, numpy array, pandas DatetimeIndex, or pandas Series"
1007
-
1008
- try:
1009
- if not len(times):
1010
- raise ValueError(invalid_msg)
1011
- except TypeError:
1012
- raise ValueError(invalid_msg)
1013
-
1014
- if isinstance(times, pd.DatetimeIndex):
1015
- return times
1016
-
1017
- if isinstance(times, pd.Series):
1018
- try:
1019
- times = pd.DatetimeIndex(times)
1020
- except Exception:
1021
- raise ValueError("Series must be convertible to DatetimeIndex")
1022
- times.name = 'time'
1023
-
1024
- return times
1025
-
1026
- elif isinstance(times, np.ndarray):
1027
- times = pd.to_datetime(times)
1028
- times.name = 'time'
1029
- return times
1030
-
1031
- elif isinstance(times, list):
1032
- return pd.to_datetime(times)
1033
-
1034
- else:
1035
- raise ValueError(invalid_msg)
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+ import re
5
+ import inspect
6
+ import numpy as np
7
+ import functools
8
+ import warnings
9
+
10
+ try:
11
+ import netCDF4 as nc
12
+
13
+ try:
14
+ from pfit.pfnet_standard import make_temperature_base
15
+ except ModuleNotFoundError:
16
+ warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
17
+
18
+ except ModuleNotFoundError:
19
+ warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
20
+
21
+ from typing import Union, Optional
22
+ from numpy.typing import NDArray
23
+ from datetime import datetime, tzinfo, timezone, timedelta
24
+
25
+ import tsp
26
+ import tsp.labels as lbl
27
+ import tsp.tspwarnings as tw
28
+
29
+ from tsp.physics import analytical_fourier
30
+ from tsp.plots.static import trumpet_curve, colour_contour, time_series, profile_evolution
31
+ from tsp.time import format_utc_offset
32
+ from tsp.time import get_utc_offset
33
+ from tsp.misc import completeness
34
+ from tsp.concatenation import _tsp_concat
35
+
36
+ from matplotlib.figure import Figure
37
+
38
+
39
+ class TSP:
40
+ """ A Time Series Profile (a collection of time series data at different depths)
41
+
42
+ A TSP can also be:
43
+ Thermal State of Permafrost
44
+ Temperature du Sol en Profondeur
45
+ Temperatures, Secondes, Profondeurs
46
+
47
+ Parameters
48
+ ----------
49
+ times : pandas.DatetimeIndex
50
+ DatetimeIndex with optional UTC offset. List-like array of datetime objects can also be passed,
51
+ but will be converted to a DatetimeIndex with no UTC offset.
52
+ depths : list-like
53
+ d-length array of depths
54
+ values : numpy.ndarray
55
+ array with shape (t,d) containing values at (t)emperatures and (d)epths
56
+ longitude : float, optional
57
+ Longitude at which data were collected
58
+ latitude : float, optional
59
+ Latitude at which data were collected
60
+ site_id : str, optional
61
+ Name of location at which data were collected
62
+ metadata : dict
63
+ Additional metadata
64
+
65
+ Attributes
66
+ ----------
67
+ values
68
+ latitude : float
69
+ Latitude at which data were collected
70
+ longitude : float
71
+ Longitude at which data were collected
72
+ metadata : dict
73
+ Additional metadata provided at instantiation or by other methods
74
+ """
75
+
76
+ def __repr__(self) -> str:
77
+ return repr(self.wide)
78
+
79
+ def __str__(self) -> str:
80
+ return str(self.wide)
81
+
82
+ def __add__(self, other: TSP) -> TSP:
83
+ """ Concatenate two TSP objects along the time axis.
84
+ The two TSP objects must have the same depths and the same UTC offset.
85
+
86
+ Parameters
87
+ ----------
88
+ other : TSP
89
+ Another TSP object to concatenate with this one
90
+
91
+ Returns
92
+ -------
93
+ TSP
94
+ A new TSP object with the concatenated data
95
+ """
96
+ if not isinstance(other, TSP):
97
+ raise TypeError("Can only concatenate TSP objects.")
98
+
99
+ if self.utc_offset != other.utc_offset:
100
+ raise ValueError("UTC offsets must be the same to concatenate.")
101
+
102
+ return tsp_concat([self, other])
103
+
104
+ def __init__(self, times, depths, values,
105
+ latitude: Optional[float]=None,
106
+ longitude: Optional[float]=None,
107
+ site_id: Optional[str]=None,
108
+ metadata: dict={}):
109
+
110
+ self._times = handle_incoming_times(times)
111
+ if self._times.duplicated().any():
112
+ warnings.warn(tw.DuplicateTimesWarning(self._times), stacklevel=2)
113
+
114
+ if self.utc_offset:
115
+ self._output_utc_offset = self.utc_offset
116
+ else:
117
+ self._output_utc_offset = None
118
+
119
+ self._depths = np.atleast_1d(depths)
120
+ self._values = np.atleast_2d(values)
121
+ self.__number_of_observations = np.ones_like(values, dtype=int)
122
+ self.__number_of_observations[np.isnan(values)] = 0
123
+ self.metadata = metadata
124
+ self.latitude = latitude
125
+ self.longitude = longitude
126
+ self.site_id = site_id
127
+ self._freq = None
128
+ self._completeness = None
129
+
130
+ self._export_precision = 3
131
+
132
+ @property
133
+ def freq(self) -> Optional[int]:
134
+ """ Measurement frequency [s] """
135
+ return self._freq
136
+
137
+ @freq.setter
138
+ def freq(self, value: int):
139
+ if not isinstance(value, int):
140
+ raise TypeError("Must be string, e.g. '1D', '3600s'")
141
+ self._freq = value
142
+
143
+ @property
144
+ def completeness(self) -> Optional[pd.DataFrame]:
145
+ """ Data completeness """
146
+ return self._completeness
147
+
148
+ @completeness.setter
149
+ def completeness(self, value):
150
+ raise ValueError("You can't assign this variable.")
151
+
152
+ @classmethod
153
+ def from_tidy_format(cls, times, depths, values,
154
+ number_of_observations=None,
155
+ latitude: Optional[float]=None,
156
+ longitude: Optional[float]=None,
157
+ site_id: Optional[str]=None,
158
+ metadata:dict={}):
159
+ """ Create a TSP from data in a 'tidy' or 'long' format
160
+
161
+ Parameters
162
+ ----------
163
+ times : list-like
164
+ n-length array of datetime objects
165
+ depths : list-like
166
+ n-length array of depths
167
+ values : numpy.ndarray
168
+ n-length array of (temperaure) values at associated time and depth
169
+ number_of_observations : numpy.ndarray, optional
170
+ n-length array of number of observations at associated time and
171
+ depth for aggregated values (default: 1)
172
+ longitude : float, optional
173
+ Longitude at which data were collected
174
+ latitude : float, optional
175
+ Latitude at which data were collected
176
+ site_id : str, optional
177
+ Name of location at which data were collected
178
+ metadata : dict
179
+ Additional metadata
180
+ """
181
+ times = np.atleast_1d(times)
182
+ depths = np.atleast_1d(depths)
183
+ values = np.atleast_1d(values)
184
+
185
+ number_of_observations = number_of_observations if number_of_observations else np.ones_like(values)
186
+ df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
187
+ df.set_index(["times", "depths"], inplace=True)
188
+
189
+ try:
190
+ unstacked = df.unstack()
191
+ except ValueError as e:
192
+ if np.any(df.index.duplicated()):
193
+ print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
194
+ raise e
195
+
196
+ temps = unstacked.get('temperature_in_ground')
197
+
198
+ this = cls(times=temps.index.values,
199
+ depths=temps.columns.values,
200
+ values=temps.values,
201
+ latitude=latitude,
202
+ longitude=longitude,
203
+ site_id=site_id,
204
+ metadata=metadata)
205
+
206
+ number_of_observations = unstacked.get('number_of_observations').values
207
+
208
+ number_of_observations[np.isnan(number_of_observations)] = 0
209
+ this.__number_of_observations = number_of_observations
210
+ return this
211
+
212
+ @classmethod
213
+ def __from_tsp(cls, t:TSP, **kwargs) -> "TSP":
214
+ """ Use an existing TSP object as a template, """
215
+ kw = {}
216
+ for arg in inspect.getfullargspec(TSP).args[1:]:
217
+ if kwargs.get(arg) is not None:
218
+ kw[arg] = kwargs.get(arg)
219
+ else:
220
+ kw[arg] = getattr(t, arg)
221
+
222
+ t = TSP(**kw)
223
+
224
+ return t
225
+
226
+ @classmethod
227
+ def from_json(cls, json_file) -> "TSP":
228
+ """ Read data from a json file
229
+
230
+ Parameters
231
+ ----------
232
+ json_file : str
233
+ Path to a json file from which to read
234
+ """
235
+ df = pd.read_json(json_file)
236
+ depth_pattern = r"^(-?[0-9\.]+)$"
237
+
238
+ times = pd.to_datetime(df['time']).values
239
+ depths = [re.search(depth_pattern, c).group(1) for c in df.columns if tsp._is_depth_column(c, depth_pattern)]
240
+ values = df.loc[:, depths].to_numpy()
241
+
242
+ t = cls(times=times, depths=depths, values=values)
243
+
244
+ return t
245
+
246
+ @classmethod
247
+ def synthetic(cls, depths: NDArray[np.number],
248
+ start:str ="2000-01-01",
249
+ end:str ="2003-01-01",
250
+ freq: "str"="D",
251
+ Q:float=0.2,
252
+ c:float=1.6e6,
253
+ k:float=2.5,
254
+ A:float=6,
255
+ MAGST:float=-0.5) -> "TSP":
256
+ """
257
+ Create a 'synthetic' temperature time series using the analytical solution to the heat conduction equation.
258
+ Suitable for testing
259
+
260
+ Parameters
261
+ ----------
262
+ depths : np.ndarray
263
+ array of depths in metres
264
+ start : str
265
+ start date for the time series, in the format "YYYY-MM-DD"
266
+ end : str
267
+ end date for the time series, in the format "YYYY-MM-DD"
268
+ freq : str
269
+ pandas frequency string, e.g. "D" for daily, "H" for hourly, etc.
270
+ Q : Optional[float], optional
271
+ Ground heat flux [W m-2], by default 0.2
272
+ c : Optional[float], optional
273
+ heat capacity [J m-3 K-1], by default 1.6e6
274
+ k : Optional[float], optional
275
+ thermal conductivity [W m-1 K-1], by default 2.5
276
+ A : Optional[float], optional
277
+ Amplitude of temperature fluctuation [C], by default 6
278
+ MAGST : Optional[float], optional
279
+ Mean annual ground surface temperature [C], by default -0.5
280
+
281
+ Returns
282
+ -------
283
+ TSP
284
+ A timeseries profile (TSP) object
285
+ """
286
+ times = pd.date_range(start=start, end=end, freq=freq).to_pydatetime()
287
+ t_sec = np.array([(t-times[0]).total_seconds() for t in times])
288
+
289
+ values = analytical_fourier(depths=depths,
290
+ times=t_sec,
291
+ Q=Q,
292
+ c=c,
293
+ k=k,
294
+ A=A,
295
+ MAGST=MAGST)
296
+
297
+ this = cls(depths=depths, times=times, values=values)
298
+
299
+ return this
300
+
301
+ @property
302
+ @functools.lru_cache()
303
+ def long(self) -> "pd.DataFrame":
304
+ """ Return the data in a 'long' or 'tidy' format (one row per observation, one column per variable)
305
+
306
+ Returns
307
+ -------
308
+ pandas.DataFrame
309
+ Time series profile data with columns:
310
+ - **time**: time
311
+ - **depth**: depth
312
+ - **temperature_in_ground**: temperature
313
+ - **number_of_observations**: If data are aggregated, how many observations are used in the aggregation
314
+ """
315
+ values = self.wide.melt(id_vars='time',
316
+ var_name="depth",
317
+ value_name="temperature_in_ground")
318
+
319
+ number_of_observations = self.number_of_observations.melt(id_vars='time',
320
+ var_name="depth",
321
+ value_name="number_of_observations")
322
+
323
+ values['number_of_observations'] = number_of_observations['number_of_observations']
324
+
325
+ return values
326
+
327
+ @property
328
+ @functools.lru_cache()
329
+ def wide(self) -> "pd.DataFrame":
330
+ """ Return the data in a 'wide' format (one column per depth)
331
+
332
+ Returns
333
+ -------
334
+ pandas.DataFrame
335
+ Time series profile data
336
+ """
337
+ tabular = pd.DataFrame(self._values)
338
+ tabular.columns = self._depths
339
+ tabular.index = self.times
340
+ tabular.insert(0, "time", self.times)
341
+
342
+ return tabular
343
+
344
+ @property
345
+ @functools.lru_cache()
346
+ def number_of_observations(self) -> "pd.DataFrame":
347
+ """ The number of observations for an average at a particular depth or time.
348
+
349
+ For pure observational data, the number of observations will always be '1'. When data are aggregated,
350
+ (e.g. using :py:meth:`~tsp.core.TSP.monthly` or :py:meth:`~tsp.core.TSP.daily`) these numbers
351
+ will be greater than 1.
352
+
353
+ Returns
354
+ -------
355
+ DataFrame
356
+ Number of observations
357
+ """
358
+ tabular = pd.DataFrame(self.__number_of_observations, dtype=int)
359
+ tabular.columns = self._depths
360
+ tabular.index = self._times
361
+ tabular.insert(0, "time", self._times)
362
+
363
+ return tabular
364
+
365
+ @number_of_observations.setter
366
+ def number_of_observations(self, value):
367
+ raise ValueError(f"You can't assign {value} to this variable (no assignment allowed).")
368
+
369
+ def reset_counts(self):
370
+ """ Set observation count to 1 if data exists, 0 otherwise """
371
+ self.__number_of_observations = (~self.wide.isna()).astype('boolean')
372
+
373
+ def set_utc_offset(self, offset:"Union[int,str]") -> None:
374
+ """ Set the time zone of the data by providing a UTC offset
375
+
376
+ Parameters
377
+ ----------
378
+ offset : int, str
379
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
380
+ """
381
+ if self.utc_offset is not None:
382
+ raise ValueError("You can only set the UTC offset once.")
383
+
384
+ utc_offset = get_utc_offset(offset)
385
+
386
+ tz = timezone(timedelta(seconds = utc_offset))
387
+ self._times = self._times.tz_localize(tz)
388
+ self._output_utc_offset = timezone(timedelta(seconds = utc_offset))
389
+
390
+ TSP.wide.fget.cache_clear()
391
+ TSP.long.fget.cache_clear()
392
+
393
+ @property
394
+ def utc_offset(self) -> "Optional[tzinfo]":
395
+ """ Get the time zone of the data by providing a UTC offset
396
+
397
+ Returns
398
+ -------
399
+ datetime.tzinfo
400
+ A timezone object
401
+ """
402
+ if self._times.tz is None:
403
+ return None
404
+ else:
405
+ return self._times.tz
406
+
407
+ @utc_offset.setter
408
+ def utc_offset(self, value):
409
+ self.set_utc_offset(value)
410
+
411
+ @property
412
+ def output_utc_offset(self) -> "Optional[tzinfo]":
413
+ """ Get the time zone in which to output or display the data by providing a UTC offset
414
+
415
+ Returns
416
+ -------
417
+ datetime.tzinfo
418
+ A timezone object
419
+ """
420
+ if self._output_utc_offset is None:
421
+ return None
422
+ else:
423
+ return self._output_utc_offset
424
+
425
+ @output_utc_offset.setter
426
+ def output_utc_offset(self, offset:"Union[int,str]") -> None:
427
+ self.set_output_utc_offset(offset)
428
+
429
+ def set_output_utc_offset(self, offset:"Union[int,str]") -> None:
430
+ """ Set the time zone in which to display the output or data by providing a UTC offset
431
+ Parameters
432
+ ----------
433
+ offset : int, str
434
+ If int, the number of seconds. If str, a string in the format "+HH:MM" or "-HH:MM"
435
+ """
436
+ utc_offset = get_utc_offset(offset)
437
+ tz = timezone(timedelta(seconds = utc_offset))
438
+ self._output_utc_offset = tz
439
+
440
+ TSP.wide.fget.cache_clear()
441
+ TSP.long.fget.cache_clear()
442
+
443
+ def reset_output_utc_offset(self) -> None:
444
+ """ Reset the time zone in which to output or display the data to the default (the one set by set_utc_offset)
445
+
446
+ """
447
+ if self.utc_offset is None:
448
+ raise ValueError("You can't reset the output time zone if the time zone of the data hasn't yet been set with set_utc_offset.")
449
+ else:
450
+ self._output_utc_offset = self.utc_offset
451
+
452
+ def __nly(self,
453
+ freq_fmt:str,
454
+ new_freq,
455
+ min_count:Optional[int],
456
+ max_gap:Optional[int],
457
+ min_span:Optional[int]) -> TSP:
458
+ """
459
+ Temporal aggregation by grouping according to a string-ified time
460
+
461
+ Parameters
462
+ ----------
463
+ freq_fmt : str
464
+ Python date format string used to aggregate and recover time
465
+
466
+ Returns
467
+ -------
468
+ tuple[pd.DataFrame, pd.DataFrame]
469
+ A tuple of dataframes, the first containing the aggregated data, the second containing the number of observations
470
+ """
471
+ R = self.wide.drop("time", axis=1).resample(freq_fmt)
472
+ cumulative_obs = self.number_of_observations.drop("time", axis=1).resample(freq_fmt).sum()
473
+ total_obs = R.count()
474
+ values = R.mean()
475
+
476
+ # Calculate masks
477
+ mc_mask = Mg_mask = ms_mask = pd.DataFrame(index=values.index, columns=values.columns, data=False)
478
+
479
+ if min_count is not None:
480
+ mc_mask = (cumulative_obs < min_count)
481
+ if max_gap is not None:
482
+ Mg_mask = max_gap_mask(R, max_gap)
483
+ if min_span is not None:
484
+ ms_mask = min_span_mask(R, min_span)
485
+
486
+ mask = (mc_mask | Mg_mask | ms_mask)
487
+ values[mask] = np.nan
488
+
489
+ # Construct TSP
490
+ t = TSP.__from_tsp(self, times=values.index,
491
+ depths=values.columns,
492
+ values=values.values)
493
+ t.__number_of_observations = cumulative_obs
494
+ t.freq = new_freq
495
+
496
+ # Calculate data completeness
497
+ if self.freq is not None:
498
+ f1 = self.freq
499
+ f2 = new_freq
500
+ t._completeness = completeness(total_obs, f1, f2)
501
+
502
+ return t
503
+
504
+ def monthly(self,
505
+ min_count:Optional[int]=24,
506
+ max_gap:Optional[int]=3600*24*8,
507
+ min_span:Optional[int]=3600*24*21) -> "TSP":
508
+ """ Monthly averages, possibly with some months unavailable (NaN) if there is insufficient data
509
+
510
+ Parameters
511
+ ----------
512
+ min_count : int
513
+ Minimum number of observations in a month to be considered a valid average,
514
+ defaults to None
515
+ max_gap : int
516
+ Maximum gap (in seconds) between data points to be considered a valid average,
517
+ defaults to None
518
+ min_span : int
519
+ Minimum total data range (in seconds) to be consiered a valid average,
520
+ defaults to None
521
+
522
+ Returns
523
+ -------
524
+ TSP
525
+ A TSP object with data aggregated to monthly averages
526
+ """
527
+ t = self.__nly(freq_fmt="M",
528
+ new_freq=lbl.MONTHLY,
529
+ min_count=min_count,
530
+ max_gap=max_gap,
531
+ min_span=min_span)
532
+
533
+ return t
534
+
535
+ def daily(self,
536
+ min_count:Optional[int]=None,
537
+ max_gap:Optional[int]=None,
538
+ min_span:Optional[int]=None) -> "TSP":
539
+ """ Daily averages, possibly with some days unavailable (NaN) if there is insufficient data
540
+
541
+ Parameters
542
+ ----------
543
+ min_count : int
544
+ Minimum number of observations in a day to be considered a valid average,
545
+ defaults to None
546
+ max_gap : int
547
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
548
+ min_span : int
549
+ Minimum total data range (in seconds) to be consiered a valid average, defaults to None
550
+
551
+ Returns
552
+ -------
553
+ TSP
554
+ A TSP object with data aggregated to daily averages
555
+ """
556
+ # if the data is already daily +/- 1min , just return it
557
+ t = self.__nly(freq_fmt="D",
558
+ new_freq=lbl.DAILY,
559
+ min_count=min_count,
560
+ max_gap=max_gap,
561
+ min_span=min_span)
562
+
563
+ return t
564
+
565
+ def yearly(self,
566
+ min_count:Optional[int]=None,
567
+ max_gap:Optional[int]=None,
568
+ min_span:Optional[int]=None) -> "TSP":
569
+ """ Yearly averages, possibly with some years unavailable (NaN) if there is insufficient data
570
+
571
+ Parameters
572
+ ----------
573
+ min_count : int
574
+ Minimum number of observations in a month to be considered a valid average, defaults to None
575
+ max_gap : int
576
+ Maximum gap (in seconds) between data points to be considered a valid average, defaults to None
577
+ min_span : int
578
+ Minimum total data range (in seconds) to be consiered a valid average, defaults to None
579
+
580
+ Returns
581
+ -------
582
+ TSP
583
+ A TSP object with data aggregated to yearly averages
584
+ """
585
+ t = self.__nly(freq_fmt="Y",
586
+ new_freq=lbl.YEARLY,
587
+ min_count=min_count,
588
+ max_gap=max_gap,
589
+ min_span=min_span)
590
+
591
+ return t
592
+
593
+ @property
594
+ def depths(self) -> NDArray[np.number]:
595
+ """ Return the depth values in the profile
596
+
597
+ Returns
598
+ -------
599
+ numpy.ndarray
600
+ The depths in the profile
601
+ """
602
+ return self._depths
603
+
604
+ @depths.setter
605
+ def depths(self, value):
606
+ depths = np.atleast_1d(value)
607
+
608
+ if not len(depths) == len(self._depths):
609
+ raise ValueError(f"List of depths must have length of {len(self._depths)}.")
610
+
611
+ self._depths = depths
612
+
613
+ TSP.wide.fget.cache_clear()
614
+ TSP.long.fget.cache_clear()
615
+
616
+ @property
617
+ def times(self):
618
+ """ Return the timestamps in the time series
619
+
620
+ Returns
621
+ -------
622
+ pandas.DatetimeIndex
623
+ The timestamps in the time series
624
+ """
625
+ if self.utc_offset is None:
626
+ return self._times
627
+
628
+ elif self._output_utc_offset == self.utc_offset:
629
+ return self._times
630
+
631
+ else:
632
+ return self._times.tz_convert(self.output_utc_offset)
633
+
634
+ @property
635
+ def values(self):
636
+ return self._values
637
+
638
+ def to_gtnp(self, filename: str) -> None:
639
+ """ Write the data in GTN-P format
640
+
641
+ Parameters
642
+ ----------
643
+ filename : str
644
+ Path to the file to write to
645
+ """
646
+ df = self.wide.round(self._export_precision).rename(columns={'time': 'Date/Depth'})
647
+ df['Date/Depth'] = df['Date/Depth'].dt.strftime("%Y-%m-%d %H:%M:%S")
648
+
649
+ df.to_csv(filename, index=False, na_rep="-999")
650
+
651
+ def to_ntgs(self, filename:str, project_name:str="", site_id:"Optional[str]" = None, latitude:"Optional[float]"=None, longitude:"Optional[float]"=None) -> None:
652
+ """ Write the data in NTGS template format
653
+
654
+ Parameters
655
+ ----------
656
+ filename : str
657
+ Path to the file to write to
658
+ project_name : str, optional
659
+ The project name, by default ""
660
+ site_id : str, optional
661
+ The name of the site , by default None
662
+ latitude : float, optional
663
+ WGS84 latitude at which the observations were recorded, by default None
664
+ longitude : float, optional
665
+ WGS84 longitude at which the observations were recorded, by default None
666
+ """
667
+ if latitude is None:
668
+ latitude = self.latitude if self.latitude is not None else ""
669
+
670
+ if longitude is None:
671
+ longitude = self.longitude if self.longitude is not None else ""
672
+
673
+ if site_id is None:
674
+ site_id = self.site_id if self.site_id is not None else ""
675
+ data = self.values
676
+
677
+ df = pd.DataFrame({'project_name': pd.Series(dtype='str'),
678
+ 'site_id': pd.Series(dtype='str'),
679
+ 'latitude': pd.Series(dtype='float'),
680
+ 'longitude': pd.Series(dtype='float')
681
+ })
682
+
683
+ df["date_YYYY-MM-DD"] = pd.Series(self.times).dt.strftime(r"%Y-%m-%d")
684
+ df["time_HH:MM:SS"] = pd.Series(self.times).dt.strftime(r"%H:%M:%S")
685
+
686
+ df["project_name"] = project_name
687
+ df["site_id"] = site_id
688
+ df["latitude"] = latitude
689
+ df["longitude"] = longitude
690
+
691
+ headers = [str(d) + "_m" for d in self.depths]
692
+
693
+ for i, h in enumerate(headers):
694
+ df[h] = data[:, i].round(self._export_precision)
695
+
696
+ df.to_csv(filename, index=False)
697
+
698
+ def to_netcdf(self, file: str) -> None:
699
+ """ Write the data as a netcdf"""
700
+ try:
701
+ ncf = make_temperature_base(file, len(self.depths))
702
+ except NameError:
703
+ warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
704
+ return
705
+
706
+ with nc.Dataset(ncf, 'a') as ncd:
707
+ pytime = self.times.to_pydatetime()
708
+
709
+ ncd['depth_below_ground_surface'][:] = self.depths
710
+
711
+
712
+ ncd['time'][:] = nc.date2num(pytime, ncd['time'].units, ncd['time'].calendar)
713
+ ncd['ground_temperature'][:] = self.values
714
+
715
+ if self.latitude:
716
+ ncd['latitude'][:] = self.latitude
717
+ if self.longitude:
718
+ ncd['longitude'][:] = self.longitude
719
+ if self.site_id:
720
+ ncd['site_name'] = self.site_id
721
+
722
+ for key, value in self.metadata:
723
+ try:
724
+ ncd.setncattr(key, value)
725
+ except Exception:
726
+ warnings.warn(f"Could not set metadata item: {key}", stacklevel=2)
727
+
728
+ def to_json(self, file: str) -> None:
729
+ """ Write the data to a serialized json file """
730
+ with open(file, 'w') as f:
731
+ f.write(self._to_json())
732
+
733
+ def _to_json(self) -> str:
734
+ return self.wide.round(self._export_precision).to_json()
735
+
736
+ def plot_profiles(self, P:int=100, n:int=10) -> Figure:
737
+ """ Create a plot of the temperature profiles at different times
738
+
739
+ Parameters
740
+ ----------
741
+ P : int
742
+ Percentage of time range to plot
743
+ n : int
744
+ Number of evenly-spaced profiles to plot
745
+
746
+ Returns
747
+ -------
748
+ Figure
749
+ matplotlib `Figure` object
750
+ """
751
+ fig = profile_evolution(depths=self.depths, times=self.times, values=self._values, P=P, n=n)
752
+ fig.show()
753
+ return fig
754
+
755
    def plot_trumpet(self,
                     year: Optional[int]=None,
                     begin: Optional[datetime]=None,
                     end: Optional[datetime]=None,
                     min_completeness: Optional[float]=None,
                     **kwargs) -> Figure:
        """ Create a trumpet plot (per-depth min / mean / max temperature envelope) from the data

        Parameters
        ----------
        year : int, optional
            Which year to plot. Currently the only supported way to select data.
        begin : datetime, optional
            If 'end' also provided, the earliest measurement to include in the averaging for the plot.
            NOTE: not yet implemented; passing 'begin' or 'end' without 'year' raises NotImplementedError.
        end : datetime, optional
            If 'begin' also provided, the latest measurement to include in the averaging for the plot.
            NOTE: not yet implemented (see 'begin').
        min_completeness : float, optional
            If provided, the minimum completeness (fractional, 0 to 1) required to include
            in temperature envelope, otherwise
            the point is plotted as an unconnected, slightly transparent dot, by default None
        **kwargs : dict, optional
            Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.trumpet_curve` for a
            list of all possible arguments.

        Returns
        -------
        Figure
            a matplotlib `Figure` object

        Raises
        ------
        NotImplementedError
            If 'begin' or 'end' is provided without 'year'.
        ValueError
            If none of 'year', 'begin', 'end' is provided.
        """
        # Long-format data with missing observations removed.
        df = self.long.dropna()

        if year is not None:
            df = df[df['time'].dt.year == year]

        elif begin is not None or end is not None:
            raise NotImplementedError

        else:
            raise ValueError("One of 'year', 'begin', 'end' must be provided.")

        grouped = df.groupby('depth')

        # Per-depth extremes and mean over the selected year.
        max_t = grouped.max().get('temperature_in_ground').values
        min_t = grouped.min().get('temperature_in_ground').values
        mean_t = grouped.mean().get('temperature_in_ground').values
        # groupby sorts group keys by default, so depths come out in ascending order.
        depth = np.array([d for d in grouped.groups.keys()])

        # Calculate completeness
        # NOTE(review): yearly(...).completeness is computed even when
        # min_completeness is None and the result is then discarded — confirm this is cheap.
        c = self.yearly(None, None, None).completeness

        if min_completeness is not None and c is not None:
            # Completeness values for the selected year, aligned to the plotted depths.
            C = c[c.index.year == year]
            C = C[depth].iloc[0,:].values

        else:
            C = None

        fig = trumpet_curve(depth=depth,
                            t_max=max_t,
                            t_min=min_t,
                            t_mean=mean_t,
                            min_completeness=min_completeness,
                            data_completeness=C,
                            **kwargs)
        fig.show()

        return fig
822
+
823
+ def plot_contour(self, **kwargs) -> Figure:
824
+ """ Create a contour plot
825
+
826
+ Parameters
827
+ ----------
828
+ **kwargs : dict, optional
829
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.colour_contour` for a
830
+ list of all possible arguments.
831
+
832
+ Returns
833
+ -------
834
+ Figure
835
+ matplotlib `Figure` object
836
+ """
837
+ fig = colour_contour(depths=self.depths, times=self.times, values=self._values, **kwargs)
838
+
839
+ if self.output_utc_offset is not None:
840
+ label = format_utc_offset(self.output_utc_offset)
841
+ if label != "UTC":
842
+ label = f"UTC{label}"
843
+ fig.axes[0].set_xlabel(f"Time [{label}]")
844
+
845
+ fig.show()
846
+
847
+ return fig
848
+
849
+ def plot_timeseries(self, depths: list=[], **kwargs) -> Figure:
850
+ """Create a time series T(t) plot
851
+
852
+ Parameters
853
+ ----------
854
+ depths : list, optional
855
+ If non-empty, restricts the depths to include in the plot, by default []
856
+ **kwargs : dict, optional
857
+ Extra arguments to the plotting function: refer to the documentation for :func:`~tsp.plots.static.time_series` for a
858
+ list of all possible arguments.
859
+
860
+ Returns
861
+ -------
862
+ Figure
863
+ matplotlib `Figure` object
864
+ """
865
+ if depths == []:
866
+ depths = self.depths
867
+
868
+ d_mask = np.isin(self.depths, depths)
869
+
870
+ fig = time_series(self.depths[d_mask], self.times, self.values[:, d_mask], **kwargs)
871
+
872
+
873
+ if self.output_utc_offset is not None:
874
+ label = format_utc_offset(self.output_utc_offset)
875
+ if label != "UTC":
876
+ label = f"UTC{label}"
877
+ fig.axes[0].set_xlabel(f"Time [{label}]")
878
+ fig.autofmt_xdate()
879
+ fig.show()
880
+
881
+ return fig
882
+
883
+
884
class AggregatedTSP(TSP):
    """ A Time Series Profile produced by an aggregation operation.

    Behaviour is identical to :py:class:`tsp.core.TSP` (no methods are overridden).

    NOTE(review): the original docstring here was a verbatim copy of
    :py:class:`IndexedTSP`'s ("uses indices (1,2,3,...) instead of depth values"),
    which does not match this class's name. Judging by the name, instances are
    presumably returned by temporal-aggregation methods (e.g. daily/monthly
    averaging) — confirm the intended purpose against the callers.

    Parameters
    ----------
    times : list-like
        t-length array of datetime objects
    values : numpy.ndarray
        array with shape (t,d) containing values at (t)imes and (d)epths
    **kwargs : dict
        Extra arguments to parent class: refer to :py:class:`tsp.core.TSP` documentation for a
        list of all possible arguments.
    """
900
+
901
+
902
class IndexedTSP(TSP):
    """ A Time Series Profile that uses indices (1,2,3,...) instead of depth values.

    Used in situations when depths are unknown (such as when reading datalogger exports
    that don't have depth measurements.)

    Parameters
    ----------
    times : list-like
        t-length array of datetime objects
    values : numpy.ndarray
        array with shape (t,d) containing values at (t)imes and (d)epths
    **kwargs : dict
        Extra arguments to parent class: refer to :py:class:`~tsp.core.TSP` documentation for a
        list of all possible arguments.
    """

    def __init__(self, times, values, **kwargs):
        # Depth "indices" are 1-based: 1, 2, ..., d (one per data column).
        depths = np.arange(0, values.shape[1]) + 1
        super().__init__(times=times, depths=depths, values=values, **kwargs)

    @property
    def depths(self) -> np.ndarray:
        """Depth indices

        Returns
        -------
        numpy.ndarray
            An array of depth indices

        Warns
        -----
        UserWarning
            Always, because the returned values are indices rather than measured depths.
        """
        # Typo fixed in user-facing message: "instad" -> "instead".
        warnings.warn("This TSP uses indices (1,2,3,...) instead of depths. Use set_depths() to use measured depths.", stacklevel=2)
        return self._depths

    @depths.setter
    def depths(self, value):
        # Delegate to the parent property's setter so validation stays in one place.
        TSP.depths.__set__(self, value)

    def set_depths(self, depths: np.ndarray):
        """Assign depth values to depth indices. Change the object to a :py:class:`~tsp.core.TSP`

        Parameters
        ----------
        depths : np.ndarray
            An array or list of depth values equal in length to the depth indices
        """
        self.depths = depths
        # Re-class as a plain TSP so the depths property no longer warns.
        self.__class__ = TSP
949
+
950
+
951
+
952
def span(S: pd.Series) -> float:
    """Seconds between the first and last non-missing observations of ``S``.

    Parameters
    ----------
    S : pd.Series
        Series with a DatetimeIndex; NaN values are ignored.

    Returns
    -------
    float
        Elapsed seconds from the first to the last valid entry, or 0 if the
        series contains no valid data.
    """
    start = S.first_valid_index()  # type: pd.Timestamp
    stop = S.last_valid_index()  # type: pd.Timestamp

    if start is None or stop is None:
        return 0

    return (stop - start).total_seconds()
959
+
960
def min_span_mask(R: "pd.core.resample.DatetimeIndexResampler",
                  threshold: float) -> "pd.DataFrame":
    """Flag resampling bins whose first-to-last valid-data span is shorter than ``threshold`` seconds."""
    bin_spans = R.apply(span)
    mask = bin_spans < threshold
    return mask
964
+
965
+
966
def gap(S: pd.Series) -> float:
    """Largest time gap, in seconds, between consecutive valid observations of ``S``.

    Returns 0 when there are fewer than two inter-observation intervals
    (i.e. fewer than three valid observations).
    """
    deltas = np.diff(S.dropna().index)

    if len(deltas) <= 1:
        return 0

    largest = max(deltas)
    return largest.astype('timedelta64[s]').astype(float)
976
+
977
+
978
def max_gap_mask(R: "pd.core.resample.DatetimeIndexResampler",
                 threshold: float) -> "pd.DataFrame":
    """Flag resampling bins whose largest internal gap exceeds ``threshold`` seconds.

    Bins for which `gap` reports 0 (fewer than three observations) are also flagged.
    """
    bin_gaps = R.apply(gap)
    too_sparse = bin_gaps > threshold
    too_few = bin_gaps == 0
    return too_sparse | too_few
982
+
983
+
984
+
985
+
986
+ def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: Optional[int], min_span: Optional[int]) -> np.ndarray:
987
+ """ Mask out observational groups in which there is more than a certain size temporal gap
988
+
989
+ Controls for gaps in the data within an aggregation group (using max_gap) and missing data at the beginning
990
+ or end of the aggregation group (using min_span).
991
+
992
+ Parameters
993
+ ----------
994
+ grouped : pandas.core.groupby.DataFrameGroupBy
995
+ groupby with 'time' and 'depth' columns
996
+ max_gap : int
997
+ maximum gap in seconds to tolerate between observations in a group
998
+ min_span : int
999
+ minimum data range (beginning to end) in seconds.
1000
+
1001
+ Returns
1002
+ -------
1003
+ numpy.ndarray
1004
+ boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
1005
+ """
1006
+ if max_gap is not None:
1007
+ max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
1008
+ max_diff = max_diff.unstack().to_numpy()
1009
+ diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
1010
+ else:
1011
+ diff_mask = np.zeros_like(grouped, dtype=bool)
1012
+
1013
+ if min_span is not None:
1014
+ total_span = grouped.time.apply(np.ptp).apply(lambda x: x.total_seconds()).unstack().to_numpy()
1015
+ span_mask = np.where(total_span < min_span, True, False)
1016
+ else:
1017
+ span_mask = np.zeros_like(grouped, dtype=bool)
1018
+
1019
+ mask = diff_mask * span_mask
1020
+
1021
+ return mask
1022
+
1023
+
1024
+ def _observation_count_mask(number_of_observations: np.ndarray, min_count:int) -> np.ndarray:
1025
+ """ Create a mask array for an
1026
+
1027
+ Parameters
1028
+ ----------
1029
+ number_of_observations : numpy.ndarray
1030
+ Array of how many data points are in aggregation
1031
+ min_count : int
1032
+ Minimum number of data points for aggregation to be 'valid'
1033
+
1034
+ Returns
1035
+ -------
1036
+ np.ndarray
1037
+ a mask, True where data should be masked
1038
+ """
1039
+ valid = np.less(number_of_observations, min_count) # type: np.ndarray
1040
+ return valid
1041
+
1042
+
1043
def handle_incoming_times(times: "Union[np.ndarray, pd.DatetimeIndex, pd.Series, list]") -> "pd.DatetimeIndex":
    """Convert a list of times to a pandas DatetimeIndex object"""
    invalid_msg = "Times must be a list, numpy array, pandas DatetimeIndex, or pandas Series"

    # Reject empty inputs and anything without a length (e.g. scalars).
    try:
        if not len(times):
            raise ValueError(invalid_msg)
    except TypeError:
        raise ValueError(invalid_msg)

    if isinstance(times, pd.DatetimeIndex):
        return times

    if isinstance(times, pd.Series):
        try:
            index = pd.DatetimeIndex(times)
        except Exception:
            raise ValueError("Series must be convertible to DatetimeIndex")
        index.name = 'time'
        return index

    if isinstance(times, np.ndarray):
        index = pd.to_datetime(times)
        index.name = 'time'
        return index

    if isinstance(times, list):
        return pd.to_datetime(times)

    raise ValueError(invalid_msg)
1075
+
1076
def tsp_concat(tsp_list, on_conflict='error', metadata='first') -> TSP:
    """Combine multiple TSPs into a single TSP.

    Parameters
    ----------
    tsp_list : list[TSP]
        List of TSPs to combine. They must have the same depths
    on_conflict : str, optional
        Method to resolve duplicate times with different values. Chosen from "error", "keep", by default "error"
        - "error": Raise an error if duplicate times with different values are found.
        - "keep": Keep the first occurrence of the duplicate time.
    metadata : str, optional
        Method to select metadata from the TSPs. Chosen from "first", "identical", or "none", by default "first"
        - "first": Use the metadata from the first TSP in the list.
        - "identical": Only keep metadata records that are identical across TSPs.
        - "none": Ignore metadata and set it to None.
    Returns
    -------
    TSP
        Combined TSP.

    Description
    -----------
    This function combines multiple TSPs into a single TSP. The TSPs must have the same depths.
    """
    combined = _tsp_concat(tsp_list=tsp_list, on_conflict=on_conflict, metadata=metadata)

    # Pull out the positional constructor arguments; everything remaining in
    # the dictionary is forwarded as keyword arguments.
    times = combined.pop('times')
    depths = combined.pop('depths')
    values = combined.pop('values')

    return TSP(times, depths, values, **combined)