tsp 1.7.7__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +153 -0
  4. tsp/core.py +1162 -1035
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/example_geotop.csv +5240 -5240
  10. tsp/data/example_gtnp.csv +1298 -1298
  11. tsp/data/example_permos.csv +7 -7
  12. tsp/data/test_geotop_has_space.txt +5 -5
  13. tsp/dataloggers/AbstractReader.py +43 -43
  14. tsp/dataloggers/FG2.py +110 -110
  15. tsp/dataloggers/GP5W.py +114 -114
  16. tsp/dataloggers/Geoprecision.py +34 -34
  17. tsp/dataloggers/HOBO.py +914 -914
  18. tsp/dataloggers/RBRXL800.py +190 -190
  19. tsp/dataloggers/RBRXR420.py +308 -308
  20. tsp/dataloggers/__init__.py +15 -15
  21. tsp/dataloggers/logr.py +115 -115
  22. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  23. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  24. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  25. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  26. tsp/dataloggers/test_files/010252.dat +1731 -1731
  27. tsp/dataloggers/test_files/010252.hex +1739 -1739
  28. tsp/dataloggers/test_files/010274.hex +1291 -1291
  29. tsp/dataloggers/test_files/010278.hex +3544 -3544
  30. tsp/dataloggers/test_files/012064.dat +1286 -1286
  31. tsp/dataloggers/test_files/012064.hex +1294 -1294
  32. tsp/dataloggers/test_files/012081.hex +3532 -3532
  33. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  34. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  35. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  36. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  37. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  38. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  39. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  40. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  41. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  42. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  43. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  44. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  45. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  46. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  47. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  48. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  49. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  50. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  51. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  52. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  53. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  54. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  55. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  56. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  57. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  58. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  59. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  60. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  61. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  62. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  63. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  64. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  65. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  66. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  67. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  68. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  69. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  70. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  71. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  72. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  73. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  74. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  75. tsp/gtnp.py +148 -148
  76. tsp/labels.py +3 -3
  77. tsp/misc.py +90 -90
  78. tsp/physics.py +101 -101
  79. tsp/plots/static.py +373 -373
  80. tsp/readers.py +548 -548
  81. tsp/time.py +45 -45
  82. tsp/tspwarnings.py +14 -14
  83. tsp/utils.py +101 -101
  84. tsp/version.py +1 -1
  85. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/METADATA +30 -23
  86. tsp-1.8.1.dist-info/RECORD +94 -0
  87. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/WHEEL +5 -5
  88. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info/licenses}/LICENSE +674 -674
  89. tsp/dataloggers/test_files/CSc_CR1000_1.dat +0 -295
  90. tsp/scratch.py +0 -6
  91. tsp-1.7.7.dist-info/RECORD +0 -95
  92. {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/top_level.txt +0 -0
tsp/readers.py CHANGED
@@ -1,548 +1,548 @@
import datetime
import numpy as np
import pandas as pd
import re
import warnings

try:
    import netCDF4 as nc
except ModuleNotFoundError:
    warnings.warn("Missing netCDF4 library. Some functionality will be limited.")

from pathlib import Path
from typing import Union, Optional

from tsp.dataloggers.Geoprecision import detect_geoprecision_type
from tsp.dataloggers.HOBO import HOBO, HOBOProperties
from tsp.dataloggers.logr import LogR, guessed_depths_ok
from tsp.dataloggers.RBRXL800 import RBRXL800
from tsp.dataloggers.RBRXR420 import RBRXR420
import tsp.tspwarnings as tw

from tsp.core import TSP, IndexedTSP
from tsp.misc import _is_depth_column
from tsp.gtnp import GtnpMetadata


def read_classic(filepath: str, init_file: "Optional[str]" = None) -> TSP:
    """Read output from the CLASSIC land surface model

    Depth values, if provided, represent the midpoints of the model cells.

    Parameters
    ----------
    filepath : str
        Path to an output file
    init_file : str, optional
        Path to a CLASSIC init file. If provided, depth values are calculated from it,
        by default None

    Returns
    -------
    TSP
        An IndexedTSP. Use :py:meth:`~tsp.core.IndexedTSP.set_depths` to provide depth
        information if init_file is not provided.
    """
    try:
        nc
    except NameError:
        warnings.warn("netCDF4 library must be installed.")

    # tbaracc_d / tbaracc_m / tbaracc_y
    with nc.Dataset(filepath, 'r') as ncdf:
        lat = ncdf['lat'][:]
        lon = ncdf['lon'][:]
        temp = ncdf['tsl'][:]  # t, z

        try:
            time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar,
                               only_use_cftime_datetimes=False,
                               only_use_python_datetimes=True)
        except ValueError:
            cf_time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar)
            time = np.array([datetime.datetime.fromisoformat(t.isoformat()) for t in cf_time])

    if init_file:
        with nc.Dataset(init_file, 'r') as init:
            delz = init["DELZ"][:]
            depths = np.round(np.cumsum(delz) - np.multiply(delz, 0.5), 7)  # delz precision is lower so we get some very small offsets

    if len(lat) > 1:
        warnings.warn("Multiple points in file. Returning the first one found.")
        # TODO: return Ensemble if multiple points
        lat = lat[0]
        lon = lon[0]
        temp = temp[:, :, 0, 0]
    else:
        temp = temp[:, :, 0, 0]

    t = IndexedTSP(times=time, values=temp, latitude=lat, longitude=lon)

    if init_file:
        t.set_depths(depths)

    return t
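
# A minimal usage sketch (file names are hypothetical; "tbaracc_m" follows the
# comment in the function body):
#   >>> t = read_classic("tbaracc_m.nc", init_file="init.nc")
#   >>> t.depths  # cell-midpoint depths computed from DELZ in the init file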


def read_csv(filepath: str,
             datecol: "Union[str, int]",
             datefmt: str = "%Y-%m-%d %H:%M:%S",
             depth_pattern: "Union[str, dict]" = r"^(-?[0-9\.]+)$",
             na_values: list = [],
             **kwargs) -> TSP:
    r"""Read an arbitrary CSV file

    Date and time must be in a single column, and the csv must be in the
    'wide' data format (each depth is a separate column).

    Parameters
    ----------
    filepath : str
        Path to csv file
    datecol : Union[str, int]
        Either the numeric index (starting at 0) of the date column (if int), or the
        name of the date column or a regular expression matching it (if str)
    datefmt : str, optional
        The format of the datetime values. Use `python strftime format codes <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_,
        by default ``"%Y-%m-%d %H:%M:%S"``
    depth_pattern : str or dict
        If a string: a regular expression that matches the column names containing depths. The regular expression must
        have a single capture group that extracts just the numeric part of the column header, by default ``r"^(-?[0-9\.]+)$"``.
        If column names were in the form ``"+/-1.0_m"`` (i.e. included 'm' to denote units), you could use the regular expression ``r"^(-?[0-9\.]+)_m$"``.
        If a dictionary is passed, the keys must be the column names and the values are the depths. This is useful if the column names are not numeric.
    na_values : list, optional
        Additional strings to recognize as NA. Passed to pandas.read_csv, by default []

    Returns
    -------
    TSP
        A TSP
    """
    raw = pd.read_csv(filepath, na_values=na_values, **kwargs)

    if datecol not in raw.columns and isinstance(datecol, str):
        datecol = [re.search(datecol, c).group(1) for c in raw.columns if re.search(datecol, c)][0]

    if isinstance(datecol, int):
        datecol = raw.columns[datecol]

    time = pd.to_datetime(raw[datecol], format=datefmt).to_numpy()

    if isinstance(depth_pattern, str):
        depth = [re.search(depth_pattern, c).group(1) for c in raw.columns if _is_depth_column(c, depth_pattern)]
        depth_numeric = np.array([float(d) for d in depth])

    elif isinstance(depth_pattern, dict):
        depth = [c for c in raw.columns if c in depth_pattern.keys()]
        depth_numeric = [depth_pattern[c] for c in raw.columns if c in depth_pattern.keys()]

    else:
        raise ValueError("depth_pattern must be a string or dictionary")

    values = raw.loc[:, depth].to_numpy()

    t = TSP(time, depth_numeric, values)

    return t
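
# Usage sketches echoing the docstring (file and column names are hypothetical):
#   >>> t = read_csv("data.csv", datecol="Date")                                # plain numeric headers
#   >>> t = read_csv("data.csv", datecol=0, depth_pattern=r"^(-?[0-9\.]+)_m$")  # headers like "1.0_m"
#   >>> t = read_csv("data.csv", datecol=0, depth_pattern={"sensor_a": 0.5})    # non-numeric headers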


def read_geoprecision(filepath: str) -> IndexedTSP:
    """Read a Geoprecision datalogger export (text file)

    Reads GP5W- and FG2-style files from Geoprecision.

    Parameters
    ----------
    filepath : str
        Path to file.

    Returns
    -------
    IndexedTSP
        An IndexedTSP
    """
    Reader = detect_geoprecision_type(filepath)

    if Reader is None:
        raise RuntimeError("Could not detect type of Geoprecision file ('GP5W' or 'FG2' missing from header)")
    reader = Reader()

    data = reader.read(filepath)
    t = IndexedTSP(times=data['TIME'].dt.to_pydatetime(),
                   values=data.drop("TIME", axis=1).values)

    t.metadata = reader.META
    return t
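
# Usage sketch using a test file shipped with the package; Geoprecision exports
# carry no depth information, so depths must be assigned afterwards:
#   >>> t = read_geoprecision("tsp/dataloggers/test_files/FG2_399.csv")
#   >>> t.set_depths([0.5, 1.0, 2.0])  # hypothetical depths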


def read_geotop(file: str) -> TSP:
    """Read a GEOtop soil temperature output file

    Parameters
    ----------
    file : str
        Path to file.

    Returns
    -------
    TSP
        A TSP

    Description
    -----------
    GEOtop writes all runs of all simulation periods to the same file, so this
    function returns only the last run of the last simulation period.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=tw.DuplicateTimesWarning)

        t = read_csv(file,
                     na_values=[-9999.0],
                     datecol="^(Date.*)",
                     datefmt=r"%d/%m/%Y %H:%M",
                     depth_pattern=r"^(-?[0-9\.]+\s*)$")

    t._depths *= 0.001  # Convert to [m]

    # Only use last simulation period
    # TODO: this could be improved
    raw = pd.read_csv(file)

    is_max_sim_period = raw['Simulation_Period'] == max(raw['Simulation_Period'])
    is_last_run_in_max_sim_period = raw['Run'] == raw['Run'][is_max_sim_period].max()
    last_run = np.logical_and(is_max_sim_period, is_last_run_in_max_sim_period)

    last = TSP(times=t.times[last_run],
               depths=t.depths,
               values=t.values[last_run, :],
               metadata={"Simulation_Period": max(raw['Simulation_Period']),
                         "Run": max(raw['Run'])})

    return last
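
# Usage sketch using the example file shipped with the package; header depths
# are scaled to metres (x 0.001) and only the final run is kept:
#   >>> t = read_geotop("tsp/data/example_geotop.csv")
#   >>> t.metadata["Simulation_Period"], t.metadata["Run"]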


def read_gtnp(filename: str,
              metadata_filepath=None,
              autodetect_metadata=True) -> TSP:
    """Read a text file from a GTN-P database export

    Parameters
    ----------
    filename : str
        Path to file.
    metadata_filepath : str, optional
        Path to a GTN-P metadata file, by default None
    autodetect_metadata : bool, optional
        Whether to look for a matching metadata file next to the data file when
        metadata_filepath is not given, by default True

    Returns
    -------
    TSP
        A TSP
    """
    t = read_csv(filename,
                 na_values=[-999.0],
                 datecol="Date/Depth",
                 datefmt="%Y-%m-%d %H:%M:%S",
                 depth_pattern=r"^(-?[0-9\.]+)$")

    # try to automatically detect metadata file
    if metadata_filepath is None and autodetect_metadata:
        partial_name = Path(filename).stem

        while partial_name:
            test_metadata = Path(Path(filename).parent, partial_name).with_suffix(".metadata.txt")

            if test_metadata.is_file():
                metadata_filepath = test_metadata
                break
            else:
                partial_name = partial_name[:-1]

    if metadata_filepath is not None:
        try:
            meta = GtnpMetadata(metadata_filepath)
        except Exception as e:
            warnings.warn(f"Failed to read metadata file: {e}")
            return t
        t.metadata['raw'] = meta.raw
        t.metadata['parsed'] = meta.parsed

        # set time zone
        tz = meta.get_timezone()
        if tz:
            t.set_utc_offset(int(tz.utcoffset(datetime.datetime.now()).total_seconds()))

        # set location
        t.latitude = meta.get_latitude() if meta.get_latitude() else None
        t.longitude = meta.get_longitude() if meta.get_longitude() else None

    return t
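
# Usage sketch using the example file shipped with the package; a matching
# *.metadata.txt file, when found alongside the data, fills in metadata,
# location, and UTC offset:
#   >>> t = read_gtnp("tsp/data/example_gtnp.csv")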


def read_gtpem(file: str) -> "list[TSP]":
    """Read a gtpem output file (not fully implemented: currently returns an empty list)."""
    output = list()
    try:
        with nc.Dataset(file) as ncdf:
            n_sim = len(ncdf['geotop']['sitename'][:])
            time = 1
            for i, name in enumerate(ncdf['geotop']['sitename'][:]):
                pass
                # t = TSP()
    except NameError:
        warnings.warn("netCDF4 library must be installed.")

    return output


def read_hoboware(filepath: str, hoboware_config: Optional[HOBOProperties] = None) -> IndexedTSP:
    """Read Onset HOBOware datalogger exports

    Parameters
    ----------
    filepath : str
        Path to a file
    hoboware_config : HOBOProperties, optional
        A HOBOProperties object with information about how the file is configured. If not
        provided, the configuration is automatically detected if possible, by default None

    Returns
    -------
    IndexedTSP
        An IndexedTSP. Use the `set_depths` method to provide depth information
    """
    reader = HOBO(properties=hoboware_config)
    data = reader.read(filepath)

    t = IndexedTSP(times=data['TIME'],
                   values=data.drop("TIME", axis=1).values)

    return t
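
# Usage sketch using a test file shipped with the package; HOBO exports are
# returned indexed, so depths must be assigned afterwards:
#   >>> t = read_hoboware("tsp/dataloggers/test_files/U23-001_HOBOware.csv")
#   >>> t.set_depths([0.05, 0.10])  # hypothetical depths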


def read_logr(filepath: str) -> "Union[IndexedTSP, TSP]":
    """Read a LogR datalogger export (text file)

    Reads LogR ULogC16-32 files.

    Parameters
    ----------
    filepath : str
        Path to file.

    Returns
    -------
    IndexedTSP, TSP
        An IndexedTSP or a TSP, depending on whether the depth labels are sensible
    """
    r = LogR()
    data = r.read(filepath)

    times = data['TIME'].dt.to_pydatetime()
    channels = pd.Series(data.columns).str.match("^CH")
    values = data.loc[:, channels.to_numpy()]

    if guessed_depths_ok(r.META['guessed_depths'], sum(channels)):
        t = TSP(times=times,
                depths=r.META['guessed_depths'][-sum(channels):],
                values=values.values)

    else:
        warnings.warn("Could not convert all channel labels into numeric depths. "
                      "Use the set_depths() method to specify observation depths. "
                      "Guessed depths can be accessed from .metadata['guessed_depths'].")

        t = IndexedTSP(times=times,
                       values=values.values,
                       metadata=r.META)

    return t
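
# Usage sketch using a test file shipped with the package; a TSP comes back when
# the channel labels parse to numeric depths, otherwise an IndexedTSP:
#   >>> t = read_logr("tsp/dataloggers/test_files/logR_ULogC16-32_1.csv")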


def read_netcdf(file: str, standard_name='temperature_in_ground') -> TSP:
    """Read a CF-compliant netCDF file

    Parameters
    ----------
    file : str
        Path to netCDF file.
    standard_name : str, optional
        The standard name of the data variable, by default 'temperature_in_ground'.
        'soil_temperature' is also common.

    The file must represent data from a single location.
    A single time variable (with attribute 'axis=T') must be present.
    A single depth variable (with attribute 'axis=Z') must be present.
    A single data variable (with standard name 'temperature_in_ground' or 'soil_temperature') must be present.
    """
    try:
        with nc.Dataset(file) as ncdf:
            global_attrs = {k: v for k, v in ncdf.__dict__.items() if not k.startswith("_")}

            # Checks - global attributes
            if not global_attrs.get("featureType", "").lower() == "timeseriesprofile":
                warnings.warn("featureType is not a time series profile")

            # Checks - data
            time = ncdf.get_variables_by_attributes(axis='T')
            if len(time) == 0:
                raise ValueError("No time variable (with attribute 'axis=T') found")
            if len(time) > 1:
                raise ValueError("More than one time variable (with attribute 'axis=T') found")

            if 'units' not in time[0].ncattrs():
                raise ValueError("Time variable does not have a 'units' attribute")
            if 'calendar' not in time[0].ncattrs():
                raise ValueError("Time variable does not have a 'calendar' attribute")

            depth = ncdf.get_variables_by_attributes(axis='Z')
            if len(depth) == 0:
                raise ValueError("No depth variable (with attribute 'axis=Z') found")
            if len(depth) > 1:
                raise ValueError("More than one depth variable (with attribute 'axis=Z') found")

            temperature = ncdf.get_variables_by_attributes(standard_name=lambda x: x in ['temperature_in_ground', 'soil_temperature'])
            if len(temperature) == 0:
                raise ValueError("No temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
            if len(temperature) > 1:
                raise ValueError("More than one temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")

            # Get data
            times = nc.num2date(time[0][:],
                                units=time[0].units,
                                calendar=time[0].calendar,
                                only_use_cftime_datetimes=False,
                                only_use_python_datetimes=True)
            depths = np.round(np.array(depth[0][:], dtype='float64'), 5)
            values = temperature[0][:]

    except NameError:
        warnings.warn("netCDF4 library must be installed.")
        return None

    except ValueError as e:
        warnings.warn(f"File does not meet formatting requirements: ({e})")
        return None

    t = TSP(times=times, depths=depths, values=values, metadata=global_attrs)
    return t
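
# Usage sketch (file name is hypothetical); returns None with a warning when
# the file does not meet the requirements listed in the docstring:
#   >>> t = read_netcdf("ground_temperature.nc")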


def read_ntgs(filename: str) -> TSP:
    """Read a file from the NTGS permafrost database

    Parameters
    ----------
    filename : str
        Path to file.

    Returns
    -------
    TSP
        A TSP
    """
    if Path(filename).suffix == ".csv":
        try:
            raw = pd.read_csv(filename,
                              keep_default_na=False, na_values=[''],
                              parse_dates={"time": ["date_YYYY-MM-DD", "time_HH:MM:SS"]})
        except IndexError:
            raise IndexError("There are insufficient columns; the file format is invalid.")
    elif Path(filename).suffix in [".xls", ".xlsx"]:
        raise NotImplementedError("Convert to CSV")
        # try:
        #     raw = pd.read_excel(filename, keep_default_na=False, parse_dates={"time": [4, 5]}, date_parser=self.getISOFormat)
        # except IndexError:
        #     raise IndexError("There are insufficient columns; the file format is invalid.")
    else:
        raise TypeError("Unsupported file extension.")

    metadata = {
        'project_name': raw['project_name'].values[0],
        'site_id': raw['site_id'].values[0],
        'latitude': raw['latitude'].values[0],
        'longitude': raw['longitude'].values[0]
    }
    match_depths = [c for c in [re.search(r"(-?[0-9\.]+)_m$", C) for C in raw.columns] if c]
    values = raw.loc[:, [d.group(0) for d in match_depths]].values
    times = raw['time'].dt.to_pydatetime()

    t = TSP(times=times,
            depths=[float(d.group(1)) for d in match_depths],
            values=values,
            latitude=raw['latitude'].values[0],
            longitude=raw['longitude'].values[0],
            site_id=raw['site_id'].values[0],
            metadata=metadata)

    return t
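
# Usage sketch using the example file shipped with the package:
#   >>> t = read_ntgs("tsp/data/NTGS_example_csv.csv")
#   >>> t.metadata["project_name"]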


def read_rbr(file_path: str) -> IndexedTSP:
    """Read an RBR datalogger export

    Parameters
    ----------
    file_path : str
        Path to file.

    Returns
    -------
    IndexedTSP
        An IndexedTSP
    """
    file_extension = Path(file_path).suffix.lower()
    if file_extension in [".dat", ".hex"]:
        with open(file_path, "r") as f:
            first_line = f.readline()
        model = first_line.split()[1]
        if model == "XL-800":
            r = RBRXL800()
        elif model in ["XR-420", "XR-420-T8"]:
            r = RBRXR420()
        else:
            raise ValueError(f"logger model {model} unsupported")
        data = r.read(file_path)
    elif file_extension in [".xls", ".xlsx", ".rsk"]:
        r = RBRXR420()
        data = r.read(file_path)
    else:
        raise IOError("File is not .dat, .hex, .xls, .xlsx, or .rsk")

    times = data['TIME'].dt.to_pydatetime()
    channels = pd.Series(data.columns).str.match("^ch")
    values = data.loc[:, channels.to_numpy()]

    t = IndexedTSP(times=times, values=values.values, metadata=r.META)
    if "utc offset" in list(r.META.keys()):
        t.set_utc_offset(r.META["utc offset"])

    return t
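
# Usage sketch using a test file shipped with the package; the logger model
# (XL-800 or XR-420) is detected from the first line of .dat/.hex files:
#   >>> t = read_rbr("tsp/dataloggers/test_files/rbr_001.dat")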


def read_permos(filepath: str) -> TSP:
    """Read a file from a PERMOS database export

    Parameters
    ----------
    filepath : str
        Path to file.

    Returns
    -------
    TSP
        A TSP

    Used for data obtained from PERMOS (permos.ch/data-portal/permafrost-temperature-and-active-layer)
    """
    try:
        raw = pd.read_csv(filepath,
                          index_col=0,
                          parse_dates=True)
    except IndexError:
        raise IndexError("There are insufficient columns; the file format is invalid.")

    t = TSP(times=raw.index,
            depths=[float(C) for C in raw.columns],
            values=raw.values)

    return t
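
# Usage sketch using the example file shipped with the package:
#   >>> t = read_permos("tsp/data/example_permos.csv")
#   >>> t.depths  # column headers parsed as numeric depths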