tsp 1.8.1__py3-none-any.whl → 1.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsp/__init__.py +11 -11
- tsp/__meta__.py +1 -1
- tsp/concatenation.py +159 -153
- tsp/core.py +1306 -1162
- tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
- tsp/data/2023-01-06_755-test.metadata.txt +208 -208
- tsp/data/NTGS_example_csv.csv +6 -6
- tsp/data/NTGS_example_slash_dates.csv +6 -6
- tsp/data/NTGS_gtr_example_excel.xlsx +0 -0
- tsp/data/example_geotop.csv +5240 -5240
- tsp/data/example_gtnp.csv +1298 -1298
- tsp/data/example_permos.csv +7 -7
- tsp/data/ntgs-db-multi.txt +3872 -0
- tsp/data/ntgs-db-single.txt +2251 -0
- tsp/data/test_geotop_has_space.txt +5 -5
- tsp/data/tsp_format_long.csv +10 -0
- tsp/data/tsp_format_wide_1.csv +7 -0
- tsp/data/tsp_format_wide_2.csv +7 -0
- tsp/dataloggers/AbstractReader.py +43 -43
- tsp/dataloggers/FG2.py +110 -110
- tsp/dataloggers/GP5W.py +114 -114
- tsp/dataloggers/Geoprecision.py +34 -34
- tsp/dataloggers/HOBO.py +930 -914
- tsp/dataloggers/RBRXL800.py +190 -190
- tsp/dataloggers/RBRXR420.py +371 -308
- tsp/dataloggers/Vemco.py +84 -0
- tsp/dataloggers/__init__.py +15 -15
- tsp/dataloggers/logr.py +196 -115
- tsp/dataloggers/test_files/004448.DAT +2543 -2543
- tsp/dataloggers/test_files/004531.DAT +17106 -17106
- tsp/dataloggers/test_files/004531.HEX +3587 -3587
- tsp/dataloggers/test_files/004534.HEX +3587 -3587
- tsp/dataloggers/test_files/010252.dat +1731 -1731
- tsp/dataloggers/test_files/010252.hex +1739 -1739
- tsp/dataloggers/test_files/010274.hex +1291 -1291
- tsp/dataloggers/test_files/010278.hex +3544 -3544
- tsp/dataloggers/test_files/012064.dat +1286 -1286
- tsp/dataloggers/test_files/012064.hex +1294 -1294
- tsp/dataloggers/test_files/012064_modified_start.hex +1294 -0
- tsp/dataloggers/test_files/012081.hex +3532 -3532
- tsp/dataloggers/test_files/013138_recovery_stamp.hex +1123 -0
- tsp/dataloggers/test_files/014037-2007.hex +95 -0
- tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.hex +11253 -0
- tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.xls +0 -0
- tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
- tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
- tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
- tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
- tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16.txt +36 -0
- tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_raw.csv +2074 -0
- tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_temp.csv +2074 -0
- tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_cfg.txt +30 -0
- tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_raw.csv +35 -0
- tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_temp.csv +35 -0
- tsp/dataloggers/test_files/204087.xlsx +0 -0
- tsp/dataloggers/test_files/Asc-1455As02.000 +2982 -0
- tsp/dataloggers/test_files/Asc-1456As02.000 +2992 -0
- tsp/dataloggers/test_files/Asc-1457As02.000 +2917 -0
- tsp/dataloggers/test_files/BGC_BH15_019362_20140610_1253.hex +1729 -0
- tsp/dataloggers/test_files/Bin2944.csv +759 -0
- tsp/dataloggers/test_files/Bin5494.csv +2972 -0
- tsp/dataloggers/test_files/Bin6786.csv +272 -0
- tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
- tsp/dataloggers/test_files/GP5W.csv +1121 -1121
- tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
- tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
- tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
- tsp/dataloggers/test_files/Minilog-II-T_350763_20190711_1.csv +2075 -0
- tsp/dataloggers/test_files/Minilog-II-T_350769_20190921_1.csv +6384 -0
- tsp/dataloggers/test_files/Minilog-II-T_354284_20190921_1.csv +4712 -0
- tsp/dataloggers/test_files/Minilog-T_7943_20140920_1.csv +5826 -0
- tsp/dataloggers/test_files/Minilog-T_8979_20140806_1.csv +2954 -0
- tsp/dataloggers/test_files/Minilog-T_975_20110824_1.csv +4343 -0
- tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
- tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
- tsp/dataloggers/test_files/RI03b_062831_20240905_1801.rsk +0 -0
- tsp/dataloggers/test_files/RI03b_062831_20240905_1801.xlsx +0 -0
- tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
- tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
- tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
- tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
- tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
- tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
- tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
- tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
- tsp/dataloggers/test_files/hobo2.csv +8702 -8702
- tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
- tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
- tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
- tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
- tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
- tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
- tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
- tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
- tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
- tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
- tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
- tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
- tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
- tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
- tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
- tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
- tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
- tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
- tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
- tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
- tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
- tsp/gtnp.py +148 -148
- tsp/labels.py +3 -3
- tsp/misc.py +90 -90
- tsp/physics.py +101 -101
- tsp/plots/static.py +388 -374
- tsp/readers.py +829 -548
- tsp/standardization/__init__.py +0 -0
- tsp/standardization/metadata.py +95 -0
- tsp/standardization/metadata_ref.py +0 -0
- tsp/standardization/validator.py +535 -0
- tsp/time.py +45 -45
- tsp/tspwarnings.py +27 -15
- tsp/utils.py +131 -101
- tsp/version.py +1 -1
- {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/METADATA +95 -86
- tsp-1.10.2.dist-info/RECORD +132 -0
- {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/licenses/LICENSE +674 -674
- {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/top_level.txt +1 -0
- tsp-1.8.1.dist-info/RECORD +0 -94
- {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/WHEEL +0 -0
tsp/readers.py
CHANGED
@@ -1,548 +1,829 @@
-import datetime
-import numpy as np
-import pandas as pd
[… the remaining 545 removed lines did not survive extraction; only fragments remain, e.g. "import …", "from tsp.dataloggers.…", "Path to …", "temp = temp[:,:,0,0]", "if metadata_filepath is …" …]
+import datetime
+import numpy as np
+import pandas as pd
+import datetime as dt
+import re
+import warnings
+
+try:
+    import netCDF4 as nc
+except ModuleNotFoundError:
+    warnings.warn("Missing netCDF4 library. Some functionality will be limited.")
+
+from pathlib import Path
+from typing import Union, Optional, Callable
+
+from tsp.dataloggers.Geoprecision import detect_geoprecision_type
+from tsp.dataloggers.HOBO import HOBO, HOBOProperties
+from tsp.dataloggers.logr import LogR, guessed_depths_ok
+from tsp.dataloggers.RBRXL800 import RBRXL800
+from tsp.dataloggers.RBRXR420 import RBRXR420
+from tsp.dataloggers.Vemco import Vemco
+import tsp.tspwarnings as tw
+
+from tsp.core import TSP, IndexedTSP
+from tsp.misc import _is_depth_column
+from tsp.gtnp import GtnpMetadata
+
+
+def read_classic(filepath: str, init_file: "Optional[str]" = None) -> TSP:
+    """Read output from the CLASSIC land surface model
+
+    Depth values, if provided, represent the midpoint of the model cells.
+
+    Parameters
+    ----------
+    filepath : str
+        Path to an output file
+    init_file : str, optional
+        Path to a CLASSIC init file. If provided, depth values will be calculated.
+        Otherwise an :py:class:`~tsp.core.IndexedTSP` is returned.
+
+    Returns
+    -------
+    IndexedTSP
+        An IndexedTSP. Use :py:meth:`~tsp.core.IndexedTSP.set_depths` to provide
+        depth information if init_file is not provided.
+    """
+    try:
+        nc
+    except NameError:
+        warnings.warn("netCDF4 library must be installed.")
+
+    # tbaracc_d / tbaracc_m / tbaracc_y
+    with nc.Dataset(filepath, 'r') as ncdf:
+        lat = ncdf['lat'][:]
+        lon = ncdf['lon'][:]
+        temp = ncdf['tsl'][:]  # (t, z)
+
+        try:
+            time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar,
+                               only_use_cftime_datetimes=False,
+                               only_use_python_datetimes=True)
+        except ValueError:
+            cf_time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar)
+            time = np.array([datetime.datetime.fromisoformat(t.isoformat()) for t in cf_time])
+
+    if init_file:
+        with nc.Dataset(init_file, 'r') as init:
+            delz = init["DELZ"][:]
+            depths = np.round(np.cumsum(delz) - np.multiply(delz, 0.5), 7)  # delz precision is lower so we get some very small offsets
+
+    if len(lat) > 1:
+        warnings.warn("Multiple points in file. Returning the first one found.")
+        # TODO: return Ensemble if multiple points
+        lat = lat[0]
+        lon = lon[0]
+        temp = temp[:, :, 0, 0]
+    else:
+        temp = temp[:, :, 0, 0]
+
+    t = IndexedTSP(times=time,
+                   values=temp,
+                   latitude=lat,
+                   longitude=lon,
+                   metadata={"source_file": filepath})
+
+    if init_file:
+        t.set_depths(depths)
+
+    return t
+
+
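
As orientation for the new reader, a minimal usage sketch (file paths and depth values here are hypothetical; `read_classic` and `set_depths` are defined in this diff):

```python
from tsp.readers import read_classic

# With an init file, cell-midpoint depths are computed and attached (hypothetical paths)
t = read_classic("site_tbaracc_d.nc", init_file="site_init.nc")

# Without one, an IndexedTSP is returned; supply depths yourself
it = read_classic("site_tbaracc_d.nc")
it.set_depths([0.05, 0.15, 0.35])  # illustrative values, one per model layer
```
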
def read_csv(filepath: str,
|
|
92
|
+
datecol: "Union[str, int]",
|
|
93
|
+
datefmt: str = "%Y-%m-%d %H:%M:%S",
|
|
94
|
+
depth_pattern: "Union[str, dict]" = r"^(-?[0-9\.]+)$",
|
|
95
|
+
na_values:list = [],
|
|
96
|
+
**kwargs) -> TSP:
|
|
97
|
+
r"""Read an arbitrary CSV file
|
|
98
|
+
|
|
99
|
+
Date and time must be in a single column, and the csv must be in the
|
|
100
|
+
'wide' data format (each depth is a separate column)
|
|
101
|
+
|
|
102
|
+
Parameters
|
|
103
|
+
----------
|
|
104
|
+
filepath : str
|
|
105
|
+
Path to csv file
|
|
106
|
+
datecol : Union[str, int]
|
|
107
|
+
Either the numeric index (starting at 0) of date column (if int) or name of date column or regular expression (if str)
|
|
108
|
+
datefmt : str, optional
|
|
109
|
+
The format of the datetime values. Use `python strftime format codes <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_,
|
|
110
|
+
by default ``"%Y-%m-%d %H:%M:%S"``
|
|
111
|
+
depth_pattern : str or dict
|
|
112
|
+
If string: A regular expression that matches the column names with depths. The regular expression must
|
|
113
|
+
have a single capture group that extracts just the numeric part of the column header, by default r"^(-?[0-9\.]+)$".
|
|
114
|
+
If column names were in the form ``"+/-1.0_m"`` (i.e. included 'm' to denote units), you could use the regular expression ``r"^(-?[0-9\.]+)_m$"``
|
|
115
|
+
If a dictionary is passed, the keys must be the column names and the values are the depths. This is useful if the column names are not numeric.
|
|
116
|
+
na_values : list, optional
|
|
117
|
+
Additional strings to recognize as NA. Passed to pandas.read_csv, by default []
|
|
118
|
+
|
|
119
|
+
Returns
|
|
120
|
+
-------
|
|
121
|
+
TSP
|
|
122
|
+
A TSP
|
|
123
|
+
"""
|
|
124
|
+
raw = pd.read_csv(filepath, na_values=na_values, **kwargs)
|
|
125
|
+
|
|
126
|
+
if not datecol in raw.columns and isinstance(datecol, str):
|
|
127
|
+
datecol = [re.search(datecol, c).group(1) for c in raw.columns if re.search(datecol, c)][0]
|
|
128
|
+
|
|
129
|
+
if isinstance(datecol, int):
|
|
130
|
+
datecol = raw.columns[datecol]
|
|
131
|
+
|
|
132
|
+
time = pd.to_datetime(raw[datecol], format=datefmt).to_numpy()
|
|
133
|
+
|
|
134
|
+
if isinstance(depth_pattern, str):
|
|
135
|
+
depth = [re.search(depth_pattern, c).group(1) for c in raw.columns if _is_depth_column(c, depth_pattern)]
|
|
136
|
+
depth_numeric = np.array([float(d) for d in depth])
|
|
137
|
+
|
|
138
|
+
elif isinstance(depth_pattern, dict):
|
|
139
|
+
depth = [c for c in raw.columns if c in depth_pattern.keys()]
|
|
140
|
+
depth_numeric = [depth_pattern[c] for c in raw.columns if c in depth_pattern.keys()]
|
|
141
|
+
|
|
142
|
+
else:
|
|
143
|
+
raise ValueError("depth_pattern must be a string or dictionary")
|
|
144
|
+
|
|
145
|
+
values = raw.loc[:, depth].to_numpy()
|
|
146
|
+
|
|
147
|
+
t = TSP(time,
|
|
148
|
+
depth_numeric,
|
|
149
|
+
values,
|
|
150
|
+
metadata={"source_file": filepath})
|
|
151
|
+
|
|
152
|
+
return t
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
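
A sketch of calling the new `read_csv` on a wide-format file; the file name and column layout are hypothetical, and the depth columns are matched by the default capture-group pattern:

```python
from tsp.readers import read_csv

# Hypothetical layout:
#   time,0.5,1.0,2.0
#   2021-01-01 00:00:00,-1.2,-0.8,-0.3
t = read_csv("site_01.csv",
             datecol="time",
             datefmt="%Y-%m-%d %H:%M:%S",
             depth_pattern=r"^(-?[0-9\.]+)$")
print(t.depths)  # [0.5, 1.0, 2.0]
```
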
+def read_geoprecision(filepath: str) -> IndexedTSP:
+    """Read a Geoprecision datalogger export (text file)
+
+    Reads GP5W- and FG2-style files from Geoprecision.
+
+    Parameters
+    ----------
+    filepath : str
+        Path to file.
+
+    Returns
+    -------
+    IndexedTSP
+        An IndexedTSP
+    """
+    Reader = detect_geoprecision_type(filepath)
+
+    if Reader is None:
+        raise RuntimeError("Could not detect type of geoprecision file (GP5W or FG2 missing from header)")
+    reader = Reader()
+
+    data = reader.read(filepath)
+    metadata = reader.META
+    metadata['_source_file'] = filepath
+    t = IndexedTSP(times=np.array(data['TIME'].dt.to_pydatetime()),
+                   values=data.drop("TIME", axis=1).values,
+                   metadata=metadata)
+
+    return t
+
+
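
A sketch of reading a Geoprecision export; the file name matches a packaged test file, and the depths are hypothetical (an IndexedTSP is returned because the export carries channel numbers, not depths):

```python
from tsp.readers import read_geoprecision

it = read_geoprecision("FG2_399.csv")  # GP5W vs FG2 detected from the header
it.set_depths([0.1, 0.5, 1.0, 2.0])    # hypothetical sensor depths, one per channel
```
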
+def read_geotop(file: str) -> TSP:
+    """Read a GEOtop soil temperature output file
+
+    Parameters
+    ----------
+    file : str
+        Path to file.
+
+    Returns
+    -------
+    TSP
+        A TSP
+
+    Description
+    -----------
+    GEOtop outputs all runs of all simulation periods in the same file, so this
+    function returns only the last run of the last simulation period.
+    """
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=tw.DuplicateTimesWarning)
+
+        t = read_csv(file,
+                     na_values=[-9999.0],
+                     datecol="^(Date.*)",
+                     datefmt=r"%d/%m/%Y %H:%M",
+                     depth_pattern=r"^(-?[0-9\.]+\s*)$")
+
+    t._depths *= 0.001  # Convert to [m]
+
+    # Only use last simulation period
+    # TODO: this could be improved
+    raw = pd.read_csv(file)
+
+    is_max_sim_period = raw['Simulation_Period'] == max(raw['Simulation_Period'])
+    is_last_run_in_max_sim_period = raw['Run'] == raw['Run'][is_max_sim_period].max()
+    last_run = np.logical_and(is_max_sim_period, is_last_run_in_max_sim_period)
+
+    last = TSP(times=t.times[last_run],
+               depths=t.depths,
+               values=t.values[last_run, :],
+               metadata={"_source_file": file,
+                         "Simulation_Period": max(raw['Simulation_Period']),
+                         "Run": max(raw['Run'])
+                         }
+               )
+
+    return last
+
+
+
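
A minimal call sketch with a hypothetical GEOtop output name:

```python
from tsp.readers import read_geotop

# Depths are converted to metres; only the last run of the
# last simulation period in the file is kept.
t = read_geotop("soil_temperature.txt")
```
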
+def read_gtnp(filename: str,
+              metadata_filepath=None,
+              autodetect_metadata=True) -> TSP:
+    """Read a file from a GTN-P database export
+
+    Parameters
+    ----------
+    filename : str
+        Path to file.
+    metadata_filepath : str, optional
+        Path to a GTN-P metadata file, by default None
+    autodetect_metadata : bool, optional
+        Whether to look for a matching metadata file next to the data file
+        when metadata_filepath is not given, by default True
+
+    Returns
+    -------
+    TSP
+        A TSP
+    """
+
+    t = read_csv(filename,
+                 na_values=[-999.0],
+                 datecol="Date/Depth",
+                 datefmt="%Y-%m-%d %H:%M:%S",
+                 depth_pattern=r"^(-?[0-9\.]+)$")
+
+    # try to automatically detect metadata file
+    if metadata_filepath is None and autodetect_metadata:
+        partial_name = Path(filename).stem
+
+        while partial_name:
+            test_metadata = Path(Path(filename).parent, partial_name).with_suffix(".metadata.txt")
+
+            if test_metadata.is_file():
+                metadata_filepath = test_metadata
+                break
+            else:
+                partial_name = partial_name[:-1]
+
+    if metadata_filepath is not None:
+        try:
+            meta = GtnpMetadata(metadata_filepath)
+        except Exception as e:
+            warnings.warn(f"Failed to read metadata file: {e}")
+            return t
+        t.metadata['raw'] = meta.raw
+        t.metadata['parsed'] = meta.parsed
+
+        # set time zone
+        tz = meta.get_timezone()
+        if tz:
+            t.set_utc_offset(int(tz.utcoffset(datetime.datetime.now()).total_seconds()))
+
+        # set location
+        t.latitude = meta.get_latitude() if meta.get_latitude() else None
+        t.longitude = meta.get_longitude() if meta.get_longitude() else None
+
+    t.metadata['_source_file'] = filename
+
+    return t
+
+
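
A sketch using the packaged GTN-P example file name; a matching `*.metadata.txt` next to the data file is picked up automatically:

```python
from tsp.readers import read_gtnp

t = read_gtnp("example_gtnp.csv")
print(t.latitude, t.longitude)  # populated from the metadata file when one is found
```
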
+def read_gtpem(file: str) -> "list[TSP]":
+    output = list()
+    try:
+        with nc.Dataset(file) as ncdf:
+            n_sim = len(ncdf['geotop']['sitename'][:])
+            time = 1
+            for i, name in enumerate(ncdf['geotop']['sitename'][:]):
+                pass
+                # t = TSP()
+    except NameError:
+        warnings.warn("netCDF4 library must be installed.")
+
+    return output
+
+
+def read_hoboware(filepath: str, hoboware_config: Optional[HOBOProperties] = None) -> IndexedTSP:
+    """Read Onset HOBOware datalogger exports
+
+    Parameters
+    ----------
+    filepath : str
+        Path to a file
+    hoboware_config : HOBOProperties, optional
+        A HOBOProperties object with information about how the file is configured. If not
+        provided, the configuration will be automatically detected if possible, by default None
+
+    Returns
+    -------
+    IndexedTSP
+        An IndexedTSP. Use the `set_depths` method to provide depth information
+    """
+    reader = HOBO(properties=hoboware_config)
+    data = reader.read(filepath)
+
+    metadata = reader.META
+    metadata['_source_file'] = filepath
+
+    t = IndexedTSP(times=data['TIME'],
+                   values=data.drop("TIME", axis=1).values,
+                   metadata=metadata)
+
+    return t
+
+
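
A sketch using a packaged HOBOware test file name (the depths are hypothetical; the export itself only identifies channels):

```python
from tsp.readers import read_hoboware

it = read_hoboware("hobo2.csv")      # export configuration auto-detected when possible
it.set_depths([0.5, 1.0, 2.5, 5.0])  # hypothetical depths, one per channel
```
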
+def read_logr(filepath: str, cfg_txt: str = None) -> "Union[IndexedTSP, TSP]":
+    """Read a LogR datalogger export (text file)
+
+    Reads LogR ULogC16-32 files.
+
+    Parameters
+    ----------
+    filepath : str
+        Path to file.
+    cfg_txt : str, optional
+        Path to the configuration text file of the logger.
+
+    Returns
+    -------
+    IndexedTSP, TSP
+        An IndexedTSP or TSP, depending on whether the depth labels are sensible
+    """
+    r = LogR()
+    data = r.read(file=filepath, cfg_txt=cfg_txt)
+    times = np.array(data['TIME'].dt.to_pydatetime())
+    channels = pd.Series(data.columns).str.match("^CH")
+    values = data.loc[:, channels.to_numpy()]
+    metadata = r.META
+    metadata['_source_file'] = filepath
+
+    if guessed_depths_ok(metadata['guessed_depths'], sum(channels)):
+        t = TSP(times=times,
+                depths=metadata['guessed_depths'][-sum(channels):],
+                values=values.values)
+
+    else:
+        warnings.warn("Could not convert all channel labels into numeric depths. "
+                      "Use the set_depths() method to specify observation depths. "
+                      "Guessed depths can be accessed from .metadata['guessed_depths'].")
+
+        t = IndexedTSP(times=times,
+                       values=values.values,
+                       metadata=metadata)
+
+    return t
+
+
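
A sketch using a packaged LogR test file name:

```python
from tsp.readers import read_logr

t = read_logr("logR_ULogC16-32_1.csv")
# If the channel labels cannot all be parsed as depths, an IndexedTSP is
# returned instead, and t.metadata['guessed_depths'] holds the best guesses.
```
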
+def read_netcdf(file: str, standard_name='temperature_in_ground') -> TSP:
+    """Read a CF-compliant netCDF file
+
+    Parameters
+    ----------
+    file : str
+        Path to netCDF file.
+    standard_name : str, optional
+        The standard name of the data variable, by default 'temperature_in_ground'.
+        'soil_temperature' is also common.
+
+    The file must represent data from a single location.
+    A single time variable (with attribute 'axis=T') must be present.
+    A single depth variable (with attribute 'axis=Z') must be present.
+    A single data variable (with standard name 'temperature_in_ground' or 'soil_temperature') must be present.
+
+    """
+    try:
+        with nc.Dataset(file) as ncdf:
+            globals = {k: v for k, v in ncdf.__dict__.items() if not k.startswith("_")}
+
+            # Checks - global attributes
+            if not globals.get("featureType", "").lower() == "timeseriesprofile":
+                warnings.warn("featureType is not a time series profile")
+
+            # Checks - data
+            time = ncdf.get_variables_by_attributes(axis='T')
+            if len(time) == 0:
+                raise ValueError("No time variable (with attribute 'axis=T') found")
+            if len(time) > 1:
+                raise ValueError("More than one time variable (with attribute 'axis=T') found")
+
+            if 'units' not in time[0].ncattrs():
+                raise ValueError("Time variable does not have a 'units' attribute")
+            if 'calendar' not in time[0].ncattrs():
+                raise ValueError("Time variable does not have a 'calendar' attribute")
+
+            depth = ncdf.get_variables_by_attributes(axis='Z')
+            if len(depth) == 0:
+                raise ValueError("No depth variable (with attribute 'axis=Z') found")
+            if len(depth) > 1:
+                raise ValueError("More than one depth variable (with attribute 'axis=Z') found")
+
+            temperature = ncdf.get_variables_by_attributes(standard_name=lambda x: x in ['temperature_in_ground', 'soil_temperature'])
+            if len(temperature) == 0:
+                raise ValueError("No temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
+            if len(temperature) > 1:
+                raise ValueError("More than one temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
+
+            # Get data
+            times = nc.num2date(time[0][:],
+                                units=time[0].units,
+                                calendar=time[0].calendar,
+                                only_use_cftime_datetimes=False,
+                                only_use_python_datetimes=True)
+            depths = np.round(np.array(depth[0][:], dtype='float64'), 5)
+            values = temperature[0][:]
+
+    except NameError:
+        warnings.warn("netCDF4 library must be installed.")
+        return None
+
+    except ValueError as e:
+        warnings.warn(f"File does not meet formatting requirements: ({e})")
+        return None
+
+    metadata = {"CF": globals,
+                "_source_file": file}
+
+    t = TSP(times=times, depths=depths, values=values, metadata=metadata)
+
+    return t
+
+
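
A sketch with a hypothetical file name; note that the function warns and returns None, rather than raising, when a formatting requirement is unmet:

```python
from tsp.readers import read_netcdf

t = read_netcdf("borehole_profile.nc")
if t is None:
    print("file failed the CF formatting checks")
```
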
+def read_ntgs_gtr(filename: str) -> TSP:
+    """Read a file from the NTGS permafrost ground temperature report
+
+    Parameters
+    ----------
+    filename : str
+        Path to file.
+
+    Returns
+    -------
+    TSP
+        A TSP
+    """
+    if Path(filename).suffix == ".csv":
+        try:
+            raw = pd.read_csv(filename,
+                              keep_default_na=False, na_values=[''],
+                              parse_dates={"time": ["date_YYYY-MM-DD", "time_HH:MM:SS"]})
+        except IndexError:
+            raise IndexError("There are insufficient columns; the file format is invalid.")
+    elif Path(filename).suffix in [".xls", ".xlsx"]:
+        try:
+            raw = pd.read_excel(filename,
+                                sheet_name=1, parse_dates=False)
+            # Avoid any excel date nonsense
+            safe_date = raw.pop('date_YYYY-MM-DD').astype(str).str.extract(r"([0-9]{4}-[0-9]{2}-[0-9]{2})")
+            safe_time = raw.pop('time_HH:MM:SS').astype(str).str.extract(r"([0-9]{2}:[0-9]{2}:[0-9]{2})")
+            raw.insert(0, 'time', safe_date[0] + " " + safe_time[0])
+            raw['time'] = pd.to_datetime(raw['time'], format="%Y-%m-%d %H:%M:%S")
+        except IndexError:
+            raise IndexError("There are insufficient columns; the file format is invalid.")
+    else:
+        raise TypeError("Unsupported file extension.")
+
+    metadata = {
+        'project_name': raw['project_name'].values[0],
+        '_site_id': raw['site_id'].values[0],
+        '_latitude': raw['latitude'].values[0],
+        '_longitude': raw['longitude'].values[0],
+        '_source_file': filename
+    }
+    match_depths = [c for c in [re.search(r"(-?[0-9\.]+)_m$", C) for C in raw.columns] if c]
+    values = raw.loc[:, [d.group(0) for d in match_depths]].values
+    times = np.array(raw['time'].dt.to_pydatetime())
+
+    t = TSP(times=times,
+            depths=[float(d.group(1)) for d in match_depths],
+            values=values,
+            latitude=raw['latitude'].values[0],
+            longitude=raw['longitude'].values[0],
+            site_id=raw['site_id'].values[0],
+            metadata=metadata)
+
+    return t
+
+
+def read_ntgs_db(filename: str) -> dict[str, TSP]:
+    """Read a file from the NTGS permafrost database export
+
+    Parameters
+    ----------
+    filename : str
+        Path to file.
+
+    Returns
+    -------
+    dict[str, TSP]
+        A dictionary of TSPs, keyed by SITE_ID
+    """
+    df = pd.read_csv(filename, parse_dates=['MEASUREMENT_DATETIME'])
+    grouped = df.groupby("SITE_ID")
+    wide_dict = {name: __parse_ntgs_db_df(data, site_id=name) for name, data in grouped}
+
+    for name, tsp_obj in wide_dict.items():
+        tsp_obj.metadata['_source_file'] = filename
+        tsp_obj.metadata['_site_id'] = name
+
+    return wide_dict
+
+
+def read_ntgs_db_single(filename: str,
+                        select=None,
+                        duplicate_depths='mean') -> TSP:
+    """Read a file from the NTGS permafrost database export with a single TSP output
+
+    Parameters
+    ----------
+    filename : str
+        Path to file.
+    select : str or int, optional
+        How to handle multiple SITE_IDs in the file. If an integer, it is treated as the index of the SITE_ID to use (0-based).
+        If a string, it is treated as the site ID to use. If None, an error is raised if multiple SITE_IDs are found.
+    duplicate_depths : str, optional
+        How to handle duplicate depth measurements. Options are 'mean' (default) and 'error'.
+
+    Returns
+    -------
+    TSP
+        A TSP
+    """
+    df = pd.read_csv(filename)
+
+    if len(df['SITE_ID'].unique()) > 1 and select is None:
+        raise ValueError("Multiple SITE_IDs found in file.")
+    elif len(df['SITE_ID'].unique()) > 1 and isinstance(select, int):
+        df = df[df['SITE_ID'] == df['SITE_ID'].unique()[select]]
+    elif len(df['SITE_ID'].unique()) > 1 and isinstance(select, str):
+        df = df[df['SITE_ID'] == select]
+
+    metadata = {'_source_file': filename,
+                '_site_id': df['SITE_ID'].unique()[0]}
+
+    t = __parse_ntgs_db_df(df, duplicate_depths=duplicate_depths, site_id=metadata['_site_id'])
+    t.metadata.update(metadata)
+    return t
+
+
+def __parse_ntgs_db_df(df: pd.DataFrame, site_id=None, duplicate_depths='mean') -> TSP:
+    wide = df.pivot_table(index='MEASUREMENT_DATETIME',
+                          columns='DEPTH_M',
+                          values='TEMPERATURE_C',
+                          aggfunc=duplicate_depths).reset_index()
+
+    times = wide.pop('MEASUREMENT_DATETIME').to_numpy()
+    depths = wide.columns.to_numpy().astype(float)
+    values = wide.to_numpy()
+
+    t = TSP(times=times,
+            depths=depths,
+            values=values,
+            site_id=site_id)
+
+    return t
+
+
+def read_ntgs(filename: str, allow_multiple_sites=False) -> TSP | dict[str, TSP]:
+    """Read an NTGS file.
+
+    Parameters
+    ----------
+    filename : str
+        Path to file.
+
+    Returns
+    -------
+    TSP | dict[str, TSP]
+        A TSP, or a dictionary of TSPs with SITE_ID as keys if multiple SITE_IDs are found and `allow_multiple_sites` is True.
+
+    Description
+    -----------
+    Attempts to read the file as a ground temperature report file first. If that fails, attempts to read
+    it as a database export. If multiple SITE_IDs are found in the database export,
+    a dictionary of TSPs is returned if `allow_multiple_sites` is True.
+    """
+    try:
+        return read_ntgs_gtr(filename)
+    except Exception:
+        dict_t = read_ntgs_db(filename)
+
+        if len(dict_t.keys()) == 1:
+            return list(dict_t.values())[0]
+
+        elif allow_multiple_sites:
+            return dict_t
+
+        else:
+            raise ValueError(f"Found {len(dict_t.keys())} unique SITE_ID values in file. "
+                             "Use read_ntgs_db() or set `allow_multiple_sites=True` to return all sites as a dictionary.")
+
+
+
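
A dispatch sketch using the packaged example export names (ntgs-db-single.txt and ntgs-db-multi.txt appear in the file list above; real paths depend on where the package data is installed):

```python
from tsp.readers import read_ntgs

t = read_ntgs("ntgs-db-single.txt")  # one site: a single TSP
sites = read_ntgs("ntgs-db-multi.txt", allow_multiple_sites=True)  # many sites: dict[str, TSP]
```
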
+def read_rbr(file_path: str) -> IndexedTSP:
+    """Read an RBR datalogger export
+
+    Parameters
+    ----------
+    file_path : str
+        Path to file.
+
+    Returns
+    -------
+    IndexedTSP
+        An IndexedTSP
+    """
+    file_extension = Path(file_path).suffix.lower()
+    if file_extension in [".dat", ".hex"]:
+        with open(file_path, "r") as f:
+            first_line = f.readline()
+            model = first_line.split()[1]
+        if model == "XL-800":
+            r = RBRXL800()
+        elif model in ["XR-420", "XR-420-T8"]:
+            r = RBRXR420()
+        else:
+            raise ValueError(f"logger model {model} unsupported")
+        data = r.read(file_path)
+    elif file_extension in [".xls", ".xlsx", ".rsk"]:
+        r = RBRXR420()
+        data = r.read(file_path)
+    else:
+        raise IOError("File is not .dat, .hex, .xls, .xlsx, or .rsk")
+
+    times = np.array(data['TIME'].dt.to_pydatetime())
+    channels = pd.Series(data.columns).str.match("^ch")
+    values = data.loc[:, channels.to_numpy()]
+
+    metadata = r.META
+    metadata['_source_file'] = file_path
+
+    t = IndexedTSP(times=times, values=values.values, metadata=metadata)
+    if "utc_offset" in list(r.META.keys()):
+        t.set_utc_offset(r.META["utc_offset"])
+
+    return t
+
+
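
A sketch using a packaged RBR test file name (the depths are hypothetical):

```python
from tsp.readers import read_rbr

it = read_rbr("rbr_001.dat")          # model (XL-800 / XR-420) read from the header
it.set_depths([1.0, 2.0, 5.0, 10.0])  # hypothetical channel depths
```
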
+def read_vemco(file_path: str) -> IndexedTSP:
+    """Read a Vemco datalogger export
+
+    Parameters
+    ----------
+    file_path : str
+        Path to file.
+
+    Returns
+    -------
+    IndexedTSP
+        An IndexedTSP
+    """
+    file_extension = Path(file_path).suffix.lower()
+    if file_extension in [".000", ".csv"]:
+        r = Vemco()
+        data = r.read(file_path)
+    else:
+        raise IOError("File is not .000 or .csv")
+
+    times = np.array(data['TIME'].dt.to_pydatetime())
+    channels = pd.Series(data.columns).str.match("^TEMP")
+    values = data.loc[:, channels.to_numpy()]
+
+    metadata = r.META
+    metadata['_source_file'] = file_path
+
+    t = IndexedTSP(times=times, values=values.values, metadata=metadata)
+    if "utc_offset" in list(r.META.keys()):
+        t.set_utc_offset(r.META["utc_offset"].seconds)
+    return t
+
+
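
A sketch using a packaged Vemco Minilog test file name (the depth is hypothetical):

```python
from tsp.readers import read_vemco

it = read_vemco("Minilog-T_975_20110824_1.csv")
it.set_depths([1.5])  # one value per TEMP channel; a single channel is assumed here
```
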
+def to_native_datetime(timestamp: pd.Timestamp) -> dt.datetime:
+    return timestamp.to_pydatetime()
+
+
+def read_permos(filepath: str) -> TSP:
+    """Read a file from a PERMOS database export
+
+    Parameters
+    ----------
+    filepath : str
+        Path to file.
+
+    Returns
+    -------
+    TSP
+        A TSP
+
+    Used for data obtained from PERMOS (permos.ch/data-portal/permafrost-temperature-and-active-layer)
+    """
+    try:
+        raw = pd.read_csv(filepath,
+                          index_col=0,
+                          parse_dates=True)
+    except IndexError:
+        raise IndexError("There are insufficient columns; the file format is invalid.")
+    metadata = {
+        '_source_file': filepath
+    }
+    t = TSP(times=raw.index,
+            depths=[float(C) for C in raw.columns],
+            values=raw.values,
+            metadata=metadata)
+
+    return t
+
+def read_tsp(filepath: str) -> TSP:
+    """Read a TSP-style ground temperature file
+
+    Parameters
+    ----------
+    filepath : str
+        Path to file.
+
+    Returns
+    -------
+    TSP
+        A TSP
+    """
+    f, n, m = _tsp_format_parse(filepath)
+    t = f(filepath, n)
+    return t
+
+def _read_tsp_wide(filepath: str, n_skip) -> TSP:
+    """Read a wide-format TSP file
+
+    Parameters
+    ----------
+    filepath : str
+        Path to file.
+
+    Returns
+    -------
+    TSP
+        A TSP
+    """
+    t = read_csv(filepath,
+                 datecol="timestamp",
+                 datefmt=None,
+                 skiprows=n_skip,
+                 depth_pattern=r"^(-?[0-9\.]+)$")
+    return t
+
+def _read_tsp_long(filepath: str, n_skip) -> TSP:
+    """Read a long-format TSP file
+
+    Parameters
+    ----------
+    filepath : str
+        Path to file.
+
+    Returns
+    -------
+    TSP
+        A TSP
+    """
+    df = pd.read_csv(filepath, skiprows=n_skip)
+    time = pd.to_datetime(df['timestamp'], format=None).to_numpy()
+    depth = df['depth'].to_numpy().astype(float)
+    values = df['temperature'].to_numpy()
+
+    t = TSP.from_tidy_format(time,
+                             depth,
+                             values,
+                             metadata={"_source_file": filepath})
+
+    return t
+
+def _tsp_format_parse(filepath: str) -> tuple[Callable, int, list[str]]:
+    """Determine the format of a TSP file
+
+    Parameters
+    ----------
+    filepath : str
+        Path to file.
+
+    Returns
+    -------
+    function
+        The function to use to read the file
+    int
+        The number of header lines to skip
+    list[str]
+        The commented metadata header lines
+    """
+    func = None
+    n_skip = 0
+    metadata_lines = []
+
+    with open(filepath, 'r') as f:
+        while func is None:
+            line = f.readline()
+            if line.startswith("#"):
+                n_skip += 1
+                metadata_lines.append(line)
+            elif line.startswith("timestamp,depth"):
+                func = _read_tsp_long
+            elif line.startswith("timestamp,"):
+                func = _read_tsp_wide
+            else:
+                raise ValueError("File is not a valid TSP file")
+
+    return func, n_skip, metadata_lines
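
To close, a sketch of the format dispatch using the packaged sample names (tsp_format_wide_1.csv and tsp_format_long.csv from the file list above):

```python
from tsp.readers import read_tsp

# A header line starting "timestamp,depth" selects the long reader;
# any other "timestamp,..." header selects the wide reader.
wide = read_tsp("tsp_format_wide_1.csv")
long_fmt = read_tsp("tsp_format_long.csv")
```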