tsp 1.8.1__py3-none-any.whl → 1.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +159 -153
  4. tsp/core.py +1306 -1162
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/NTGS_gtr_example_excel.xlsx +0 -0
  10. tsp/data/example_geotop.csv +5240 -5240
  11. tsp/data/example_gtnp.csv +1298 -1298
  12. tsp/data/example_permos.csv +7 -7
  13. tsp/data/ntgs-db-multi.txt +3872 -0
  14. tsp/data/ntgs-db-single.txt +2251 -0
  15. tsp/data/test_geotop_has_space.txt +5 -5
  16. tsp/data/tsp_format_long.csv +10 -0
  17. tsp/data/tsp_format_wide_1.csv +7 -0
  18. tsp/data/tsp_format_wide_2.csv +7 -0
  19. tsp/dataloggers/AbstractReader.py +43 -43
  20. tsp/dataloggers/FG2.py +110 -110
  21. tsp/dataloggers/GP5W.py +114 -114
  22. tsp/dataloggers/Geoprecision.py +34 -34
  23. tsp/dataloggers/HOBO.py +930 -914
  24. tsp/dataloggers/RBRXL800.py +190 -190
  25. tsp/dataloggers/RBRXR420.py +371 -308
  26. tsp/dataloggers/Vemco.py +84 -0
  27. tsp/dataloggers/__init__.py +15 -15
  28. tsp/dataloggers/logr.py +196 -115
  29. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  30. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  31. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  32. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  33. tsp/dataloggers/test_files/010252.dat +1731 -1731
  34. tsp/dataloggers/test_files/010252.hex +1739 -1739
  35. tsp/dataloggers/test_files/010274.hex +1291 -1291
  36. tsp/dataloggers/test_files/010278.hex +3544 -3544
  37. tsp/dataloggers/test_files/012064.dat +1286 -1286
  38. tsp/dataloggers/test_files/012064.hex +1294 -1294
  39. tsp/dataloggers/test_files/012064_modified_start.hex +1294 -0
  40. tsp/dataloggers/test_files/012081.hex +3532 -3532
  41. tsp/dataloggers/test_files/013138_recovery_stamp.hex +1123 -0
  42. tsp/dataloggers/test_files/014037-2007.hex +95 -0
  43. tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.hex +11253 -0
  44. tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.xls +0 -0
  45. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  46. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  47. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  48. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  49. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16.txt +36 -0
  50. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_raw.csv +2074 -0
  51. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_temp.csv +2074 -0
  52. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_cfg.txt +30 -0
  53. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_raw.csv +35 -0
  54. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_temp.csv +35 -0
  55. tsp/dataloggers/test_files/204087.xlsx +0 -0
  56. tsp/dataloggers/test_files/Asc-1455As02.000 +2982 -0
  57. tsp/dataloggers/test_files/Asc-1456As02.000 +2992 -0
  58. tsp/dataloggers/test_files/Asc-1457As02.000 +2917 -0
  59. tsp/dataloggers/test_files/BGC_BH15_019362_20140610_1253.hex +1729 -0
  60. tsp/dataloggers/test_files/Bin2944.csv +759 -0
  61. tsp/dataloggers/test_files/Bin5494.csv +2972 -0
  62. tsp/dataloggers/test_files/Bin6786.csv +272 -0
  63. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  64. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  65. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  66. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  67. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  68. tsp/dataloggers/test_files/Minilog-II-T_350763_20190711_1.csv +2075 -0
  69. tsp/dataloggers/test_files/Minilog-II-T_350769_20190921_1.csv +6384 -0
  70. tsp/dataloggers/test_files/Minilog-II-T_354284_20190921_1.csv +4712 -0
  71. tsp/dataloggers/test_files/Minilog-T_7943_20140920_1.csv +5826 -0
  72. tsp/dataloggers/test_files/Minilog-T_8979_20140806_1.csv +2954 -0
  73. tsp/dataloggers/test_files/Minilog-T_975_20110824_1.csv +4343 -0
  74. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  75. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  76. tsp/dataloggers/test_files/RI03b_062831_20240905_1801.rsk +0 -0
  77. tsp/dataloggers/test_files/RI03b_062831_20240905_1801.xlsx +0 -0
  78. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  79. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  80. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  81. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  82. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  83. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  84. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  85. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  86. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  87. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  88. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  89. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  90. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  91. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  92. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  93. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  94. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  95. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  96. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  97. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  98. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  99. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  100. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  101. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  102. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  103. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  104. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  105. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  106. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  107. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  108. tsp/gtnp.py +148 -148
  109. tsp/labels.py +3 -3
  110. tsp/misc.py +90 -90
  111. tsp/physics.py +101 -101
  112. tsp/plots/static.py +388 -374
  113. tsp/readers.py +829 -548
  114. tsp/standardization/__init__.py +0 -0
  115. tsp/standardization/metadata.py +95 -0
  116. tsp/standardization/metadata_ref.py +0 -0
  117. tsp/standardization/validator.py +535 -0
  118. tsp/time.py +45 -45
  119. tsp/tspwarnings.py +27 -15
  120. tsp/utils.py +131 -101
  121. tsp/version.py +1 -1
  122. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/METADATA +95 -86
  123. tsp-1.10.2.dist-info/RECORD +132 -0
  124. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/licenses/LICENSE +674 -674
  125. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/top_level.txt +1 -0
  126. tsp-1.8.1.dist-info/RECORD +0 -94
  127. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/WHEEL +0 -0
tsp/readers.py CHANGED
@@ -1,548 +1,829 @@
1
- import datetime
2
- import numpy as np
3
- import pandas as pd
4
- import re
5
- import warnings
6
-
7
- try:
8
- import netCDF4 as nc
9
- except ModuleNotFoundError:
10
- warnings.warn("Missing netCDF4 library. Some functionality will be limited.")
11
-
12
- from pathlib import Path
13
- from typing import Union, Optional
14
-
15
- from tsp.dataloggers.Geoprecision import detect_geoprecision_type
16
- from tsp.dataloggers.HOBO import HOBO, HOBOProperties
17
- from tsp.dataloggers.logr import LogR, guessed_depths_ok
18
- from tsp.dataloggers.RBRXL800 import RBRXL800
19
- from tsp.dataloggers.RBRXR420 import RBRXR420
20
- import tsp.tspwarnings as tw
21
-
22
- from tsp.core import TSP, IndexedTSP
23
- from tsp.misc import _is_depth_column
24
- from tsp.gtnp import GtnpMetadata
25
-
26
-
27
- def read_classic(filepath: str, init_file: "Optional[str]"=None) -> TSP:
28
- """Read output from CLASSIC land surface model
29
-
30
- Depth values, if provided, represent the midpoint of the model cells.
31
-
32
- Parameters
33
- ----------
34
- filepath : str
35
- Path to an output file
36
- init_file : str
37
- Path to a classic init file. If provided, depth values will be calculated. Otherwise an :py:class:`~tsp.core.IndexedTSP` is returned
38
-
39
- Returns
40
- -------
41
- TSP
42
- An IndexedTSP. Use :py:meth:`~tsp.core.IndexedTSP.set_depths` to provide depth information if init_file is not provided.
43
- """
44
- try:
45
- nc
46
- except NameError:
47
- warnings.warn("netCDF4 library must be installed.")
48
-
49
- # tbaracc_d / tbaracc_m / tbaracc_y
50
- with nc.Dataset(filepath, 'r') as ncdf:
51
- lat = ncdf['lat'][:]
52
- lon = ncdf['lon'][:]
53
- temp = ncdf['tsl'][:] # t, z
54
-
55
- try:
56
- time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar,
57
- only_use_cftime_datetimes=False,
58
- only_use_python_datetimes=True)
59
- except ValueError:
60
- cf_time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar)
61
- time = np.array([datetime.datetime.fromisoformat(t.isoformat()) for t in cf_time])
62
-
63
- if init_file:
64
- with nc.Dataset(init_file, 'r') as init:
65
- delz = init["DELZ"][:]
66
- depths = np.round(np.cumsum(delz) - np.multiply(delz, 0.5), 7) # delz precision is lower so we get some very small offsets
67
-
68
- if len(lat) > 1:
69
- warnings.warn("Multiple points in file. Returning the first one found.")
70
- # TODO: return Ensemble if multiple points
71
- lat = lat[0]
72
- lon = lon[0]
73
- temp = temp[:,:,0,0]
74
- else:
75
- temp = temp[:,:,0,0]
76
-
77
- t = IndexedTSP(times=time, values=temp, latitude=lat, longitude=lon)
78
-
79
- if init_file:
80
- t.set_depths(depths)
81
-
82
- return t
83
-
84
-
85
- def read_csv(filepath: str,
86
- datecol: "Union[str, int]",
87
- datefmt: str = "%Y-%m-%d %H:%M:%S",
88
- depth_pattern: "Union[str, dict]" = r"^(-?[0-9\.]+)$",
89
- na_values:list = [],
90
- **kwargs) -> TSP:
91
- r"""Read an arbitrary CSV file
92
-
93
- Date and time must be in a single column, and the csv must be in the
94
- 'wide' data format (each depth is a separate column)
95
-
96
- Parameters
97
- ----------
98
- filepath : str
99
- Path to csv file
100
- datecol : Union[str, int]
101
- Either the numeric index (starting at 0) of the date column (if int), or the column name or a regular expression matching it (if str)
102
- datefmt : str, optional
103
- The format of the datetime values. Use `python strftime format codes <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_,
104
- by default ``"%Y-%m-%d %H:%M:%S"``
105
- depth_pattern : str or dict
106
- If string: A regular expression that matches the column names with depths. The regular expression must
107
- have a single capture group that extracts just the numeric part of the column header, by default r"^(-?[0-9\.]+)$".
108
- If column names were in the form ``"+/-1.0_m"`` (i.e. included 'm' to denote units), you could use the regular expression ``r"^(-?[0-9\.]+)_m$"``
109
- If a dictionary is passed, the keys must be the column names and the values are the depths. This is useful if the column names are not numeric.
110
- na_values : list, optional
111
- Additional strings to recognize as NA. Passed to pandas.read_csv, by default []
112
-
113
- Returns
114
- -------
115
- TSP
116
- A TSP
117
- """
118
- raw = pd.read_csv(filepath, na_values=na_values, **kwargs)
119
-
120
- if not datecol in raw.columns and isinstance(datecol, str):
121
- datecol = [re.search(datecol, c).group(1) for c in raw.columns if re.search(datecol, c)][0]
122
-
123
- if isinstance(datecol, int):
124
- datecol = raw.columns[datecol]
125
-
126
- time = pd.to_datetime(raw[datecol], format=datefmt).to_numpy()
127
-
128
- if isinstance(depth_pattern, str):
129
- depth = [re.search(depth_pattern, c).group(1) for c in raw.columns if _is_depth_column(c, depth_pattern)]
130
- depth_numeric = np.array([float(d) for d in depth])
131
-
132
- elif isinstance(depth_pattern, dict):
133
- depth = [c for c in raw.columns if c in depth_pattern.keys()]
134
- depth_numeric = [depth_pattern[c] for c in raw.columns if c in depth_pattern.keys()]
135
-
136
- else:
137
- raise ValueError("depth_pattern must be a string or dictionary")
138
-
139
- values = raw.loc[:, depth].to_numpy()
140
-
141
- t = TSP(time, depth_numeric, values)
142
-
143
- return t
144
-
145
-
146
- def read_geoprecision(filepath: str) -> IndexedTSP:
147
- """Read a Geoprecision datalogger export (text file)
148
-
149
- Reads GP5W- and FG2-style files from geoprecision.
150
-
151
- Parameters
152
- ----------
153
- filepath : str
154
- Path to file.
155
-
156
- Returns
157
- -------
158
- IndexedTSP
159
- An IndexedTSP
160
- """
161
- Reader = detect_geoprecision_type(filepath)
162
-
163
- if Reader is None:
164
- raise RuntimeError("Could not detect type of geoprecision file (GP5W or FG2 missing from header)")
165
- reader = Reader()
166
-
167
- data = reader.read(filepath)
168
- t = IndexedTSP(times=data['TIME'].dt.to_pydatetime(),
169
- values=data.drop("TIME", axis=1).values)
170
-
171
- t.metadata = reader.META
172
- return t
173
-
174
-
175
- def read_geotop(file: str) -> TSP:
176
- """Read a GEOtop soil temperature output file
177
-
178
- Parameters
179
- ----------
180
- file : str
181
- Path to file.
182
-
183
- Returns
184
- -------
185
- TSP
186
- A TSP
187
-
188
- Description
189
- -----------
190
- GEOtop outputs all runs of all simulation periods in the same file, so this
191
- function returns only the last run of the last simulation period.
193
- """
194
- with warnings.catch_warnings():
195
- warnings.filterwarnings("ignore", category=tw.DuplicateTimesWarning)
196
-
197
- t = read_csv(file,
198
- na_values=[-9999.0],
199
- datecol="^(Date.*)",
200
- datefmt=r"%d/%m/%Y %H:%M",
201
- depth_pattern=r"^(-?[0-9\.]+\s*)$")
202
-
203
- t._depths *= 0.001 # Convert to [m]
204
-
205
- # Only use last simulation period
206
- # TODO: this could be improved
207
- raw = pd.read_csv(file)
208
-
209
- is_max_sim_period = raw['Simulation_Period'] == max( raw['Simulation_Period'])
210
- is_last_run_in_max_sim_period = raw['Run'] == raw['Run'][is_max_sim_period].max()
211
- last_run = np.logical_and(is_max_sim_period, is_last_run_in_max_sim_period)
212
-
213
- last = TSP(times = t.times[last_run],
214
- depths = t.depths,
215
- values = t.values[last_run, :],
216
- metadata={"Simulation_Period": max(raw['Simulation_Period']),
217
- "Run": max( raw['Run'] )
218
- }
219
- )
220
-
221
- return last
222
-
223
-
224
-
225
- def read_gtnp(filename: str,
226
- metadata_filepath=None,
227
- autodetect_metadata=True) -> TSP:
228
- """Read test file from GTN-P database export
229
-
230
- Parameters
231
- ----------
232
- filename : str
233
- Path to file.
234
- metadata_filepath : str, optional
235
- Path to GTN-P metadata file, by default None
236
-
237
- Returns
238
- -------
239
- TSP
240
- A TSP
241
- """
242
-
243
- t = read_csv(filename,
244
- na_values=[-999.0],
245
- datecol="Date/Depth",
246
- datefmt="%Y-%m-%d %H:%M:%S",
247
- depth_pattern=r"^(-?[0-9\.]+)$")
248
-
249
- # try to automatically detect metadata file
250
- if metadata_filepath is None and autodetect_metadata:
251
- partial_name = Path(filename).stem
252
-
253
- while partial_name:
254
- test_metadata = Path(Path(filename).parent, partial_name).with_suffix(".metadata.txt")
255
-
256
- if test_metadata.is_file():
257
- metadata_filepath = test_metadata
258
- break
259
- else:
260
- partial_name = partial_name[:-1]
261
-
262
- if metadata_filepath is not None:
263
- try:
264
- meta = GtnpMetadata(metadata_filepath)
265
- except Exception as e:
266
- warnings.warn(f"Failed to read metadata file: {e}")
267
- return t
268
- t.metadata['raw'] = meta.raw
269
- t.metadata['parsed'] = meta.parsed
270
-
271
- # set time zone
272
- tz = meta.get_timezone()
273
- if tz:
274
- t.set_utc_offset(int(tz.utcoffset(datetime.datetime.now()).total_seconds()))
275
-
276
- # set location
277
- t.latitude = meta.get_latitude() if meta.get_latitude() else None
278
- t.longitude = meta.get_longitude() if meta.get_longitude() else None
279
-
280
- return t
281
-
282
-
283
- def read_gtpem(file: str) -> "list[TSP]":
284
- output = list()
285
- try:
286
- with nc.Dataset(file) as ncdf:
287
- n_sim = len(ncdf['geotop']['sitename'][:])
288
- time = 1
289
- for i, name in enumerate(ncdf['geotop']['sitename'][:]):
290
- pass
291
- #t = TSP()
292
- except NameError:
293
- warnings.warn("netCDF4 library must be installed.")
294
-
295
- return output
296
-
297
-
298
- def read_hoboware(filepath: str, hoboware_config: Optional[HOBOProperties]=None) -> IndexedTSP:
299
- """Read Onset HoboWare datalogger exports
300
-
301
- Parameters
302
- ----------
303
- filepath : str
304
- Path to a file
305
- hoboware_config : HOBOProperties, optional
306
- A HOBOProperties object with information about how the file is configured. If not
307
- provided, the configuration will be automatically detected if possible, by default None
308
-
309
- Returns
310
- -------
311
- IndexedTSP
312
- An IndexedTSP. Use the `set_depths` method to provide depth information
313
- """
314
- reader = HOBO(properties=hoboware_config)
315
- data = reader.read(filepath)
316
-
317
- t = IndexedTSP(times=data['TIME'],
318
- values=data.drop("TIME", axis=1).values)
319
-
320
- return t
321
-
322
-
323
- def read_logr(filepath: str) -> "Union[IndexedTSP,TSP]":
324
- """Read a LogR datalogger export (text file)
325
-
326
- Reads LogR ULogC16-32 files.
327
-
328
- Parameters
329
- ----------
330
- filepath : str
331
- Path to file.
332
-
333
- Returns
334
- -------
335
- IndexedTSP, TSP
336
- An IndexedTSP or TSP, depending on whether the depth labels are sensible
337
- """
338
- r = LogR()
339
- data = r.read(filepath)
340
-
341
- times = data['TIME'].dt.to_pydatetime()
342
- channels = pd.Series(data.columns).str.match("^CH")
343
- values = data.loc[:, channels.to_numpy()]
344
-
345
- if guessed_depths_ok(r.META['guessed_depths'], sum(channels)):
346
- t = TSP(times=times,
347
- depths=r.META['guessed_depths'][-sum(channels):],
348
- values=values.values,)
349
-
350
- else:
351
- warnings.warn(f"Could not convert all channel labels into numeric depths."
352
- "Use the set_depths() method to specify observation depths."
353
- "Guessed depths can be accessed from .metadata['guessed_depths'].")
354
-
355
- t = IndexedTSP(times=times,
356
- values=values.values,
357
- metadata = r.META)
358
-
359
- return t
360
-
361
-
362
- def read_netcdf(file:str, standard_name='temperature_in_ground') -> TSP:
363
- """Read a CF-compliant netCDF file
364
-
365
- Parameters
366
- ----------
367
- file : str
368
- Path to netCDF file.
369
- standard_name : str, optional
370
- The standard name of the data variable, by default 'temperature_in_ground'.
371
- 'soil_temperature' is also common.
372
-
373
- The file must represent data from a single location
374
- A single time variable (with attribute 'axis=T') must be present.
375
- A single depth variable (with attribute 'axis=Z') must be present.
376
- A single data variable (with a standard_name of either 'temperature_in_ground' or 'soil_temperature') must be present.
377
-
378
- """
379
- try:
380
- with nc.Dataset(file) as ncdf:
381
- globals = {k: v for k, v in ncdf.__dict__.items() if not k.startswith("_")}
382
-
383
- # Checks - global attributes
384
- if not globals.get("featureType", "").lower() == "timeseriesprofile":
385
- warnings.warn("featureType is not a time series profile")
386
-
387
- # Checks - data
388
- time = ncdf.get_variables_by_attributes(axis='T')
389
- if len(time) == 0:
390
- raise ValueError("No time variable (with attribute 'axis=T') found")
391
- if len(time) > 1:
392
- raise ValueError("More than one time variable (with attribute 'axis=T') found")
393
-
394
- if not 'units' in time[0].ncattrs():
395
- raise ValueError("Time variable does not have a 'units' attribute")
396
- if not 'calendar' in time[0].ncattrs():
397
- raise ValueError("Time variable does not have a 'calendar' attribute")
398
-
399
- depth = ncdf.get_variables_by_attributes(axis='Z')
400
- if len(depth) == 0:
401
- raise ValueError("No depth variable (with attribute 'axis=Z') found")
402
- if len(depth) > 1:
403
- raise ValueError("More than one depth variable (with attribute 'axis=Z') found")
404
-
405
- temperature = ncdf.get_variables_by_attributes(standard_name=lambda x: x in ['temperature_in_ground', 'soil_temperature'])
406
- if len(temperature) == 0:
407
- raise ValueError("No temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
408
- if len(temperature) > 1:
409
- raise ValueError("More than one temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
410
-
411
- # Get data
412
- times = nc.num2date(time[0][:],
413
- units=time[0].units,
414
- calendar=time[0].calendar,
415
- only_use_cftime_datetimes=False,
416
- only_use_python_datetimes=True)
417
- depths = np.round(np.array(depth[0][:], dtype='float64'), 5)
418
- values = temperature[0][:]
419
-
420
- except NameError:
421
- warnings.warn("netCDF4 library must be installed.")
422
- return None
423
-
424
- except ValueError as e:
425
- warnings.warn(f"File does not meet formatting requirements: ({e})")
426
- return None
427
-
428
- t = TSP(times=times, depths=depths, values=values, metadata=globals)
429
- return t
430
-
431
-
432
- def read_ntgs(filename: str) -> TSP:
433
- """Read a file from the NTGS permafrost database
434
-
435
- Parameters
436
- ----------
437
- filename : str
438
- Path to file.
439
-
440
- Returns
441
- -------
442
- TSP
443
- A TSP
444
- """
445
- if Path(filename).suffix == ".csv":
446
- try:
447
- raw = pd.read_csv(filename,
448
- keep_default_na=False,na_values=[''],
449
- parse_dates={"time": ["date_YYYY-MM-DD","time_HH:MM:SS"]})
450
- except IndexError:
451
- raise IndexError("There are insufficient columns, the file format is invalid.")
452
- elif Path(filename).suffix in [".xls", ".xlsx"]:
453
- raise NotImplementedError("Convert to CSV")
454
- #try:
455
- # raw = pd.read_excel(filename, keep_default_na=False, parse_dates={"time": [4,5]}, date_parser=self.getISOFormat)
456
- #except IndexError:
457
- # raise IndexError("There are insufficient columns, the file format is invalid.")
458
- else:
459
- raise TypeError("Unsupported file extension.")
460
-
461
- metadata = {
462
- 'project_name': raw['project_name'].values[0],
463
- 'site_id': raw['site_id'].values[0],
464
- 'latitude': raw['latitude'].values[0],
465
- 'longitude': raw['longitude'].values[0]
466
- }
467
- match_depths = [c for c in [re.search(r"(-?[0-9\.]+)_m$", C) for C in raw.columns] if c]
468
- values = raw.loc[:, [d.group(0) for d in match_depths]].values
469
- times = raw['time'].dt.to_pydatetime()
470
-
471
- t = TSP(times=times,
472
- depths=[float(d.group(1)) for d in match_depths],
473
- values=values,
474
- latitude=raw['latitude'].values[0],
475
- longitude=raw['longitude'].values[0],
476
- site_id=raw['site_id'].values[0],
477
- metadata=metadata)
478
-
479
- return t
480
-
481
-
482
- def read_rbr(file_path: str) -> IndexedTSP:
483
- """
484
-
485
- Parameters
486
- ----------
487
- filepath
488
-
489
- Returns
490
- -------
491
-
492
- """
493
- file_extension = Path(file_path).suffix.lower()
494
- if file_extension in [".dat", ".hex"]:
495
- with open(file_path, "r") as f:
496
- first_line = f.readline()
497
- model = first_line.split()[1]
498
- if model == "XL-800":
499
- r = RBRXL800()
500
- elif model in ["XR-420", "XR-420-T8"]:
501
- r = RBRXR420()
502
- else:
503
- raise ValueError(f"logger model {model} unsupported")
504
- data = r.read(file_path)
505
- elif file_extension in [".xls", ".xlsx", ".rsk"]:
506
- r = RBRXR420()
507
- data = r.read(file_path)
508
- else:
509
- raise IOError("File is not .dat, .hex, .xls, .xlsx, or .rsk")
510
-
511
- times = data['TIME'].dt.to_pydatetime()
512
- channels = pd.Series(data.columns).str.match("^ch")
513
- values = data.loc[:, channels.to_numpy()]
514
-
515
- t = IndexedTSP(times=times, values=values.values, metadata=r.META)
516
- if "utc offset" in list(r.META.keys()):
517
- t.set_utc_offset(r.META["utc offset"])
518
-
519
- return t
520
-
521
-
522
- def read_permos(filepath:str) -> TSP:
523
- """Read file from PERMOS database export
524
-
525
- Parameters
526
- ----------
527
- filepath : str
528
- Path to file.
529
-
530
- Returns
531
- -------
532
- TSP
533
- A TSP
534
-
535
- Used for data obtained from PERMOS (permos.ch/data-portal/permafrost-temperature-and-active-layer)
536
- """
537
- try:
538
- raw = pd.read_csv(filepath,
539
- index_col=0,
540
- parse_dates=True)
541
- except IndexError:
542
- raise IndexError("There are insufficient columns, the file format is invalid.")
543
-
544
- t = TSP(times=raw.index,
545
- depths=[float(C) for C in raw.columns],
546
- values=raw.values)
547
-
548
- return t
1
+ import datetime
2
+ import numpy as np
3
+ import pandas as pd
4
+ import datetime as dt
5
+ import re
6
+ import warnings
7
+
8
+ try:
9
+ import netCDF4 as nc
10
+ except ModuleNotFoundError:
11
+ warnings.warn("Missing netCDF4 library. Some functionality will be limited.")
12
+
13
+ from pathlib import Path
14
+ from typing import Union, Optional, Callable
15
+
16
+ from tsp.dataloggers.Geoprecision import detect_geoprecision_type
17
+ from tsp.dataloggers.HOBO import HOBO, HOBOProperties
18
+ from tsp.dataloggers.logr import LogR, guessed_depths_ok
19
+ from tsp.dataloggers.RBRXL800 import RBRXL800
20
+ from tsp.dataloggers.RBRXR420 import RBRXR420
21
+ from tsp.dataloggers.Vemco import Vemco
22
+ import tsp.tspwarnings as tw
23
+
24
+ from tsp.core import TSP, IndexedTSP
25
+ from tsp.misc import _is_depth_column
26
+ from tsp.gtnp import GtnpMetadata
27
+
28
+
29
+ def read_classic(filepath: str, init_file: "Optional[str]"=None) -> TSP:
30
+ """Read output from CLASSIC land surface model
31
+
32
+ Depth values, if provided, represent the midpoint of the model cells.
33
+
34
+ Parameters
35
+ ----------
36
+ filepath : str
37
+ Path to an output file
38
+ init_file : str
39
+ Path to a classic init file. If provided, depth values will be calculated. Otherwise an :py:class:`~tsp.core.IndexedTSP` is returned
40
+
41
+ Returns
42
+ -------
43
+ TSP
44
+ An IndexedTSP. Use :py:meth:`~tsp.core.IndexedTSP.set_depths` to provide depth information if init_file is not provided.
45
+ """
46
+ try:
47
+ nc
48
+ except NameError:
49
+ warnings.warn("netCDF4 library must be installed.")
50
+
51
+ # tbaracc_d / tbaracc_m / tbaracc_y
52
+ with nc.Dataset(filepath, 'r') as ncdf:
53
+ lat = ncdf['lat'][:]
54
+ lon = ncdf['lon'][:]
55
+ temp = ncdf['tsl'][:] # t, z
56
+
57
+ try:
58
+ time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar,
59
+ only_use_cftime_datetimes=False,
60
+ only_use_python_datetimes=True)
61
+ except ValueError:
62
+ cf_time = nc.num2date(ncdf['time'][:], ncdf['time'].units, ncdf['time'].calendar)
63
+ time = np.array([datetime.datetime.fromisoformat(t.isoformat()) for t in cf_time])
64
+
65
+ if init_file:
66
+ with nc.Dataset(init_file, 'r') as init:
67
+ delz = init["DELZ"][:]
68
+ depths = np.round(np.cumsum(delz) - np.multiply(delz, 0.5), 7) # delz precision is lower so we get some very small offsets
69
+
70
+ if len(lat) > 1:
71
+ warnings.warn("Multiple points in file. Returning the first one found.")
72
+ # TODO: return Ensemble if multiple points
73
+ lat = lat[0]
74
+ lon = lon[0]
75
+ temp = temp[:,:,0,0]
76
+ else:
77
+ temp = temp[:,:,0,0]
78
+
79
+ t = IndexedTSP(times=time,
80
+ values=temp,
81
+ latitude=lat,
82
+ longitude=lon,
83
+ metadata={"source_file": filepath})
84
+
85
+ if init_file:
86
+ t.set_depths(depths)
87
+
88
+ return t
89
+
90
+
91
+ def read_csv(filepath: str,
92
+ datecol: "Union[str, int]",
93
+ datefmt: str = "%Y-%m-%d %H:%M:%S",
94
+ depth_pattern: "Union[str, dict]" = r"^(-?[0-9\.]+)$",
95
+ na_values:list = [],
96
+ **kwargs) -> TSP:
97
+ r"""Read an arbitrary CSV file
98
+
99
+ Date and time must be in a single column, and the csv must be in the
100
+ 'wide' data format (each depth is a separate column)
101
+
102
+ Parameters
103
+ ----------
104
+ filepath : str
105
+ Path to csv file
106
+ datecol : Union[str, int]
107
+ Either the numeric index (starting at 0) of the date column (if int), or the column name or a regular expression matching it (if str)
108
+ datefmt : str, optional
109
+ The format of the datetime values. Use `python strftime format codes <https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes>`_,
110
+ by default ``"%Y-%m-%d %H:%M:%S"``
111
+ depth_pattern : str or dict
112
+ If string: A regular expression that matches the column names with depths. The regular expression must
113
+ have a single capture group that extracts just the numeric part of the column header, by default r"^(-?[0-9\.]+)$".
114
+ If column names were in the form ``"+/-1.0_m"`` (i.e. included 'm' to denote units), you could use the regular expression ``r"^(-?[0-9\.]+)_m$"``
115
+ If a dictionary is passed, the keys must be the column names and the values are the depths. This is useful if the column names are not numeric.
116
+ na_values : list, optional
117
+ Additional strings to recognize as NA. Passed to pandas.read_csv, by default []
118
+
119
+ Returns
120
+ -------
121
+ TSP
122
+ A TSP
123
+ """
124
+ raw = pd.read_csv(filepath, na_values=na_values, **kwargs)
125
+
126
+ if not datecol in raw.columns and isinstance(datecol, str):
127
+ datecol = [re.search(datecol, c).group(1) for c in raw.columns if re.search(datecol, c)][0]
128
+
129
+ if isinstance(datecol, int):
130
+ datecol = raw.columns[datecol]
131
+
132
+ time = pd.to_datetime(raw[datecol], format=datefmt).to_numpy()
133
+
134
+ if isinstance(depth_pattern, str):
135
+ depth = [re.search(depth_pattern, c).group(1) for c in raw.columns if _is_depth_column(c, depth_pattern)]
136
+ depth_numeric = np.array([float(d) for d in depth])
137
+
138
+ elif isinstance(depth_pattern, dict):
139
+ depth = [c for c in raw.columns if c in depth_pattern.keys()]
140
+ depth_numeric = [depth_pattern[c] for c in raw.columns if c in depth_pattern.keys()]
141
+
142
+ else:
143
+ raise ValueError("depth_pattern must be a string or dictionary")
144
+
145
+ values = raw.loc[:, depth].to_numpy()
146
+
147
+ t = TSP(time,
148
+ depth_numeric,
149
+ values,
150
+ metadata={"source_file": filepath})
151
+
152
+ return t
153
+
154
+
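# Illustrative sketch of the depth_pattern argument of read_csv() above; the file
# names, column headers, and depths below are hypothetical, not from the package.
import tsp.readers as readers

# Columns named "0.5_m", "1.0_m", ...: capture only the numeric part
t1 = readers.read_csv("ground_temps.csv", datecol=0,
                      depth_pattern=r"^(-?[0-9\.]+)_m$")

# Non-numeric column headers: map each header to a depth in metres
t2 = readers.read_csv("ground_temps.csv", datecol="Date",
                      depth_pattern={"sensor_top": 0.5, "sensor_bottom": 5.0})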
155
+ def read_geoprecision(filepath: str) -> IndexedTSP:
156
+ """Read a Geoprecision datalogger export (text file)
157
+
158
+ Reads GP5W- and FG2-style files from geoprecision.
159
+
160
+ Parameters
161
+ ----------
162
+ filepath : str
163
+ Path to file.
164
+
165
+ Returns
166
+ -------
167
+ IndexedTSP
168
+ An IndexedTSP
169
+ """
170
+ Reader = detect_geoprecision_type(filepath)
171
+
172
+ if Reader is None:
173
+ raise RuntimeError("Could not detect type of geoprecision file (GP5W or FG2 missing from header)")
174
+ reader = Reader()
175
+
176
+ data = reader.read(filepath)
177
+ metadata = reader.META
178
+ metadata['_source_file'] = filepath
179
+ t = IndexedTSP(times=np.array(data['TIME'].dt.to_pydatetime()),
180
+ values=data.drop("TIME", axis=1).values,
181
+ metadata=metadata)
182
+
183
+ return t
184
+
185
+
186
+ def read_geotop(file: str) -> TSP:
187
+ """Read a GEOtop soil temperature output file
188
+
189
+ Parameters
190
+ ----------
191
+ file : str
192
+ Path to file.
193
+
194
+ Returns
195
+ -------
196
+ TSP
197
+ A TSP
198
+
199
+ Description
200
+ -----------
201
+ GEOtop outputs all runs of all simulation periods in the same file, so this
202
+ function returns only the last run of the last simulation period.
204
+ """
205
+ with warnings.catch_warnings():
206
+ warnings.filterwarnings("ignore", category=tw.DuplicateTimesWarning)
207
+
208
+ t = read_csv(file,
209
+ na_values=[-9999.0],
210
+ datecol="^(Date.*)",
211
+ datefmt=r"%d/%m/%Y %H:%M",
212
+ depth_pattern=r"^(-?[0-9\.]+\s*)$")
213
+
214
+ t._depths *= 0.001 # Convert to [m]
215
+
216
+ # Only use last simulation period
217
+ # TODO: this could be improved
218
+ raw = pd.read_csv(file)
219
+
220
+ is_max_sim_period = raw['Simulation_Period'] == max( raw['Simulation_Period'])
221
+ is_last_run_in_max_sim_period = raw['Run'] == raw['Run'][is_max_sim_period].max()
222
+ last_run = np.logical_and(is_max_sim_period, is_last_run_in_max_sim_period)
223
+
224
+ last = TSP(times = t.times[last_run],
225
+ depths = t.depths,
226
+ values = t.values[last_run, :],
227
+ metadata={"_source_file": file,
228
+ "Simulation_Period": max(raw['Simulation_Period']),
229
+ "Run": max( raw['Run'] )
230
+ }
231
+ )
232
+
233
+ return last
234
+
235
+
236
+
237
+ def read_gtnp(filename: str,
238
+ metadata_filepath=None,
239
+ autodetect_metadata=True) -> TSP:
240
+ """Read test file from GTN-P database export
241
+
242
+ Parameters
243
+ ----------
244
+ filename : str
245
+ Path to file.
246
+ metadata_filepath : str, optional
248
+ Path to GTN-P metadata file, by default None
248
+
249
+ Returns
250
+ -------
251
+ TSP
252
+ A TSP
253
+ """
254
+
255
+ t = read_csv(filename,
256
+ na_values=[-999.0],
257
+ datecol="Date/Depth",
258
+ datefmt="%Y-%m-%d %H:%M:%S",
259
+ depth_pattern=r"^(-?[0-9\.]+)$")
260
+
261
+ # try to automatically detect metadata file
262
+ if metadata_filepath is None and autodetect_metadata:
263
+ partial_name = Path(filename).stem
264
+
265
+ while partial_name:
266
+ test_metadata = Path(Path(filename).parent, partial_name).with_suffix(".metadata.txt")
267
+
268
+ if test_metadata.is_file():
269
+ metadata_filepath = test_metadata
270
+ break
271
+ else:
272
+ partial_name = partial_name[:-1]
273
+
274
+ if metadata_filepath is not None:
275
+ try:
276
+ meta = GtnpMetadata(metadata_filepath)
277
+ except Exception as e:
278
+ warnings.warn(f"Failed to read metadata file: {e}")
279
+ return t
280
+ t.metadata['raw'] = meta.raw
281
+ t.metadata['parsed'] = meta.parsed
282
+
283
+ # set time zone
284
+ tz = meta.get_timezone()
285
+ if tz:
286
+ t.set_utc_offset(int(tz.utcoffset(datetime.datetime.now()).total_seconds()))
287
+
288
+ # set location
289
+ t.latitude = meta.get_latitude() if meta.get_latitude() else None
290
+ t.longitude = meta.get_longitude() if meta.get_longitude() else None
291
+
292
+ t.metadata['_source_file'] = filename
293
+
294
+ return t
295
+
296
+
297
+ def read_gtpem(file: str) -> "list[TSP]":
298
+ output = list()
299
+ try:
300
+ with nc.Dataset(file) as ncdf:
301
+ n_sim = len(ncdf['geotop']['sitename'][:])
302
+ time = 1
303
+ for i, name in enumerate(ncdf['geotop']['sitename'][:]):
304
+ pass
305
+ #t = TSP()
306
+ except NameError:
307
+ warnings.warn("netCDF4 library must be installed.")
308
+
309
+ return output
310
+
311
+
312
+ def read_hoboware(filepath: str, hoboware_config: Optional[HOBOProperties]=None) -> IndexedTSP:
313
+ """Read Onset HoboWare datalogger exports
314
+
315
+ Parameters
316
+ ----------
317
+ filepath : str
318
+ Path to a file
319
+ hoboware_config : HOBOProperties, optional
320
+ A HOBOProperties object with information about how the file is configured. If not
321
+ provided, the configuration will be automatically detected if possible, by default None
322
+
323
+ Returns
324
+ -------
325
+ IndexedTSP
326
+ An IndexedTSP. Use the `set_depths` method to provide depth information
327
+ """
328
+ reader = HOBO(properties=hoboware_config)
329
+ data = reader.read(filepath)
330
+
331
+ metadata = reader.META
332
+ metadata['_source_file'] = filepath
333
+
334
+ t = IndexedTSP(times=data['TIME'],
335
+ values=data.drop("TIME", axis=1).values,
336
+ metadata=metadata)
337
+
338
+ return t
339
+
340
+
341
+ def read_logr(filepath: str, cfg_txt: str = None) -> "Union[IndexedTSP,TSP]":
342
+ """Read a LogR datalogger export (text file)
343
+
344
+ Reads LogR ULogC16-32 files.
345
+
346
+ Parameters
347
+ ----------
348
+ filepath : str
349
+ Path to file.
350
+ cfg_txt : str, optional
351
+ Path to the logger's configuration text file. Required when reading raw (uncalibrated) exports.
352
+
353
+ Returns
354
+ -------
355
+ IndexedTSP, TSP
356
+ An IndexedTSP or TSP, depending on whether the depth labels are sensible
357
+ """
358
+ r = LogR()
359
+ data = r.read(file=filepath, cfg_txt=cfg_txt)
360
+ times = np.array(data['TIME'].dt.to_pydatetime())
361
+ channels = pd.Series(data.columns).str.match("^CH")
362
+ values = data.loc[:, channels.to_numpy()]
363
+ metadata = r.META
364
+ metadata['_source_file'] = filepath
365
+
366
+ if guessed_depths_ok(metadata['guessed_depths'], sum(channels)):
367
+ t = TSP(times=times,
368
+ depths=metadata['guessed_depths'][-sum(channels):],
369
+ values=values.values,)
370
+
371
+ else:
372
+ warnings.warn(f"Could not convert all channel labels into numeric depths."
373
+ "Use the set_depths() method to specify observation depths."
374
+ "Guessed depths can be accessed from .metadata['guessed_depths'].")
375
+
376
+ t = IndexedTSP(times=times,
377
+ values=values.values,
378
+ metadata = metadata)
379
+
380
+ return t
381
+
382
+
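# Illustrative sketch: read_logr() above returns a TSP when the channel labels
# parse as depths, otherwise an IndexedTSP. File name and depths are hypothetical.
from tsp.core import IndexedTSP
from tsp.readers import read_logr

t = read_logr("logR_export.csv")
if isinstance(t, IndexedTSP):
    t.set_depths([0.5, 1.0, 2.0, 5.0])  # assumed sensor depths in metres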
383
+ def read_netcdf(file:str, standard_name='temperature_in_ground') -> TSP:
384
+ """Read a CF-compliant netCDF file
385
+
386
+ Parameters
387
+ ----------
388
+ file : str
389
+ Path to netCDF file.
390
+ standard_name : str, optional
391
+ The standard name of the data variable, by default 'temperature_in_ground'.
392
+ 'soil_temperature' is also common.
393
+
394
+ The file must represent data from a single location
395
+ A single time variable (with attribute 'axis=T') must be present.
396
+ A single depth variable (with attribute 'axis=Z') must be present.
397
+ A single data variable (with a standard_name of either 'temperature_in_ground' or 'soil_temperature') must be present.
398
+
399
+ """
400
+ try:
401
+ with nc.Dataset(file) as ncdf:
402
+ globals = {k: v for k, v in ncdf.__dict__.items() if not k.startswith("_")}
403
+
404
+ # Checks - global attributes
405
+ if not globals.get("featureType", "").lower() == "timeseriesprofile":
406
+ warnings.warn("featureType is not a time series profile")
407
+
408
+ # Checks - data
409
+ time = ncdf.get_variables_by_attributes(axis='T')
410
+ if len(time) == 0:
411
+ raise ValueError("No time variable (with attribute 'axis=T') found")
412
+ if len(time) > 1:
413
+ raise ValueError("More than one time variable (with attribute 'axis=T') found")
414
+
415
+ if not 'units' in time[0].ncattrs():
416
+ raise ValueError("Time variable does not have a 'units' attribute")
417
+ if not 'calendar' in time[0].ncattrs():
418
+ raise ValueError("Time variable does not have a 'calendar' attribute")
419
+
420
+ depth = ncdf.get_variables_by_attributes(axis='Z')
421
+ if len(depth) == 0:
422
+ raise ValueError("No depth variable (with attribute 'axis=Z') found")
423
+ if len(depth) > 1:
424
+ raise ValueError("More than one depth variable (with attribute 'axis=Z') found")
425
+
426
+ temperature = ncdf.get_variables_by_attributes(standard_name=lambda x: x in ['temperature_in_ground', 'soil_temperature'])
427
+ if len(temperature) == 0:
428
+ raise ValueError("No temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
429
+ if len(temperature) > 1:
430
+ raise ValueError("More than one temperature variable (with standard name 'temperature_in_ground' or 'soil_temperature') found")
431
+
432
+ # Get data
433
+ times = nc.num2date(time[0][:],
434
+ units=time[0].units,
435
+ calendar=time[0].calendar,
436
+ only_use_cftime_datetimes=False,
437
+ only_use_python_datetimes=True)
438
+ depths = np.round(np.array(depth[0][:], dtype='float64'), 5)
439
+ values = temperature[0][:]
440
+
441
+ except NameError:
442
+ warnings.warn("netCDF4 library must be installed.")
443
+ return None
444
+
445
+ except ValueError as e:
446
+ warnings.warn(f"File does not meet formatting requirements: ({e})")
447
+ return None
448
+
449
+ metadata = {"CF":globals,
450
+ "_source_file": file}
451
+
452
+ t = TSP(times=times, depths=depths, values=values, metadata=metadata)
453
+
454
+ return t
455
+
456
+
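# Illustrative sketch of a minimal file satisfying the read_netcdf() checks above
# (one 'axis=T' time variable with units and calendar, one 'axis=Z' depth variable,
# one variable with a recognized standard_name). Names and values are hypothetical.
import numpy as np
import netCDF4 as nc

with nc.Dataset("minimal.nc", "w") as d:
    d.featureType = "timeSeriesProfile"
    d.createDimension("time", 2)
    d.createDimension("depth", 3)
    tvar = d.createVariable("time", "f8", ("time",))
    tvar.axis = "T"
    tvar.units = "days since 2020-01-01"
    tvar.calendar = "standard"
    zvar = d.createVariable("depth", "f8", ("depth",))
    zvar.axis = "Z"
    temp = d.createVariable("ground_temperature", "f4", ("time", "depth"))
    temp.standard_name = "temperature_in_ground"
    tvar[:] = [0.0, 1.0]
    zvar[:] = [0.5, 1.0, 2.0]
    temp[:] = np.zeros((2, 3))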
457
+ def read_ntgs_gtr(filename: str) -> TSP:
458
+ """Read a file from the NTGS permafrost ground temperature report
459
+
460
+ Parameters
461
+ ----------
462
+ filename : str
463
+ Path to file.
464
+
465
+ Returns
466
+ -------
467
+ TSP
468
+ A TSP
469
+ """
470
+ if Path(filename).suffix == ".csv":
471
+ try:
472
+ raw = pd.read_csv(filename,
473
+ keep_default_na=False,na_values=[''],
474
+ parse_dates={"time": ["date_YYYY-MM-DD","time_HH:MM:SS"]})
475
+ except IndexError:
476
+ raise IndexError("There are insufficient columns, the file format is invalid.")
477
+ elif Path(filename).suffix in [".xls", ".xlsx"]:
478
+ try:
479
+ raw = pd.read_excel(filename,
480
+ sheet_name=1, parse_dates=False)
481
+ # Avoid any excel date nonsense
482
+ safe_date = raw.pop('date_YYYY-MM-DD').astype(str).str.extract(r"([0-9]{4}-[0-9]{2}-[0-9]{2})")
483
+ safe_time = raw.pop('time_HH:MM:SS').astype(str).str.extract(r"([0-9]{2}:[0-9]{2}:[0-9]{2})")
484
+ raw.insert(0, 'time', safe_date[0] + " " + safe_time[0])
485
+ raw['time'] = pd.to_datetime(raw['time'], format="%Y-%m-%d %H:%M:%S")
486
+ except IndexError:
487
+ raise IndexError("There are insufficient columns, the file format is invalid.")
488
+ else:
489
+ raise TypeError("Unsupported file extension.")
490
+
491
+ metadata = {
492
+ 'project_name': raw['project_name'].values[0],
493
+ '_site_id': raw['site_id'].values[0],
494
+ '_latitude': raw['latitude'].values[0],
495
+ '_longitude': raw['longitude'].values[0],
496
+ '_source_file': filename
497
+ }
498
+ match_depths = [c for c in [re.search(r"(-?[0-9\.]+)_m$", C) for C in raw.columns] if c]
499
+ values = raw.loc[:, [d.group(0) for d in match_depths]].values
500
+ times = np.array(raw['time'].dt.to_pydatetime())
501
+
502
+ t = TSP(times=times,
503
+ depths=[float(d.group(1)) for d in match_depths],
504
+ values=values,
505
+ latitude=raw['latitude'].values[0],
506
+ longitude=raw['longitude'].values[0],
507
+ site_id=raw['site_id'].values[0],
508
+ metadata=metadata)
509
+
510
+ return t
511
+
512
+
513
+ def read_ntgs_db(filename:str) -> dict[str, TSP]:
514
+ """Read a file from the NTGS permafrost database export
515
+
516
+ Parameters
517
+ ----------
518
+ filename : str
519
+ Path to file.
520
+
521
+ Returns
522
+ -------
523
+ dict[str, TSP]
524
+ A dictionary of TSPs, keyed by SITE_ID
525
+ """
526
+ df = pd.read_csv(filename, parse_dates=['MEASUREMENT_DATETIME'])
527
+ grouped = df.groupby("SITE_ID")
528
+ wide_dict = {name:__parse_ntgs_db_df(data, site_id=name) for name, data in grouped}
529
+
530
+ for name, tsp_obj in wide_dict.items():
531
+ tsp_obj.metadata['_source_file'] = filename
532
+ tsp_obj.metadata['_site_id'] = name
533
+
534
+ return wide_dict
535
+
536
+
537
+ def read_ntgs_db_single(filename:str,
538
+ select = None,
539
+ duplicate_depths='mean') -> TSP:
540
+ """Read a file from the NTGS permafrost database export with a single TSP output
541
+ Parameters
542
+ ----------
543
+ filename : str
544
+ Path to file.
545
+ select : str, int, optional
546
+ How to handle multiple SITE_IDs in the file. If an integer, it is treated as the index of the SITE_ID to use (0-based).
547
+ If a string, treat it as the site ID to use. If None, an error is raised if multiple SITE_IDs are found.
548
+ duplicate_depths : str, optional
549
+ How to handle duplicate depth measurements. Options are 'mean' (default) or 'error'.
+
550
+ Returns
551
+ -------
552
+ TSP
553
+ A TSP
554
+ """
555
+ df = pd.read_csv(filename)
556
+
557
+ if len(df['SITE_ID'].unique()) > 1 and select is None:
558
+ raise ValueError("Multiple SITE_IDs found in file.")
559
+ elif len(df['SITE_ID'].unique()) > 1 and isinstance(select, int):
560
+ df = df[df['SITE_ID'] == df['SITE_ID'].unique()[select]]
561
+ elif len(df['SITE_ID'].unique()) > 1 and isinstance(select, str):
562
+ df = df[df['SITE_ID'] == select]
563
+
564
+ metadata = {'_source_file': filename,
565
+ '_site_id': df['SITE_ID'].unique()[0]}
566
+
567
+ t = __parse_ntgs_db_df(df, duplicate_depths=duplicate_depths, site_id=metadata['_site_id'])
568
+ t.metadata.update(metadata)
569
+ return t
570
+
571
+
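# Illustrative sketch of the select argument of read_ntgs_db_single() above; the
# file name and site ID are hypothetical.
from tsp.readers import read_ntgs_db_single

t_first = read_ntgs_db_single("ntgs_export.csv", select=0)        # first SITE_ID (0-based)
t_named = read_ntgs_db_single("ntgs_export.csv", select="KC-01")  # a specific SITE_ID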
572
+ def __parse_ntgs_db_df(df:pd.DataFrame, site_id=None, duplicate_depths='mean') -> TSP:
573
+ wide = df.pivot_table(index='MEASUREMENT_DATETIME',
574
+ columns='DEPTH_M',
575
+ values='TEMPERATURE_C',
576
+ aggfunc=duplicate_depths).reset_index()
577
+
578
+ times = wide.pop('MEASUREMENT_DATETIME').to_numpy()
579
+ depths = wide.columns.to_numpy().astype(float)
580
+ values = wide.to_numpy()
581
+
582
+ t = TSP(times=times,
583
+ depths=depths,
584
+ values=values,
585
+ site_id=site_id)
586
+
587
+ return t
588
+
589
+
590
+ def read_ntgs(filename: str, allow_multiple_sites=False) -> TSP | dict[str, TSP]:
591
+ """Read a NTGS file.
592
+
593
+ Parameters
594
+ ----------
595
+ filename : str
596
+ Path to file.
+ allow_multiple_sites : bool, optional
+ If True, return a dictionary of TSPs when multiple SITE_IDs are found, by default False
597
+
598
+ Returns
599
+ -------
600
+ TSP | dict[str, TSP]
601
+ A TSP or a dictionary of TSPs with SITE_ID as keys if multiple SITE_IDs are found and `allow_multiple_sites` is True.
602
+
603
+ Description
604
+ -----------
605
+ Attempts to read the file as a ground temperature report file first. If that fails, attempts to read
606
+ it as a database export. If multiple SITE_IDs are found in the database export,
607
+ a dictionary of TSPs is returned if `allow_multiple_sites` is True.
608
+ """
609
+ try:
610
+ return read_ntgs_gtr(filename)
611
+ except Exception:
612
+ dict_t = read_ntgs_db(filename)
613
+
614
+ if len(dict_t.keys()) == 1:
615
+ return list(dict_t.values())[0]
616
+
617
+ elif allow_multiple_sites:
618
+ return dict_t
619
+
620
+ else:
621
+ raise ValueError(f"Found {len(dict_t.keys())} unique SITE_ID values in file. "
622
+ "Use read_ntgs_db() or set `allow_multiple_sites=True` to return all sites as a dictionary.")
623
+
624
+
625
+
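# Illustrative sketch of the read_ntgs() dispatch above: the report layout is tried
# first, then the database-export layout. The file name is hypothetical.
from tsp.readers import read_ntgs

result = read_ntgs("ntgs_file.csv", allow_multiple_sites=True)
if isinstance(result, dict):  # database export containing several SITE_IDs
    for site_id, site_tsp in result.items():
        print(site_id, site_tsp.metadata["_source_file"])
else:  # a single site: a plain TSP
    print(result.metadata["_source_file"])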
626
+ def read_rbr(file_path: str) -> IndexedTSP:
627
+ """
628
+
629
+ Parameters
630
+ ----------
631
+ filepath
632
+
633
+ Returns
634
+ -------
635
+
636
+ """
637
+ file_extension = Path(file_path).suffix.lower()
638
+ if file_extension in [".dat", ".hex"]:
639
+ with open(file_path, "r") as f:
640
+ first_line = f.readline()
641
+ model = first_line.split()[1]
642
+ if model == "XL-800":
643
+ r = RBRXL800()
644
+ elif model in ["XR-420", "XR-420-T8"]:
645
+ r = RBRXR420()
646
+ else:
647
+ raise ValueError(f"logger model {model} unsupported")
648
+ data = r.read(file_path)
649
+ elif file_extension in [".xls", ".xlsx", ".rsk"]:
650
+ r = RBRXR420()
651
+ data = r.read(file_path)
652
+ else:
653
+ raise IOError("File is not .dat, .hex, .xls, .xlsx, or .rsk")
654
+
655
+ times = np.array(data['TIME'].dt.to_pydatetime())
656
+ channels = pd.Series(data.columns).str.match("^ch")
657
+ values = data.loc[:, channels.to_numpy()]
658
+
659
+ metadata = r.META
660
+ metadata['_source_file'] = file_path
661
+
662
+ t = IndexedTSP(times=times, values=values.values, metadata=metadata)
663
+ if "utc_offset" in list(r.META.keys()):
664
+ t.set_utc_offset(r.META["utc_offset"])
665
+
666
+ return t
667
+
668
+
669
+ def read_vemco(file_path: str) -> IndexedTSP:
670
+ """
671
+
672
+ Parameters
673
+ ----------
674
+ filepath
675
+
676
+ Returns
677
+ -------
678
+
679
+ """
680
+ file_extension = Path(file_path).suffix.lower()
681
+ if file_extension in [".000", ".csv"]:
682
+ r = Vemco()
683
+ data = r.read(file_path)
684
+ else:
685
+ raise IOError("File is not .000, .csv")
686
+
687
+ times = np.array(data['TIME'].dt.to_pydatetime())
688
+ channels = pd.Series(data.columns).str.match("^TEMP")
689
+ values = data.loc[:, channels.to_numpy()]
690
+
691
+ metadata = r.META
692
+ metadata['_source_file'] = file_path
693
+
694
+ t = IndexedTSP(times=times, values=values.values, metadata=metadata)
695
+ if "utc_offset" in list(r.META.keys()):
696
+ t.set_utc_offset(r.META["utc_offset"].seconds)
697
+ return t
698
+
699
+
700
+ def to_native_datetime(timestamp: pd.Timestamp) -> dt.datetime:
701
+ return timestamp.to_pydatetime()
702
+
703
+
704
+ def read_permos(filepath:str) -> TSP:
705
+ """Read file from PERMOS database export
706
+
707
+ Parameters
708
+ ----------
709
+ filepath : str
710
+ Path to file.
711
+
712
+ Returns
713
+ -------
714
+ TSP
715
+ A TSP
716
+
717
+ Used for data obtained from PERMOS (permos.ch/data-portal/permafrost-temperature-and-active-layer)
718
+ """
719
+ try:
720
+ raw = pd.read_csv(filepath,
721
+ index_col=0,
722
+ parse_dates=True)
723
+ except IndexError:
724
+ raise IndexError("There are insufficient columns, the file format is invalid.")
725
+ metadata = {
726
+ '_source_file': filepath
727
+ }
728
+ t = TSP(times=raw.index,
729
+ depths=[float(C) for C in raw.columns],
730
+ values=raw.values,
731
+ metadata=metadata)
732
+
733
+ return t
734
+
735
+ def read_tsp(filepath: str) -> TSP:
736
+ """Read a TSP-style ground temperature file
737
+
738
+ Parameters
739
+ ----------
740
+ filepath : str
741
+ Path to file.
742
+
743
+ Returns
744
+ -------
745
+ TSP
746
+ A TSP
747
+ """
748
+ f, n, m = _tsp_format_parse(filepath)
749
+ t = f(filepath, n)
750
+ return t
751
+
752
+ def _read_tsp_wide(filepath: str, n_skip) -> TSP:
753
+ """Read a wide-format TSP file
754
+
755
+ Parameters
756
+ ----------
757
+ filepath : str
758
+ Path to file.
759
+
760
+ Returns
761
+ -------
762
+ TSP
763
+ A TSP
764
+ """
765
+ t = read_csv(filepath,
766
+ datecol="timestamp",
767
+ datefmt=None,
768
+ skiprows=n_skip,
769
+ depth_pattern=r"^(-?[0-9\.]+)$")
770
+ return t
771
+
772
+ def _read_tsp_long(filepath: str, n_skip) -> TSP:
773
+ """Read a long-format TSP file
774
+
775
+ Parameters
776
+ ----------
777
+ filepath : str
778
+ Path to file.
779
+
780
+ Returns
781
+ -------
782
+ TSP
783
+ A TSP
784
+ """
785
+ df = pd.read_csv(filepath, skiprows=n_skip)
786
+ time = pd.to_datetime(df['timestamp'], format=None).to_numpy()
787
+ depth = df['depth'].to_numpy().astype(float)
788
+ values = df['temperature'].to_numpy()
789
+
790
+ t = TSP.from_tidy_format(time,
791
+ depth,
792
+ values,
793
+ metadata={"_source_file": filepath})
794
+
795
+ return t
796
+
797
+ def _tsp_format_parse(filepath:str) -> tuple[Callable, int, list[str]]:
798
+ """Determine the format of a TSP file
799
+
800
+ Parameters
801
+ ----------
802
+ filepath : str
803
+ Path to file.
804
+
805
+ Returns
806
+ -------
807
+ function
808
+ The function to use to read the file
809
+ int
810
+ The number of header lines to skip
+ list[str]
+ The metadata header lines (those beginning with "#")
811
+ """
812
+ func = None
813
+ n_skip = 0
814
+ metadata_lines = []
815
+
816
+ with open(filepath, 'r') as f:
817
+ while func is None:
818
+ line = f.readline()
819
+ if line.startswith("#"):
820
+ n_skip += 1
821
+ metadata_lines.append(line)
822
+ elif line.startswith("timestamp,depth"):
823
+ func = _read_tsp_long
824
+ elif line.startswith("timestamp,"):
825
+ func = _read_tsp_wide
826
+ else:
827
+ raise ValueError("File is not a valid TSP file")
828
+
829
+ return func, n_skip, metadata_lines
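# Illustrative sketch of the two layouts _tsp_format_parse() distinguishes: leading
# "#" lines are counted as metadata, a header starting with "timestamp,depth"
# selects the long reader, and any other "timestamp,..." header selects the wide
# reader. The file contents and name below are hypothetical.
#
#   wide format (one column per depth):     long format (one row per reading):
#     # site: example                         # site: example
#     timestamp,0.5,1.0                       timestamp,depth,temperature
#     2020-01-01 00:00:00,-1.2,-0.8           2020-01-01 00:00:00,0.5,-1.2
from tsp.readers import read_tsp

t = read_tsp("example_wide.csv")  # dispatches to _read_tsp_wide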