tsp 1.8.1-py3-none-any.whl → 1.10.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (127)
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +159 -153
  4. tsp/core.py +1306 -1162
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/NTGS_gtr_example_excel.xlsx +0 -0
  10. tsp/data/example_geotop.csv +5240 -5240
  11. tsp/data/example_gtnp.csv +1298 -1298
  12. tsp/data/example_permos.csv +7 -7
  13. tsp/data/ntgs-db-multi.txt +3872 -0
  14. tsp/data/ntgs-db-single.txt +2251 -0
  15. tsp/data/test_geotop_has_space.txt +5 -5
  16. tsp/data/tsp_format_long.csv +10 -0
  17. tsp/data/tsp_format_wide_1.csv +7 -0
  18. tsp/data/tsp_format_wide_2.csv +7 -0
  19. tsp/dataloggers/AbstractReader.py +43 -43
  20. tsp/dataloggers/FG2.py +110 -110
  21. tsp/dataloggers/GP5W.py +114 -114
  22. tsp/dataloggers/Geoprecision.py +34 -34
  23. tsp/dataloggers/HOBO.py +930 -914
  24. tsp/dataloggers/RBRXL800.py +190 -190
  25. tsp/dataloggers/RBRXR420.py +371 -308
  26. tsp/dataloggers/Vemco.py +84 -0
  27. tsp/dataloggers/__init__.py +15 -15
  28. tsp/dataloggers/logr.py +196 -115
  29. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  30. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  31. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  32. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  33. tsp/dataloggers/test_files/010252.dat +1731 -1731
  34. tsp/dataloggers/test_files/010252.hex +1739 -1739
  35. tsp/dataloggers/test_files/010274.hex +1291 -1291
  36. tsp/dataloggers/test_files/010278.hex +3544 -3544
  37. tsp/dataloggers/test_files/012064.dat +1286 -1286
  38. tsp/dataloggers/test_files/012064.hex +1294 -1294
  39. tsp/dataloggers/test_files/012064_modified_start.hex +1294 -0
  40. tsp/dataloggers/test_files/012081.hex +3532 -3532
  41. tsp/dataloggers/test_files/013138_recovery_stamp.hex +1123 -0
  42. tsp/dataloggers/test_files/014037-2007.hex +95 -0
  43. tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.hex +11253 -0
  44. tsp/dataloggers/test_files/019360_20160918_1146_SlumpIslandTopofHill.xls +0 -0
  45. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  46. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  47. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  48. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  49. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16.txt +36 -0
  50. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_raw.csv +2074 -0
  51. tsp/dataloggers/test_files/2022018_2025-09-18T22-16-16_temp.csv +2074 -0
  52. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_cfg.txt +30 -0
  53. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_raw.csv +35 -0
  54. tsp/dataloggers/test_files/2025004_2025-12-02T17-07-28_temp.csv +35 -0
  55. tsp/dataloggers/test_files/204087.xlsx +0 -0
  56. tsp/dataloggers/test_files/Asc-1455As02.000 +2982 -0
  57. tsp/dataloggers/test_files/Asc-1456As02.000 +2992 -0
  58. tsp/dataloggers/test_files/Asc-1457As02.000 +2917 -0
  59. tsp/dataloggers/test_files/BGC_BH15_019362_20140610_1253.hex +1729 -0
  60. tsp/dataloggers/test_files/Bin2944.csv +759 -0
  61. tsp/dataloggers/test_files/Bin5494.csv +2972 -0
  62. tsp/dataloggers/test_files/Bin6786.csv +272 -0
  63. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  64. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  65. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  66. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  67. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  68. tsp/dataloggers/test_files/Minilog-II-T_350763_20190711_1.csv +2075 -0
  69. tsp/dataloggers/test_files/Minilog-II-T_350769_20190921_1.csv +6384 -0
  70. tsp/dataloggers/test_files/Minilog-II-T_354284_20190921_1.csv +4712 -0
  71. tsp/dataloggers/test_files/Minilog-T_7943_20140920_1.csv +5826 -0
  72. tsp/dataloggers/test_files/Minilog-T_8979_20140806_1.csv +2954 -0
  73. tsp/dataloggers/test_files/Minilog-T_975_20110824_1.csv +4343 -0
  74. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  75. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  76. tsp/dataloggers/test_files/RI03b_062831_20240905_1801.rsk +0 -0
  77. tsp/dataloggers/test_files/RI03b_062831_20240905_1801.xlsx +0 -0
  78. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  79. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  80. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  81. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  82. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  83. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  84. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  85. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  86. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  87. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  88. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  89. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  90. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  91. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  92. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  93. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  94. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  95. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  96. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  97. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  98. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  99. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  100. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  101. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  102. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  103. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  104. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  105. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  106. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  107. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  108. tsp/gtnp.py +148 -148
  109. tsp/labels.py +3 -3
  110. tsp/misc.py +90 -90
  111. tsp/physics.py +101 -101
  112. tsp/plots/static.py +388 -374
  113. tsp/readers.py +829 -548
  114. tsp/standardization/__init__.py +0 -0
  115. tsp/standardization/metadata.py +95 -0
  116. tsp/standardization/metadata_ref.py +0 -0
  117. tsp/standardization/validator.py +535 -0
  118. tsp/time.py +45 -45
  119. tsp/tspwarnings.py +27 -15
  120. tsp/utils.py +131 -101
  121. tsp/version.py +1 -1
  122. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/METADATA +95 -86
  123. tsp-1.10.2.dist-info/RECORD +132 -0
  124. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/licenses/LICENSE +674 -674
  125. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/top_level.txt +1 -0
  126. tsp-1.8.1.dist-info/RECORD +0 -94
  127. {tsp-1.8.1.dist-info → tsp-1.10.2.dist-info}/WHEEL +0 -0
tsp/dataloggers/RBRXR420.py
@@ -1,308 +1,371 @@
- import sqlite3
- import pathlib
- import warnings
- import numpy as np
- import pandas as pd
- import datetime as dt
- try:
-     from pyrsktools import RSK
- except ModuleNotFoundError:
-     warnings.warn("Missing pyRSKtools library. .rsk files can not be imported.")
- from .AbstractReader import AbstractReader
-
-
- class RBRXR420(AbstractReader):
-
-     def read(self, file_path: str) -> "pd.DataFrame":
-         """
-
-         Parameters
-         ----------
-         file_path
-
-         Returns
-         -------
-
-         """
-         file_extention = pathlib.Path(file_path).suffix.lower()
-         if file_extention in [".dat", ".hex"]:
-             with open(file_path, "r") as f:
-                 first_50 = [next(f) for i in range(50)]
-                 for line_num in range(len(first_50)):
-                     if first_50[line_num].lower().startswith("logger start:"):
-                         header_length = line_num + 1
-                         break
-
-             with open(file_path, "r") as f:
-                 header_lines = [next(f) for i in range(header_length)]
-                 self._parse_meta(header_lines)
-
-                 data_lines = f.readlines()
-             if file_extention == ".dat":
-                 line_num = 0
-                 for line_num in range(len(data_lines)):
-                     if data_lines[line_num] != "\n":
-                         split_line = data_lines[line_num].split()
-                     else:
-                         split_line = ["no data"]
-                     if split_line[0].lower() == "temp":
-                         break
-                 if line_num == len(data_lines) - 1:
-                     raise RuntimeError("No column names found")
-                 data_lines = data_lines[line_num:]
-                 first_line = data_lines[0].split()
-                 second_line = data_lines[1].split()
-
-                 if len(first_line) == len(second_line):
-                     self._read_standard_dat_format(data_lines[1:], False)
-                 elif len(first_line) + 2 == len(second_line):
-                     try:
-                         is_datetime = bool(dt.datetime.strptime(" ".join(second_line[:2]), "%Y/%m/%d %H:%M:%S"))
-                     except ValueError:
-                         is_datetime = False
-                     if is_datetime:
-                         self._read_standard_dat_format(data_lines[1:], True)
-                     else:
-                         raise RuntimeError("Error, expected date time with format %Y/%m/%d %H:%M:%S at start of"
-                                            "row.")
-                 else:
-                     raise RuntimeError("Error: Number of column names and number of columns do not match any"
-                                        "expected pattern.")
-
-             else:
-                 self._read_standard_hex_format(data_lines)
-         elif file_extention == ".xls":
-             self._read_standard_xls_format(file_path)
-         elif file_extention == ".xlsx":
-             self._read_standard_xlsx_format(file_path)
-         elif file_extention == ".rsk":
-             self._read_standard_rsk_format(file_path)
-         else:
-             raise IOError("Unrecognised file. File is not a .dat, .hex, .xls, .xlsx, or .rsk.")
-         return self.DATA
-
-     def _parse_meta(self, header_lines: list):
-         self.META["logger model"] = header_lines[0].split()[1]
-         self.META["logger SN"] = header_lines[0].split()[3]
-         sample_interval = dt.datetime.strptime(header_lines[5].split()[-1], "%H:%M:%S")
-         self.META["download date"] = dt.datetime.strptime(header_lines[1][14:31], "%y/%m/%d %H:%M:%S")
-         self.META["sample interval"] = dt.timedelta(hours=sample_interval.hour, minutes=sample_interval.minute,
-                                                     seconds=sample_interval.second)
-         self.META["logging start"] = dt.datetime.strptime(" ".join(header_lines[3].split()[-2:]),
-                                                           "%y/%m/%d %H:%M:%S")
-         line_7_info = header_lines[6].split(",")
-         self.META["num channels"] = int(line_7_info[0].split()[-1])
-         self.META["num samples"] = int(line_7_info[1].split()[-1])
-         formatting = header_lines[7].split("%")[1]
-         if formatting.endswith("\n"):
-             self.META["precision"] = int(formatting[-3])
-         else:
-             self.META["precision"] = int(formatting[-2])
-
-         self.META["calibration parameters"] = {}
-         calibration_start_line = 8
-         for i in range(self.META["num channels"]):
-             self.META["calibration parameters"][f"channel {i + 1}"] = {}
-             for j in range(4):
-                 line_num = calibration_start_line + 4 * i + j
-                 if header_lines[line_num].lower().startswith("calibration"):
-                     self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)]\
-                         = float(header_lines[line_num].split()[-1])
-                 else:
-                     self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)] \
-                         = float(header_lines[line_num].split()[0])
-
-         self.META['raw'] = "".join(header_lines)
-         return
-
-     def _read_standard_dat_format(self, raw_data: list, time_stamps: bool = False):
-         """
-
-         Parameters
-         ----------
-         raw_data
-         line_numbers
-
-         Returns
-         -------
-
-         """
-         self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
-         line_num = 0
-         for line in raw_data:
-             line_data = line.split()
-             if time_stamps:
-                 self.DATA.loc[dt.datetime.strptime(" ".join(line_data[:2]), "%Y/%m/%d %H:%M:%S")] = line_data[2:]
-             else:
-                 self.DATA.loc[self.META["logging start"] + self.META["sample interval"] * line_num] = line_data
-             line_num += 1
-         for col in self.DATA:
-             self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
-         self.DATA.reset_index(inplace=True)
-         self.DATA.rename(columns={"index": "TIME"}, inplace=True)
-         return
-
-     def _read_standard_hex_format(self, raw_data: list):
-         """
-
-         Parameters
-         ----------
-         raw_data
-
-         Returns
-         -------
-
-         """
-         for line_num in range(len(raw_data)):
-             if raw_data[line_num].lower().startswith("number of bytes of data"):
-                 hex_header_length = line_num + 2
-                 break
-             elif raw_data[line_num].lower().startswith("number of bytes in header"):
-                 header_bytes = int(raw_data[line_num].split()[-1])
-         num_hex_header_values = int(header_bytes / 3)
-         hex_vals = []
-         raw_data = raw_data[hex_header_length:]
-         for line_num in range(len(raw_data)):
-             line = raw_data[line_num]
-             line_hex_vals = [line[i: i + 6] for i in range(0, len(line), 6)][:-1]
-             for hex_val in line_hex_vals:
-                 hex_vals.append(hex_val)
-         hex_vals = hex_vals[num_hex_header_values:]
-
-         self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
-         line_num = 0
-         hex_num = 0
-         for line in range(self.META["num samples"]):
-             line_time = self.META["logging start"] + self.META["sample interval"] * line_num
-             time_hex_vals = hex_vals[hex_num: hex_num + 8]
-             line_vals = [int(h, 16) / int("FFFFFF", 16) for h in time_hex_vals]
-             line_temps = []
-             for channel in range(len(line_vals)):
-                 val = line_vals[channel]
-                 if val not in [0, 1]:
-                     a = self.META["calibration parameters"][f"channel {channel + 1}"]["a"]
-                     b = self.META["calibration parameters"][f"channel {channel + 1}"]["b"]
-                     c = self.META["calibration parameters"][f"channel {channel + 1}"]["c"]
-                     d = self.META["calibration parameters"][f"channel {channel + 1}"]["d"]
-                     x = np.log((1 / val) - 1)
-                     temp = 1 / (a + b * x + c * x**2 + d * x**3) - 273.15
-                     line_temps.append(round(temp, self.META["precision"]))
-                 else:
-                     line_temps.append(np.nan)
-             self.DATA.loc[line_time] = line_temps
-             line_num += 1
-             hex_num += 8
-         for col in self.DATA:
-             self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
-         self.DATA.reset_index(inplace=True)
-         self.DATA.rename(columns={"index": "TIME"}, inplace=True)
-         return
-
-     def _read_standard_xls_format(self, file_path: str):
-         xls = pd.ExcelFile(file_path)
-         sheet = xls.sheet_names[0]
-         xls.close()
-         raw_data = pd.read_excel(file_path, sheet, header=None)
-         raw_meta = raw_data.iloc[:5].copy()
-         if raw_meta.iloc[0, 0] != "RBR data file":
-             raise IOError("Not a valid .xls file")
-         meta = {}
-         for i, r in raw_meta.iterrows():
-             for j in range(0, len(r) - 1, 2):
-                 if not pd.isna(raw_meta.iloc[i, j]):
-                     meta[raw_meta.iloc[i, j]] = raw_meta.iloc[i, j + 1]
-         self.META["logger model"] = meta["Model:"]
-         self.META["logger SN"] = meta["Serial Number:"]
-         self.META["sample interval"] = dt.timedelta(seconds=int(meta["Logging sampling period (s):"]))
-         self.META["logging start"] = dt.datetime.strptime(meta["Logging start time:"], "%Y/%m/%d")
-
-         column_names = {}
-         for col in raw_data:
-             if col == 0:
-                 col_name = "TIME"
-             else:
-                 col_name = f"channel {col}"
-             column_names[col] = col_name
-         self.DATA = raw_data.iloc[6:].copy()
-         self.DATA.reset_index(drop=True, inplace=True)
-         self.DATA.rename(columns=column_names, inplace=True)
-         for col in self.DATA:
-             if col == "TIME":
-                 self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%d/%m/%Y %H:%M:%S.%f")
-             else:
-                 self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
-         return
-
-     def _read_standard_xlsx_format(self, file_path: str):
-         meta_table = {"Instrument": pd.read_excel(file_path, sheet_name="Metadata", header=9, nrows=1),
-                       "Schedule": pd.read_excel(file_path, sheet_name="Metadata", header=24, nrows=1),
-                       "Sampling": pd.read_excel(file_path, sheet_name="Metadata", header=28, nrows=1)}
-         self.META["logger model"] = meta_table["Instrument"]["Model"].loc[0]
-         self.META["logger SN"] = meta_table["Instrument"]["Serial"].loc[0]
-         self.META["sample interval"] = dt.timedelta(seconds=int(meta_table["Sampling"]["Period"].loc[0]))
-         self.META["logging start"] = meta_table["Schedule"]["Start time"].loc[0]
-
-         self.DATA = pd.read_excel(file_path, sheet_name="Data", header=1)
-
-         column_names = {}
-         for col in self.DATA:
-             if col == "Time":
-                 col_name = "TIME"
-             elif col == "Temperature":
-                 col_name = "channel 1"
-             else:
-                 col_name = f"channel {int(col.split('.')[-1]) + 1}"
-             column_names[col] = col_name
-         self.DATA.rename(columns=column_names, inplace=True)
-
-         for col in self.DATA:
-             if col == "TIME":
-                 self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%Y-%m-%d %H:%M:%S.%f")
-             else:
-                 self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
-         return
-
-     def _read_standard_rsk_format(self, file_path: str):
-         raw_meta = {}
-         try:
-             with RSK(file_path) as rsk:
-                 rsk.open()
-                 rsk.readdata()
-                 rsk_data = rsk.data
-                 raw_meta["calibration"] = rsk.calibrations
-                 raw_meta["instrument"] = rsk.instrument
-                 raw_meta["schedule"] = rsk.scheduleInfo
-                 raw_meta["parameter key"] = rsk.parameterKeys
-                 raw_meta["epoch"] = rsk.epoch
-         except NameError:
-             raise ModuleNotFoundError("You must install pyRSKtools")
-         except sqlite3.OperationalError:
-             raise RuntimeError("An error occurred when opening the .rsk file. Try opening the .rsk file in the ruskin\n"
-                                " software then rerunning the code.")
-         self.DATA = pd.DataFrame(rsk_data)
-
-         self.META["logger model"] = raw_meta["instrument"].model
-         self.META["logger SN"] = raw_meta["instrument"].serialID
-         self.META["sample interval"] = dt.timedelta(seconds=raw_meta["schedule"].samplingPeriod/1000)
-         self.META["logging start"] = raw_meta["epoch"].startTime
-         self.META["utc offset"] = [int(float(element.value) * 3600) for element in raw_meta["parameter key"]
-                                    if element.key == "OFFSET_FROM_UTC"][0]
-         self.META["calibration parameters"] = {}
-         for cal in raw_meta["calibration"]:
-             self.META["calibration parameters"][f"channel {cal.channelOrder}"] = {}
-             self.META["calibration parameters"][f"channel {cal.channelOrder}"]["a"] = cal.c[0]
-             self.META["calibration parameters"][f"channel {cal.channelOrder}"]["b"] = cal.c[1]
-             self.META["calibration parameters"][f"channel {cal.channelOrder}"]["c"] = cal.c[2]
-             self.META["calibration parameters"][f"channel {cal.channelOrder}"]["d"] = cal.c[3]
-
-         column_names = {}
-         for col in self.DATA:
-             if col == "timestamp":
-                 col_name = "TIME"
-             elif col == "temperature":
-                 col_name = "channel 1"
-             else:
-                 col_name = f"channel {int(col[-1]) + 1}"
-             column_names[col] = col_name
-         self.DATA.rename(columns=column_names, inplace=True)
-         return
+ import sqlite3
+ import re
+ import pathlib
+ import warnings
+ import numpy as np
+ import pandas as pd
+ import datetime as dt
+ from copy import deepcopy
+
+ try:
+     from pyrsktools import RSK
+ except ModuleNotFoundError:
+     warnings.warn("Missing pyRSKtools library. .rsk files can not be imported.")
+ from .AbstractReader import AbstractReader
+
+
+ class RBRXR420(AbstractReader):
+
+     def read(self, file_path: str) -> "pd.DataFrame":
+         """
+
+         Parameters
+         ----------
+         file_path
+
+         Returns
+         -------
+
+         """
+         file_extention = pathlib.Path(file_path).suffix.lower()
+         if file_extention in [".dat", ".hex"]:
+             with open(file_path, "r") as f:
+                 num_lines = len(f.readlines())
+             with open(file_path, "r") as f:
+                 first_75 = [next(f) for i in range(min([75, num_lines]))]
+                 for line_num in range(len(first_75)):
+                     if first_75[line_num].lower().startswith("logger start:"):
+                         header_length = line_num + 1
+                         break
+             with open(file_path, "r") as f:
+                 header_lines = [next(f) for i in range(header_length)]
+                 self._parse_meta(header_lines)
+
+                 data_lines = f.readlines()
+             if file_extention == ".dat":
+                 line_num = 0
+                 for line_num in range(len(data_lines)):
+                     if data_lines[line_num] != "\n":
+                         split_line = data_lines[line_num].split()
+                     else:
+                         split_line = ["no data"]
+                     if split_line[0].lower() == "temp":
+                         break
+                 if line_num == len(data_lines) - 1:
+                     raise RuntimeError("No column names found")
+                 data_lines = data_lines[line_num:]
+                 first_line = data_lines[0].split()
+                 second_line = data_lines[1].split()
+
+                 if len(first_line) == len(second_line):
+                     self._read_standard_dat_format(data_lines[1:], False)
+                 elif len(first_line) + 2 == len(second_line):
+                     try:
+                         is_datetime = bool(dt.datetime.strptime(" ".join(second_line[:2]), "%Y/%m/%d %H:%M:%S"))
+                     except ValueError:
+                         is_datetime = False
+                     if is_datetime:
+                         self._read_standard_dat_format(data_lines[1:], True)
+                     else:
+                         raise RuntimeError("Error, expected date time with format %Y/%m/%d %H:%M:%S at start of"
+                                            "row.")
+                 else:
+                     raise RuntimeError("Error: Number of column names and number of columns do not match any"
+                                        "expected pattern.")
+
+             else:
+                 self._read_standard_hex_format(data_lines)
+         elif file_extention == ".xls":
+             self._read_standard_xls_format(file_path)
+         elif file_extention == ".xlsx":
+             self._read_standard_xlsx_format(file_path)
+         elif file_extention == ".rsk":
+             self._read_standard_rsk_format(file_path)
+         else:
+             raise IOError("Unrecognised file. File is not a .dat, .hex, .xls, .xlsx, or .rsk.")
+         return self.DATA
+
+     def _parse_meta(self, header_lines: list):
+         self.META["logger_model"] = header_lines[0].split()[1]
+         self.META["logger_sn"] = header_lines[0].split()[3]
+         sample_interval = dt.datetime.strptime(header_lines[5].split()[-1], "%H:%M:%S")
+         self.META["download_date"] = dt.datetime.strptime(header_lines[1][14:31], "%y/%m/%d %H:%M:%S")
+         self.META["sample_interval"] = dt.timedelta(hours=sample_interval.hour, minutes=sample_interval.minute,
+                                                     seconds=sample_interval.second)
+         self.META["logging_start"] = dt.datetime.strptime(" ".join(header_lines[3].split()[-2:]),
+                                                           "%y/%m/%d %H:%M:%S")
+         line_7_info = header_lines[6].split(",")
+         self.META["num_channels"] = int(line_7_info[0].split()[-1])
+         self.META["num_samples"] = int(line_7_info[1].split()[-1])
+         formatting = header_lines[7].split("%")[1]
+         if formatting.endswith("\n"):
+             self.META["precision"] = int(formatting[-3])
+         else:
+             self.META["precision"] = int(formatting[-2])
+
+         self.META["calibration_parameters"] = {}
+         calibration_start_line = 8
+         for i in range(self.META["num_channels"]):
+             self.META["calibration_parameters"][f"channel_{i + 1}"] = {}
+             for j in range(4):
+                 line_num = calibration_start_line + 4 * i + j
+                 if header_lines[line_num].lower().startswith("calibration"):
+                     self.META["calibration_parameters"][f"channel_{i + 1}"][chr(ord("a") + j)] \
+                         = float(header_lines[line_num].split()[-1])
+                 else:
+                     self.META["calibration_parameters"][f"channel_{i + 1}"][chr(ord("a") + j)] \
+                         = float(header_lines[line_num].split()[0])
+
+         self.META['raw'] = "".join(header_lines)
+         self.META["internal_log"] = []
+         return
+
+     def _read_standard_dat_format(self, raw_data: list, time_stamps: bool = False):
+         """
+
+         Parameters
+         ----------
+         raw_data
+         line_numbers
+
+         Returns
+         -------
+
+         """
+         self.DATA = pd.DataFrame(columns=[f"channel_{i + 1}" for i in range(self.META["num_channels"])])
+         line_num = 0
+         for line in raw_data:
+             line_data = line.split()
+             if time_stamps:
+                 self.DATA.loc[dt.datetime.strptime(" ".join(line_data[:2]), "%Y/%m/%d %H:%M:%S")] = line_data[2:]
+             else:
+                 self.DATA.loc[self.META["logging_start"] + self.META["sample_interval"] * line_num] = line_data
+             line_num += 1
+         for col in self.DATA:
+             self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
+         self.DATA.reset_index(inplace=True)
+         self.DATA.rename(columns={"index": "TIME"}, inplace=True)
+         return
+
+     def _read_standard_hex_format(self, raw_data: list):
+         """
+
+         Parameters
+         ----------
+         raw_data
+
+         Returns
+         -------
+
+         """
+         log_line_numbers = []
+         for line_num in range(len(raw_data)):
+             if raw_data[line_num].lower().startswith("number of bytes of data"):
+                 hex_header_length = line_num + 2
+                 break
+             elif raw_data[line_num].lower().startswith("number of bytes in header"):
+                 header_bytes = int(raw_data[line_num].split()[-1])
+             elif raw_data[line_num].lower().startswith(" ") or raw_data[line_num].lower().startswith("\n"):
+                 pass
+             elif raw_data[line_num].lower().startswith("extended data range"):
+                 self.META["extended data range"] = raw_data[line_num].split()[-1]
+             else:
+                 raw_log = raw_data[line_num][:-1]
+                 time_stamp_regex = re.search(r"\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}", raw_log)
+                 time_stamp = time_stamp_regex.group(0)
+                 # action = raw_log[:time_stamp_regex.start() - 1]
+                 sample_num = int(re.search(r"\d+", raw_log[time_stamp_regex.end():]).group(0))
+                 action_type = re.search(": .+", raw_log).group(0)[2:]
+                 self.META["internal_log"].append({"timestamp": time_stamp, "sample_num": sample_num,
+                                                   "action": action_type})
+                 log_line_numbers.append(sample_num)
+         num_hex_header_values = int(header_bytes / 3)
+         hex_vals = []
+         raw_data = raw_data[hex_header_length:]
+         for line_num in range(len(raw_data)):
+             line = raw_data[line_num].replace(" ", "")
+             line_hex_vals = [line[i: i + 6] for i in range(0, len(line), 6)][:-1]
+             for hex_val in line_hex_vals:
+                 hex_vals.append(hex_val)
+         hex_vals = hex_vals[num_hex_header_values:]
+
+         self.DATA = pd.DataFrame(columns=[f"channel_{i + 1}" for i in range(self.META["num_channels"])])
+         line_num = 0
+         hex_num = 0
+         reference_time = deepcopy(self.META["logging_start"])
+         interval_num = 0
+         for line in range(self.META["num_samples"]):
+             if line_num + 1 not in log_line_numbers:
+                 line_time = reference_time + self.META["sample_interval"] * interval_num
+             else:
+                 log_records = [ele for ele in self.META["internal_log"] if ele["sample_num"] == line_num + 1]
+                 for ele in log_records:
+                     if ele["action"] in ["TIME STAMP", "RTC STAMP"]:
+                         line_time = dt.datetime.strptime(ele["timestamp"], "%Y/%m/%d %H:%M:%S")
+                         interval_num = 0
+                         reference_time = line_time
+                         hex_num += 4
+                     elif ele["action"] == "RECOVERY STAMP":
+                         if ele["sample_num"] != self.META["num_samples"]:
+                             raise RuntimeError("Recovery stamp logged part way through data. Logger may have been "
+                                                "reset then continued logging. Open file in RBR software and "
+                                                "investigate.")
+                     else:
+                         warnings.warn(f"{ele}")
+             time_hex_vals = hex_vals[hex_num: hex_num + 8]
+             line_vals = [int(h, 16) / int("FFFFFF", 16) for h in time_hex_vals]
+             line_temps = []
+             for channel in range(len(line_vals)):
+                 val = line_vals[channel]
+                 if val not in [0, 1]:
+                     a = self.META["calibration_parameters"][f"channel_{channel + 1}"]["a"]
+                     b = self.META["calibration_parameters"][f"channel_{channel + 1}"]["b"]
+                     c = self.META["calibration_parameters"][f"channel_{channel + 1}"]["c"]
+                     d = self.META["calibration_parameters"][f"channel_{channel + 1}"]["d"]
+                     x = np.log((1 / val) - 1)
+                     temp = 1 / (a + b * x + c * x ** 2 + d * x ** 3) - 273.15
+                     line_temps.append(round(temp, self.META["precision"]))
+                 else:
+                     line_temps.append(np.nan)
+             self.DATA.loc[line_time] = line_temps
+             line_num += 1
+             interval_num += 1
+             hex_num += 8
+         for col in self.DATA:
+             self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
+         self.DATA.reset_index(inplace=True)
+         self.DATA.rename(columns={"index": "TIME"}, inplace=True)
+         return
+
+     def _read_standard_xls_format(self, file_path: str):
+         xls = pd.ExcelFile(file_path)
+         sheet = xls.sheet_names[0]
+         xls.close()
+         raw_data = pd.read_excel(file_path, sheet, header=None)
+         raw_meta = raw_data.iloc[:5].copy()
+         if raw_meta.iloc[0, 0] != "RBR data file":
+             raise IOError("Not a valid .xls file")
+         meta = {}
+         for i, r in raw_meta.iterrows():
+             for j in range(0, len(r) - 1, 2):
+                 if not pd.isna(raw_meta.iloc[i, j]):
+                     meta[raw_meta.iloc[i, j]] = raw_meta.iloc[i, j + 1]
+         self.META["logger_model"] = meta["Model:"]
+         self.META["logger_sn"] = meta["Serial Number:"]
+         self.META["sample_interval"] = dt.timedelta(seconds=int(meta["Logging sampling period (s):"]))
+         self.META["logging_start"] = dt.datetime.strptime(meta["Logging start time:"], "%Y/%m/%d")
+
+         column_names = {}
+         for col in raw_data:
+             if col == 0:
+                 col_name = "TIME"
+             else:
+                 col_name = f"channel_{col}"
+             column_names[col] = col_name
+         self.DATA = raw_data.iloc[6:].copy()
+         self.DATA.reset_index(drop=True, inplace=True)
+         self.DATA.rename(columns=column_names, inplace=True)
+         for col in self.DATA:
+             if col == "TIME":
+                 self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%d/%m/%Y %H:%M:%S.%f")
+             else:
+                 self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
+         return
+
+     def _read_standard_xlsx_format(self, file_path: str):
+         with warnings.catch_warnings():
+             warnings.simplefilter("ignore")
+             version = pd.read_excel(file_path, sheet_name="Metadata", header=1, nrows=1)
+         known_formats = {"2.7.3": [9, 24, 28, 5], "2.19.1": [12, 62, 66, 5]}
+         meta_table = None
+         for k, v in known_formats.items():
+             try:
+                 with warnings.catch_warnings():
+                     warnings.simplefilter("ignore")
+                     mt = {"Instrument": pd.read_excel(file_path, sheet_name="Metadata", header=v[0], nrows=1),
+                           "Schedule": pd.read_excel(file_path, sheet_name="Metadata", header=v[1], nrows=1),
+                           "Sampling": pd.read_excel(file_path, sheet_name="Metadata", header=v[2], nrows=1),
+                           "Export": pd.read_excel(file_path, sheet_name="Metadata", header=v[3], nrows=1)}
+             except:
+                 pass
+             else:
+                 if "Model" in mt["Instrument"]:
+                     meta_table = mt
+                     print(f"RBR xlsx version {version.loc[0, 'Ruskin']} read as version {k}")
+                     break
+
+         if meta_table is None:
+             raise ValueError(f"Unrecognized formatting (version {version.loc[0, 'Ruskin']}). this code has been tested "
+                              f"on v1.12.1, v2.7.3, and v2.19.1")
+         self.META["logger_model"] = meta_table["Instrument"]["Model"].loc[0]
+         self.META["logger_sn"] = meta_table["Instrument"]["Serial"].loc[0]
+         self.META["sample_interval"] = dt.timedelta(seconds=int(meta_table["Sampling"]["Period"].loc[0]))
+         self.META["logging_start"] = meta_table["Schedule"]["Start time"].loc[0]
+         self.META["download_date"] = meta_table["Export"].loc[0, "Export Time"]
+
+         with warnings.catch_warnings():
+             warnings.simplefilter("ignore")
+             self.DATA = pd.read_excel(file_path, sheet_name="Data", header=1)
+         column_names = {}
+         for col in self.DATA:
+             if col == "Time":
+                 col_name = "TIME"
+             elif col == "Temperature":
+                 col_name = "channel_1"
+             else:
+                 col_name = f"channel_{int(col.split('.')[-1]) + 1}"
+             column_names[col] = col_name
+         self.DATA.rename(columns=column_names, inplace=True)
+
+         for col in self.DATA:
+             if col == "TIME":
+                 self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%Y-%m-%d %H:%M:%S.%f")
+             else:
+                 self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
+         return
+
+     def _read_standard_rsk_format(self, file_path: str):
+         raw_meta = {}
+         try:
+             with RSK(file_path) as rsk:
+                 rsk.open()
+                 rsk.readdata()
+                 rsk_data = rsk.data
+                 raw_meta["calibration"] = rsk.calibrations
+                 raw_meta["deployment"] = rsk.deployment
+                 raw_meta["instrument"] = rsk.instrument
+                 raw_meta["schedule"] = rsk.scheduleInfo
+                 raw_meta["parameter key"] = rsk.parameterKeys
+                 raw_meta["epoch"] = rsk.epoch
+         except NameError:
+             raise ModuleNotFoundError("You must install pyRSKtools")
+         except sqlite3.OperationalError:
+             raise RuntimeError("An error occurred when opening the .rsk file. Try opening the .rsk file in the ruskin\n"
+                                " software then rerunning the code.")
+         self.DATA = pd.DataFrame(rsk_data)
+         self.META["logger_model"] = raw_meta["instrument"].model
+         self.META["logger_sn"] = str(raw_meta["instrument"].serialID)
+         self.META["download_date"] = raw_meta["deployment"].timeOfDownload.astype(dt.datetime)
+         self.META["sample_interval"] = dt.timedelta(seconds=raw_meta["schedule"].samplingPeriod / 1000)
+         self.META["logging_start"] = raw_meta["epoch"].startTime
+         utc_offset = [element.value for element in raw_meta["parameter key"] if element.key == "OFFSET_FROM_UTC"][0]
+         if pd.notna(utc_offset) and str(utc_offset).lower() != "nan":
+             self.META["utc_offset"] = int(round(float(utc_offset) * 3600, 0))
+         self.META["calibration_parameters"] = {}
+         for cal in raw_meta["calibration"]:
+             self.META["calibration_parameters"][f"channel_{cal.channelOrder}"] = {}
+             self.META["calibration_parameters"][f"channel_{cal.channelOrder}"]["a"] = cal.c[0]
+             self.META["calibration_parameters"][f"channel_{cal.channelOrder}"]["b"] = cal.c[1]
+             self.META["calibration_parameters"][f"channel_{cal.channelOrder}"]["c"] = cal.c[2]
+             self.META["calibration_parameters"][f"channel_{cal.channelOrder}"]["d"] = cal.c[3]
+         column_names = {}
+         for col in self.DATA:
+             if col == "timestamp":
+                 col_name = "TIME"
+             elif col == "temperature":
+                 col_name = "channel_1"
+             else:
+                 col_name = f"channel_{int(col[-1]) + 1}"
+             column_names[col] = col_name
+         self.DATA.rename(columns=column_names, inplace=True)
+         return
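
For orientation, a minimal usage sketch of the rewritten reader (not part of the package diff). It assumes AbstractReader gives RBRXR420 a no-argument constructor that initializes the META and DATA attributes, as the methods above imply, and it points at one of the bundled test files listed in the file table.

from tsp.dataloggers.RBRXR420 import RBRXR420

# Read a bundled RBR test file and inspect the parsed metadata and data.
reader = RBRXR420()
data = reader.read("tsp/dataloggers/test_files/rbr_001.hex")

# read() returns a DataFrame with a TIME column plus one channel_N column per
# sensor; header fields land in reader.META (keys renamed to snake_case in 1.10.2).
print(reader.META["logger_model"], reader.META["sample_interval"])
print(data.head())

For .hex exports, timestamps are reconstructed from the header's logging start, the sample interval, and any TIME STAMP/RTC STAMP entries in the internal log, and each raw 24-bit reading is converted with the four-coefficient thermistor calibration shown in _read_standard_hex_format: x = ln(1/val - 1), T = 1/(a + b*x + c*x^2 + d*x^3) - 273.15.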