tsp-1.7.7-py3-none-any.whl → tsp-1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tsp might be problematic. Click here for more details.

Files changed (92)
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/concatenation.py +153 -0
  4. tsp/core.py +1108 -1035
  5. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  6. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  7. tsp/data/NTGS_example_csv.csv +6 -6
  8. tsp/data/NTGS_example_slash_dates.csv +6 -6
  9. tsp/data/example_geotop.csv +5240 -5240
  10. tsp/data/example_gtnp.csv +1298 -1298
  11. tsp/data/example_permos.csv +7 -7
  12. tsp/data/test_geotop_has_space.txt +5 -5
  13. tsp/dataloggers/AbstractReader.py +43 -43
  14. tsp/dataloggers/FG2.py +110 -110
  15. tsp/dataloggers/GP5W.py +114 -114
  16. tsp/dataloggers/Geoprecision.py +34 -34
  17. tsp/dataloggers/HOBO.py +914 -914
  18. tsp/dataloggers/RBRXL800.py +190 -190
  19. tsp/dataloggers/RBRXR420.py +308 -308
  20. tsp/dataloggers/__init__.py +15 -15
  21. tsp/dataloggers/logr.py +115 -115
  22. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  23. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  24. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  25. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  26. tsp/dataloggers/test_files/010252.dat +1731 -1731
  27. tsp/dataloggers/test_files/010252.hex +1739 -1739
  28. tsp/dataloggers/test_files/010274.hex +1291 -1291
  29. tsp/dataloggers/test_files/010278.hex +3544 -3544
  30. tsp/dataloggers/test_files/012064.dat +1286 -1286
  31. tsp/dataloggers/test_files/012064.hex +1294 -1294
  32. tsp/dataloggers/test_files/012081.hex +3532 -3532
  33. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  34. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  35. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  36. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  37. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  38. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  39. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  40. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  41. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  42. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  43. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  44. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  45. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  46. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  47. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  48. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  49. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  50. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  51. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  52. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  53. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  54. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  55. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  56. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  57. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  58. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  59. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  60. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  61. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  62. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  63. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  64. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  65. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  66. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  67. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  68. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  69. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  70. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  71. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  72. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  73. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  74. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  75. tsp/gtnp.py +148 -148
  76. tsp/labels.py +3 -3
  77. tsp/misc.py +90 -90
  78. tsp/physics.py +101 -101
  79. tsp/plots/static.py +373 -373
  80. tsp/readers.py +548 -548
  81. tsp/time.py +45 -45
  82. tsp/tspwarnings.py +14 -14
  83. tsp/utils.py +101 -101
  84. tsp/version.py +1 -1
  85. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/METADATA +30 -23
  86. tsp-1.8.0.dist-info/RECORD +94 -0
  87. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/WHEEL +5 -5
  88. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info/licenses}/LICENSE +674 -674
  89. tsp/dataloggers/test_files/CSc_CR1000_1.dat +0 -295
  90. tsp/scratch.py +0 -6
  91. tsp-1.7.7.dist-info/RECORD +0 -95
  92. {tsp-1.7.7.dist-info → tsp-1.8.0.dist-info}/top_level.txt +0 -0
tsp/dataloggers/RBRXR420.py
@@ -1,308 +1,308 @@
1
- import sqlite3
2
- import pathlib
3
- import warnings
4
- import numpy as np
5
- import pandas as pd
6
- import datetime as dt
7
- try:
8
- from pyrsktools import RSK
9
- except ModuleNotFoundError:
10
- warnings.warn("Missing pyRSKtools library. .rsk files can not be imported.")
11
- from .AbstractReader import AbstractReader
12
-
13
-
14
- class RBRXR420(AbstractReader):
15
-
16
- def read(self, file_path: str) -> "pd.DataFrame":
17
- """
18
-
19
- Parameters
20
- ----------
21
- file_path
22
-
23
- Returns
24
- -------
25
-
26
- """
27
- file_extention = pathlib.Path(file_path).suffix.lower()
28
- if file_extention in [".dat", ".hex"]:
29
- with open(file_path, "r") as f:
30
- first_50 = [next(f) for i in range(50)]
31
- for line_num in range(len(first_50)):
32
- if first_50[line_num].lower().startswith("logger start:"):
33
- header_length = line_num + 1
34
- break
35
-
36
- with open(file_path, "r") as f:
37
- header_lines = [next(f) for i in range(header_length)]
38
- self._parse_meta(header_lines)
39
-
40
- data_lines = f.readlines()
41
- if file_extention == ".dat":
42
- line_num = 0
43
- for line_num in range(len(data_lines)):
44
- if data_lines[line_num] != "\n":
45
- split_line = data_lines[line_num].split()
46
- else:
47
- split_line = ["no data"]
48
- if split_line[0].lower() == "temp":
49
- break
50
- if line_num == len(data_lines) - 1:
51
- raise RuntimeError("No column names found")
52
- data_lines = data_lines[line_num:]
53
- first_line = data_lines[0].split()
54
- second_line = data_lines[1].split()
55
-
56
- if len(first_line) == len(second_line):
57
- self._read_standard_dat_format(data_lines[1:], False)
58
- elif len(first_line) + 2 == len(second_line):
59
- try:
60
- is_datetime = bool(dt.datetime.strptime(" ".join(second_line[:2]), "%Y/%m/%d %H:%M:%S"))
61
- except ValueError:
62
- is_datetime = False
63
- if is_datetime:
64
- self._read_standard_dat_format(data_lines[1:], True)
65
- else:
66
- raise RuntimeError("Error, expected date time with format %Y/%m/%d %H:%M:%S at start of"
67
- "row.")
68
- else:
69
- raise RuntimeError("Error: Number of column names and number of columns do not match any"
70
- "expected pattern.")
71
-
72
- else:
73
- self._read_standard_hex_format(data_lines)
74
- elif file_extention == ".xls":
75
- self._read_standard_xls_format(file_path)
76
- elif file_extention == ".xlsx":
77
- self._read_standard_xlsx_format(file_path)
78
- elif file_extention == ".rsk":
79
- self._read_standard_rsk_format(file_path)
80
- else:
81
- raise IOError("Unrecognised file. File is not a .dat, .hex, .xls, .xlsx, or .rsk.")
82
- return self.DATA
83
-
84
- def _parse_meta(self, header_lines: list):
85
- self.META["logger model"] = header_lines[0].split()[1]
86
- self.META["logger SN"] = header_lines[0].split()[3]
87
- sample_interval = dt.datetime.strptime(header_lines[5].split()[-1], "%H:%M:%S")
88
- self.META["download date"] = dt.datetime.strptime(header_lines[1][14:31], "%y/%m/%d %H:%M:%S")
89
- self.META["sample interval"] = dt.timedelta(hours=sample_interval.hour, minutes=sample_interval.minute,
90
- seconds=sample_interval.second)
91
- self.META["logging start"] = dt.datetime.strptime(" ".join(header_lines[3].split()[-2:]),
92
- "%y/%m/%d %H:%M:%S")
93
- line_7_info = header_lines[6].split(",")
94
- self.META["num channels"] = int(line_7_info[0].split()[-1])
95
- self.META["num samples"] = int(line_7_info[1].split()[-1])
96
- formatting = header_lines[7].split("%")[1]
97
- if formatting.endswith("\n"):
98
- self.META["precision"] = int(formatting[-3])
99
- else:
100
- self.META["precision"] = int(formatting[-2])
101
-
102
- self.META["calibration parameters"] = {}
103
- calibration_start_line = 8
104
- for i in range(self.META["num channels"]):
105
- self.META["calibration parameters"][f"channel {i + 1}"] = {}
106
- for j in range(4):
107
- line_num = calibration_start_line + 4 * i + j
108
- if header_lines[line_num].lower().startswith("calibration"):
109
- self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)]\
110
- = float(header_lines[line_num].split()[-1])
111
- else:
112
- self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)] \
113
- = float(header_lines[line_num].split()[0])
114
-
115
- self.META['raw'] = "".join(header_lines)
116
- return
117
-
118
- def _read_standard_dat_format(self, raw_data: list, time_stamps: bool = False):
119
- """
120
-
121
- Parameters
122
- ----------
123
- raw_data
124
- line_numbers
125
-
126
- Returns
127
- -------
128
-
129
- """
130
- self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
131
- line_num = 0
132
- for line in raw_data:
133
- line_data = line.split()
134
- if time_stamps:
135
- self.DATA.loc[dt.datetime.strptime(" ".join(line_data[:2]), "%Y/%m/%d %H:%M:%S")] = line_data[2:]
136
- else:
137
- self.DATA.loc[self.META["logging start"] + self.META["sample interval"] * line_num] = line_data
138
- line_num += 1
139
- for col in self.DATA:
140
- self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
141
- self.DATA.reset_index(inplace=True)
142
- self.DATA.rename(columns={"index": "TIME"}, inplace=True)
143
- return
144
-
145
- def _read_standard_hex_format(self, raw_data: list):
146
- """
147
-
148
- Parameters
149
- ----------
150
- raw_data
151
-
152
- Returns
153
- -------
154
-
155
- """
156
- for line_num in range(len(raw_data)):
157
- if raw_data[line_num].lower().startswith("number of bytes of data"):
158
- hex_header_length = line_num + 2
159
- break
160
- elif raw_data[line_num].lower().startswith("number of bytes in header"):
161
- header_bytes = int(raw_data[line_num].split()[-1])
162
- num_hex_header_values = int(header_bytes / 3)
163
- hex_vals = []
164
- raw_data = raw_data[hex_header_length:]
165
- for line_num in range(len(raw_data)):
166
- line = raw_data[line_num]
167
- line_hex_vals = [line[i: i + 6] for i in range(0, len(line), 6)][:-1]
168
- for hex_val in line_hex_vals:
169
- hex_vals.append(hex_val)
170
- hex_vals = hex_vals[num_hex_header_values:]
171
-
172
- self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
173
- line_num = 0
174
- hex_num = 0
175
- for line in range(self.META["num samples"]):
176
- line_time = self.META["logging start"] + self.META["sample interval"] * line_num
177
- time_hex_vals = hex_vals[hex_num: hex_num + 8]
178
- line_vals = [int(h, 16) / int("FFFFFF", 16) for h in time_hex_vals]
179
- line_temps = []
180
- for channel in range(len(line_vals)):
181
- val = line_vals[channel]
182
- if val not in [0, 1]:
183
- a = self.META["calibration parameters"][f"channel {channel + 1}"]["a"]
184
- b = self.META["calibration parameters"][f"channel {channel + 1}"]["b"]
185
- c = self.META["calibration parameters"][f"channel {channel + 1}"]["c"]
186
- d = self.META["calibration parameters"][f"channel {channel + 1}"]["d"]
187
- x = np.log((1 / val) - 1)
188
- temp = 1 / (a + b * x + c * x**2 + d * x**3) - 273.15
189
- line_temps.append(round(temp, self.META["precision"]))
190
- else:
191
- line_temps.append(np.nan)
192
- self.DATA.loc[line_time] = line_temps
193
- line_num += 1
194
- hex_num += 8
195
- for col in self.DATA:
196
- self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
197
- self.DATA.reset_index(inplace=True)
198
- self.DATA.rename(columns={"index": "TIME"}, inplace=True)
199
- return
200
-
201
- def _read_standard_xls_format(self, file_path: str):
202
- xls = pd.ExcelFile(file_path)
203
- sheet = xls.sheet_names[0]
204
- xls.close()
205
- raw_data = pd.read_excel(file_path, sheet, header=None)
206
- raw_meta = raw_data.iloc[:5].copy()
207
- if raw_meta.iloc[0, 0] != "RBR data file":
208
- raise IOError("Not a valid .xls file")
209
- meta = {}
210
- for i, r in raw_meta.iterrows():
211
- for j in range(0, len(r) - 1, 2):
212
- if not pd.isna(raw_meta.iloc[i, j]):
213
- meta[raw_meta.iloc[i, j]] = raw_meta.iloc[i, j + 1]
214
- self.META["logger model"] = meta["Model:"]
215
- self.META["logger SN"] = meta["Serial Number:"]
216
- self.META["sample interval"] = dt.timedelta(seconds=int(meta["Logging sampling period (s):"]))
217
- self.META["logging start"] = dt.datetime.strptime(meta["Logging start time:"], "%Y/%m/%d")
218
-
219
- column_names = {}
220
- for col in raw_data:
221
- if col == 0:
222
- col_name = "TIME"
223
- else:
224
- col_name = f"channel {col}"
225
- column_names[col] = col_name
226
- self.DATA = raw_data.iloc[6:].copy()
227
- self.DATA.reset_index(drop=True, inplace=True)
228
- self.DATA.rename(columns=column_names, inplace=True)
229
- for col in self.DATA:
230
- if col == "TIME":
231
- self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%d/%m/%Y %H:%M:%S.%f")
232
- else:
233
- self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
234
- return
235
-
236
- def _read_standard_xlsx_format(self, file_path: str):
237
- meta_table = {"Instrument": pd.read_excel(file_path, sheet_name="Metadata", header=9, nrows=1),
238
- "Schedule": pd.read_excel(file_path, sheet_name="Metadata", header=24, nrows=1),
239
- "Sampling": pd.read_excel(file_path, sheet_name="Metadata", header=28, nrows=1)}
240
- self.META["logger model"] = meta_table["Instrument"]["Model"].loc[0]
241
- self.META["logger SN"] = meta_table["Instrument"]["Serial"].loc[0]
242
- self.META["sample interval"] = dt.timedelta(seconds=int(meta_table["Sampling"]["Period"].loc[0]))
243
- self.META["logging start"] = meta_table["Schedule"]["Start time"].loc[0]
244
-
245
- self.DATA = pd.read_excel(file_path, sheet_name="Data", header=1)
246
-
247
- column_names = {}
248
- for col in self.DATA:
249
- if col == "Time":
250
- col_name = "TIME"
251
- elif col == "Temperature":
252
- col_name = "channel 1"
253
- else:
254
- col_name = f"channel {int(col.split('.')[-1]) + 1}"
255
- column_names[col] = col_name
256
- self.DATA.rename(columns=column_names, inplace=True)
257
-
258
- for col in self.DATA:
259
- if col == "TIME":
260
- self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%Y-%m-%d %H:%M:%S.%f")
261
- else:
262
- self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
263
- return
264
-
265
- def _read_standard_rsk_format(self, file_path: str):
266
- raw_meta = {}
267
- try:
268
- with RSK(file_path) as rsk:
269
- rsk.open()
270
- rsk.readdata()
271
- rsk_data = rsk.data
272
- raw_meta["calibration"] = rsk.calibrations
273
- raw_meta["instrument"] = rsk.instrument
274
- raw_meta["schedule"] = rsk.scheduleInfo
275
- raw_meta["parameter key"] = rsk.parameterKeys
276
- raw_meta["epoch"] = rsk.epoch
277
- except NameError:
278
- raise ModuleNotFoundError("You must install pyRSKtools")
279
- except sqlite3.OperationalError:
280
- raise RuntimeError("An error occurred when opening the .rsk file. Try opening the .rsk file in the ruskin\n"
281
- " software then rerunning the code.")
282
- self.DATA = pd.DataFrame(rsk_data)
283
-
284
- self.META["logger model"] = raw_meta["instrument"].model
285
- self.META["logger SN"] = raw_meta["instrument"].serialID
286
- self.META["sample interval"] = dt.timedelta(seconds=raw_meta["schedule"].samplingPeriod/1000)
287
- self.META["logging start"] = raw_meta["epoch"].startTime
288
- self.META["utc offset"] = [int(float(element.value) * 3600) for element in raw_meta["parameter key"]
289
- if element.key == "OFFSET_FROM_UTC"][0]
290
- self.META["calibration parameters"] = {}
291
- for cal in raw_meta["calibration"]:
292
- self.META["calibration parameters"][f"channel {cal.channelOrder}"] = {}
293
- self.META["calibration parameters"][f"channel {cal.channelOrder}"]["a"] = cal.c[0]
294
- self.META["calibration parameters"][f"channel {cal.channelOrder}"]["b"] = cal.c[1]
295
- self.META["calibration parameters"][f"channel {cal.channelOrder}"]["c"] = cal.c[2]
296
- self.META["calibration parameters"][f"channel {cal.channelOrder}"]["d"] = cal.c[3]
297
-
298
- column_names = {}
299
- for col in self.DATA:
300
- if col == "timestamp":
301
- col_name = "TIME"
302
- elif col == "temperature":
303
- col_name = "channel 1"
304
- else:
305
- col_name = f"channel {int(col[-1]) + 1}"
306
- column_names[col] = col_name
307
- self.DATA.rename(columns=column_names, inplace=True)
308
- return
1
+ import sqlite3
2
+ import pathlib
3
+ import warnings
4
+ import numpy as np
5
+ import pandas as pd
6
+ import datetime as dt
7
+ try:
8
+ from pyrsktools import RSK
9
+ except ModuleNotFoundError:
10
+ warnings.warn("Missing pyRSKtools library. .rsk files can not be imported.")
11
+ from .AbstractReader import AbstractReader
12
+
13
+
14
+ class RBRXR420(AbstractReader):
15
+
16
+ def read(self, file_path: str) -> "pd.DataFrame":
17
+ """
18
+
19
+ Parameters
20
+ ----------
21
+ file_path
22
+
23
+ Returns
24
+ -------
25
+
26
+ """
27
+ file_extention = pathlib.Path(file_path).suffix.lower()
28
+ if file_extention in [".dat", ".hex"]:
29
+ with open(file_path, "r") as f:
30
+ first_50 = [next(f) for i in range(50)]
31
+ for line_num in range(len(first_50)):
32
+ if first_50[line_num].lower().startswith("logger start:"):
33
+ header_length = line_num + 1
34
+ break
35
+
36
+ with open(file_path, "r") as f:
37
+ header_lines = [next(f) for i in range(header_length)]
38
+ self._parse_meta(header_lines)
39
+
40
+ data_lines = f.readlines()
41
+ if file_extention == ".dat":
42
+ line_num = 0
43
+ for line_num in range(len(data_lines)):
44
+ if data_lines[line_num] != "\n":
45
+ split_line = data_lines[line_num].split()
46
+ else:
47
+ split_line = ["no data"]
48
+ if split_line[0].lower() == "temp":
49
+ break
50
+ if line_num == len(data_lines) - 1:
51
+ raise RuntimeError("No column names found")
52
+ data_lines = data_lines[line_num:]
53
+ first_line = data_lines[0].split()
54
+ second_line = data_lines[1].split()
55
+
56
+ if len(first_line) == len(second_line):
57
+ self._read_standard_dat_format(data_lines[1:], False)
58
+ elif len(first_line) + 2 == len(second_line):
59
+ try:
60
+ is_datetime = bool(dt.datetime.strptime(" ".join(second_line[:2]), "%Y/%m/%d %H:%M:%S"))
61
+ except ValueError:
62
+ is_datetime = False
63
+ if is_datetime:
64
+ self._read_standard_dat_format(data_lines[1:], True)
65
+ else:
66
+ raise RuntimeError("Error, expected date time with format %Y/%m/%d %H:%M:%S at start of"
67
+ "row.")
68
+ else:
69
+ raise RuntimeError("Error: Number of column names and number of columns do not match any"
70
+ "expected pattern.")
71
+
72
+ else:
73
+ self._read_standard_hex_format(data_lines)
74
+ elif file_extention == ".xls":
75
+ self._read_standard_xls_format(file_path)
76
+ elif file_extention == ".xlsx":
77
+ self._read_standard_xlsx_format(file_path)
78
+ elif file_extention == ".rsk":
79
+ self._read_standard_rsk_format(file_path)
80
+ else:
81
+ raise IOError("Unrecognised file. File is not a .dat, .hex, .xls, .xlsx, or .rsk.")
82
+ return self.DATA
83
+
84
+ def _parse_meta(self, header_lines: list):
85
+ self.META["logger model"] = header_lines[0].split()[1]
86
+ self.META["logger SN"] = header_lines[0].split()[3]
87
+ sample_interval = dt.datetime.strptime(header_lines[5].split()[-1], "%H:%M:%S")
88
+ self.META["download date"] = dt.datetime.strptime(header_lines[1][14:31], "%y/%m/%d %H:%M:%S")
89
+ self.META["sample interval"] = dt.timedelta(hours=sample_interval.hour, minutes=sample_interval.minute,
90
+ seconds=sample_interval.second)
91
+ self.META["logging start"] = dt.datetime.strptime(" ".join(header_lines[3].split()[-2:]),
92
+ "%y/%m/%d %H:%M:%S")
93
+ line_7_info = header_lines[6].split(",")
94
+ self.META["num channels"] = int(line_7_info[0].split()[-1])
95
+ self.META["num samples"] = int(line_7_info[1].split()[-1])
96
+ formatting = header_lines[7].split("%")[1]
97
+ if formatting.endswith("\n"):
98
+ self.META["precision"] = int(formatting[-3])
99
+ else:
100
+ self.META["precision"] = int(formatting[-2])
101
+
102
+ self.META["calibration parameters"] = {}
103
+ calibration_start_line = 8
104
+ for i in range(self.META["num channels"]):
105
+ self.META["calibration parameters"][f"channel {i + 1}"] = {}
106
+ for j in range(4):
107
+ line_num = calibration_start_line + 4 * i + j
108
+ if header_lines[line_num].lower().startswith("calibration"):
109
+ self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)]\
110
+ = float(header_lines[line_num].split()[-1])
111
+ else:
112
+ self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)] \
113
+ = float(header_lines[line_num].split()[0])
114
+
115
+ self.META['raw'] = "".join(header_lines)
116
+ return
117
+
118
+ def _read_standard_dat_format(self, raw_data: list, time_stamps: bool = False):
119
+ """
120
+
121
+ Parameters
122
+ ----------
123
+ raw_data
124
+ line_numbers
125
+
126
+ Returns
127
+ -------
128
+
129
+ """
130
+ self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
131
+ line_num = 0
132
+ for line in raw_data:
133
+ line_data = line.split()
134
+ if time_stamps:
135
+ self.DATA.loc[dt.datetime.strptime(" ".join(line_data[:2]), "%Y/%m/%d %H:%M:%S")] = line_data[2:]
136
+ else:
137
+ self.DATA.loc[self.META["logging start"] + self.META["sample interval"] * line_num] = line_data
138
+ line_num += 1
139
+ for col in self.DATA:
140
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
141
+ self.DATA.reset_index(inplace=True)
142
+ self.DATA.rename(columns={"index": "TIME"}, inplace=True)
143
+ return
144
+
145
+ def _read_standard_hex_format(self, raw_data: list):
146
+ """
147
+
148
+ Parameters
149
+ ----------
150
+ raw_data
151
+
152
+ Returns
153
+ -------
154
+
155
+ """
156
+ for line_num in range(len(raw_data)):
157
+ if raw_data[line_num].lower().startswith("number of bytes of data"):
158
+ hex_header_length = line_num + 2
159
+ break
160
+ elif raw_data[line_num].lower().startswith("number of bytes in header"):
161
+ header_bytes = int(raw_data[line_num].split()[-1])
162
+ num_hex_header_values = int(header_bytes / 3)
163
+ hex_vals = []
164
+ raw_data = raw_data[hex_header_length:]
165
+ for line_num in range(len(raw_data)):
166
+ line = raw_data[line_num]
167
+ line_hex_vals = [line[i: i + 6] for i in range(0, len(line), 6)][:-1]
168
+ for hex_val in line_hex_vals:
169
+ hex_vals.append(hex_val)
170
+ hex_vals = hex_vals[num_hex_header_values:]
171
+
172
+ self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
173
+ line_num = 0
174
+ hex_num = 0
175
+ for line in range(self.META["num samples"]):
176
+ line_time = self.META["logging start"] + self.META["sample interval"] * line_num
177
+ time_hex_vals = hex_vals[hex_num: hex_num + 8]
178
+ line_vals = [int(h, 16) / int("FFFFFF", 16) for h in time_hex_vals]
179
+ line_temps = []
180
+ for channel in range(len(line_vals)):
181
+ val = line_vals[channel]
182
+ if val not in [0, 1]:
183
+ a = self.META["calibration parameters"][f"channel {channel + 1}"]["a"]
184
+ b = self.META["calibration parameters"][f"channel {channel + 1}"]["b"]
185
+ c = self.META["calibration parameters"][f"channel {channel + 1}"]["c"]
186
+ d = self.META["calibration parameters"][f"channel {channel + 1}"]["d"]
187
+ x = np.log((1 / val) - 1)
188
+ temp = 1 / (a + b * x + c * x**2 + d * x**3) - 273.15
189
+ line_temps.append(round(temp, self.META["precision"]))
190
+ else:
191
+ line_temps.append(np.nan)
192
+ self.DATA.loc[line_time] = line_temps
193
+ line_num += 1
194
+ hex_num += 8
195
+ for col in self.DATA:
196
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
197
+ self.DATA.reset_index(inplace=True)
198
+ self.DATA.rename(columns={"index": "TIME"}, inplace=True)
199
+ return
200
+
201
+ def _read_standard_xls_format(self, file_path: str):
202
+ xls = pd.ExcelFile(file_path)
203
+ sheet = xls.sheet_names[0]
204
+ xls.close()
205
+ raw_data = pd.read_excel(file_path, sheet, header=None)
206
+ raw_meta = raw_data.iloc[:5].copy()
207
+ if raw_meta.iloc[0, 0] != "RBR data file":
208
+ raise IOError("Not a valid .xls file")
209
+ meta = {}
210
+ for i, r in raw_meta.iterrows():
211
+ for j in range(0, len(r) - 1, 2):
212
+ if not pd.isna(raw_meta.iloc[i, j]):
213
+ meta[raw_meta.iloc[i, j]] = raw_meta.iloc[i, j + 1]
214
+ self.META["logger model"] = meta["Model:"]
215
+ self.META["logger SN"] = meta["Serial Number:"]
216
+ self.META["sample interval"] = dt.timedelta(seconds=int(meta["Logging sampling period (s):"]))
217
+ self.META["logging start"] = dt.datetime.strptime(meta["Logging start time:"], "%Y/%m/%d")
218
+
219
+ column_names = {}
220
+ for col in raw_data:
221
+ if col == 0:
222
+ col_name = "TIME"
223
+ else:
224
+ col_name = f"channel {col}"
225
+ column_names[col] = col_name
226
+ self.DATA = raw_data.iloc[6:].copy()
227
+ self.DATA.reset_index(drop=True, inplace=True)
228
+ self.DATA.rename(columns=column_names, inplace=True)
229
+ for col in self.DATA:
230
+ if col == "TIME":
231
+ self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%d/%m/%Y %H:%M:%S.%f")
232
+ else:
233
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
234
+ return
235
+
236
+ def _read_standard_xlsx_format(self, file_path: str):
237
+ meta_table = {"Instrument": pd.read_excel(file_path, sheet_name="Metadata", header=9, nrows=1),
238
+ "Schedule": pd.read_excel(file_path, sheet_name="Metadata", header=24, nrows=1),
239
+ "Sampling": pd.read_excel(file_path, sheet_name="Metadata", header=28, nrows=1)}
240
+ self.META["logger model"] = meta_table["Instrument"]["Model"].loc[0]
241
+ self.META["logger SN"] = meta_table["Instrument"]["Serial"].loc[0]
242
+ self.META["sample interval"] = dt.timedelta(seconds=int(meta_table["Sampling"]["Period"].loc[0]))
243
+ self.META["logging start"] = meta_table["Schedule"]["Start time"].loc[0]
244
+
245
+ self.DATA = pd.read_excel(file_path, sheet_name="Data", header=1)
246
+
247
+ column_names = {}
248
+ for col in self.DATA:
249
+ if col == "Time":
250
+ col_name = "TIME"
251
+ elif col == "Temperature":
252
+ col_name = "channel 1"
253
+ else:
254
+ col_name = f"channel {int(col.split('.')[-1]) + 1}"
255
+ column_names[col] = col_name
256
+ self.DATA.rename(columns=column_names, inplace=True)
257
+
258
+ for col in self.DATA:
259
+ if col == "TIME":
260
+ self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%Y-%m-%d %H:%M:%S.%f")
261
+ else:
262
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
263
+ return
264
+
265
+ def _read_standard_rsk_format(self, file_path: str):
266
+ raw_meta = {}
267
+ try:
268
+ with RSK(file_path) as rsk:
269
+ rsk.open()
270
+ rsk.readdata()
271
+ rsk_data = rsk.data
272
+ raw_meta["calibration"] = rsk.calibrations
273
+ raw_meta["instrument"] = rsk.instrument
274
+ raw_meta["schedule"] = rsk.scheduleInfo
275
+ raw_meta["parameter key"] = rsk.parameterKeys
276
+ raw_meta["epoch"] = rsk.epoch
277
+ except NameError:
278
+ raise ModuleNotFoundError("You must install pyRSKtools")
279
+ except sqlite3.OperationalError:
280
+ raise RuntimeError("An error occurred when opening the .rsk file. Try opening the .rsk file in the ruskin\n"
281
+ " software then rerunning the code.")
282
+ self.DATA = pd.DataFrame(rsk_data)
283
+
284
+ self.META["logger model"] = raw_meta["instrument"].model
285
+ self.META["logger SN"] = raw_meta["instrument"].serialID
286
+ self.META["sample interval"] = dt.timedelta(seconds=raw_meta["schedule"].samplingPeriod/1000)
287
+ self.META["logging start"] = raw_meta["epoch"].startTime
288
+ self.META["utc offset"] = [int(float(element.value) * 3600) for element in raw_meta["parameter key"]
289
+ if element.key == "OFFSET_FROM_UTC"][0]
290
+ self.META["calibration parameters"] = {}
291
+ for cal in raw_meta["calibration"]:
292
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"] = {}
293
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["a"] = cal.c[0]
294
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["b"] = cal.c[1]
295
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["c"] = cal.c[2]
296
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["d"] = cal.c[3]
297
+
298
+ column_names = {}
299
+ for col in self.DATA:
300
+ if col == "timestamp":
301
+ col_name = "TIME"
302
+ elif col == "temperature":
303
+ col_name = "channel 1"
304
+ else:
305
+ col_name = f"channel {int(col[-1]) + 1}"
306
+ column_names[col] = col_name
307
+ self.DATA.rename(columns=column_names, inplace=True)
308
+ return
tsp/dataloggers/__init__.py
@@ -1,15 +1,15 @@
1
- from .HOBO import HOBO, HOBOProperties
2
- from .FG2 import FG2
3
- from .GP5W import GP5W
4
- from .Geoprecision import detect_geoprecision_type
5
- from .logr import LogR
6
-
7
- HOBO.__module__ = __name__
8
- HOBOProperties.__module__ = __name__
9
- FG2.__module__ =__name__
10
- GP5W.__module__ = __name__
11
- LogR.__module__ = __name__
12
-
13
- __all__ = ['HOBO','HOBOProperties',
14
- 'FG2','GP5W', 'detect_geoprecision_type',
15
- 'LogR']
1
+ from .HOBO import HOBO, HOBOProperties
2
+ from .FG2 import FG2
3
+ from .GP5W import GP5W
4
+ from .Geoprecision import detect_geoprecision_type
5
+ from .logr import LogR
6
+
7
+ HOBO.__module__ = __name__
8
+ HOBOProperties.__module__ = __name__
9
+ FG2.__module__ =__name__
10
+ GP5W.__module__ = __name__
11
+ LogR.__module__ = __name__
12
+
13
+ __all__ = ['HOBO','HOBOProperties',
14
+ 'FG2','GP5W', 'detect_geoprecision_type',
15
+ 'LogR']