tsp 1.4.6__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tsp might be problematic. Click here for more details.

Files changed (45)
  1. tsp/__init__.py +4 -2
  2. tsp/__meta__.py +1 -1
  3. tsp/core.py +26 -10
  4. tsp/dataloggers/FG2.py +13 -0
  5. tsp/dataloggers/GP5W.py +14 -1
  6. tsp/dataloggers/RBRXL800.py +190 -0
  7. tsp/dataloggers/RBRXR420.py +307 -0
  8. tsp/dataloggers/test_files/004448.DAT +2543 -0
  9. tsp/dataloggers/test_files/004531.DAT +17106 -0
  10. tsp/dataloggers/test_files/004531.HEX +3587 -0
  11. tsp/dataloggers/test_files/004534.HEX +3587 -0
  12. tsp/dataloggers/test_files/010252.dat +1731 -0
  13. tsp/dataloggers/test_files/010252.hex +1740 -0
  14. tsp/dataloggers/test_files/010274.hex +1292 -0
  15. tsp/dataloggers/test_files/010278.hex +3545 -0
  16. tsp/dataloggers/test_files/012064.dat +1286 -0
  17. tsp/dataloggers/test_files/012064.hex +1294 -0
  18. tsp/dataloggers/test_files/012081.hex +3533 -0
  19. tsp/dataloggers/test_files/062834_20220904_2351.rsk +0 -0
  20. tsp/dataloggers/test_files/062834_20220904_2351.xlsx +0 -0
  21. tsp/dataloggers/test_files/07B1592.DAT +1483 -0
  22. tsp/dataloggers/test_files/07B1592.HEX +1806 -0
  23. tsp/dataloggers/test_files/07B4450.DAT +2234 -0
  24. tsp/dataloggers/test_files/07B4450.HEX +2559 -0
  25. tsp/dataloggers/test_files/rbr_001.dat +1133 -0
  26. tsp/dataloggers/test_files/rbr_001.hex +1140 -0
  27. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -0
  28. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1139 -0
  29. tsp/dataloggers/test_files/rbr_002.dat +1179 -0
  30. tsp/dataloggers/test_files/rbr_002.hex +1186 -0
  31. tsp/dataloggers/test_files/rbr_003.hex +1292 -0
  32. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  33. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -0
  34. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -0
  35. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -0
  36. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -0
  37. tsp/gtnp.py +20 -12
  38. tsp/physics.py +5 -10
  39. tsp/readers.py +84 -10
  40. tsp/utils.py +101 -0
  41. {tsp-1.4.6.dist-info → tsp-1.5.3.dist-info}/METADATA +4 -1
  42. {tsp-1.4.6.dist-info → tsp-1.5.3.dist-info}/RECORD +45 -13
  43. {tsp-1.4.6.dist-info → tsp-1.5.3.dist-info}/WHEEL +1 -1
  44. {tsp-1.4.6.dist-info → tsp-1.5.3.dist-info}/LICENSE +0 -0
  45. {tsp-1.4.6.dist-info → tsp-1.5.3.dist-info}/top_level.txt +0 -0
tsp/__init__.py CHANGED
@@ -1,8 +1,10 @@
1
1
  from tsp.core import TSP, IndexedTSP
2
2
  from tsp.misc import _is_depth_column
3
+
3
4
  from tsp.plots.static import trumpet_curve, time_series, colour_contour
4
- from tsp.readers import read_gtnp, read_geotop, read_geoprecision, read_hoboware, read_ntgs, read_logr, read_csv
5
+ from tsp.readers import read_gtnp, read_geotop, read_geoprecision, read_hoboware, read_ntgs, read_logr, read_csv, read_rbr
6
+ from tsp.utils import resolve_duplicate_times
5
7
 
6
8
  #TSP.__module__ = "teaspoon"
7
9
 
8
- __all__ = ["TSP", "IndexedTSP"]
10
+ __all__ = ["TSP", "IndexedTSP"]
tsp/__meta__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # Automatically created. Please do not edit.
2
- __version__ = '1.4.6'
2
+ __version__ = '1.5.3'
3
3
  __author__ = 'Nick Brown'
tsp/core.py CHANGED
@@ -12,10 +12,10 @@ try:
12
12
  try:
13
13
  from pfit.pfnet_standard import make_temperature_base
14
14
  except ModuleNotFoundError:
15
- warnings.warn("Missing pfit library. Some functionality will be limited.")
15
+ warnings.warn("Missing pfit library. Some functionality will be limited.", stacklevel=2)
16
16
 
17
17
  except ModuleNotFoundError:
18
- warnings.warn("Missing netCDF4 library. Some functionality will be limited.")
18
+ warnings.warn("Missing netCDF4 library. Some functionality will be limited.", stacklevel=2)
19
19
 
20
20
  from typing import Union, Optional
21
21
  from datetime import datetime, tzinfo, timezone, timedelta
@@ -77,9 +77,12 @@ class TSP:
77
77
  latitude: Optional[float]=None,
78
78
  longitude: Optional[float]=None,
79
79
  site_id: Optional[str]=None,
80
- metadata:dict={}):
80
+ metadata: dict={}):
81
81
 
82
82
  self._times = handle_incoming_times(times)
83
+ if self._times.duplicated().any():
84
+ warnings.warn(f"Duplicate timestamps found: {self._times[np.where(self._times.duplicated())[0]]}. That's bad.", stacklevel=2)
85
+
83
86
  if self.utc_offset:
84
87
  self._output_utc_offset = self.utc_offset
85
88
  else:
@@ -130,8 +133,14 @@ class TSP:
130
133
 
131
134
  df = pd.DataFrame({"times": times, "depths": depths, "temperature_in_ground": values, "number_of_observations": number_of_observations})
132
135
  df.set_index(["times", "depths"], inplace=True)
133
-
134
- unstacked = df.unstack()
136
+
137
+ try:
138
+ unstacked = df.unstack()
139
+ except ValueError as e:
140
+ if np.any(df.index.duplicated()):
141
+ print(f"Duplicate data found at {df.iloc[np.where(df.index.duplicated())[0], :].index.get_level_values(0).unique()}")
142
+ raise e
143
+
135
144
  number_of_observations = unstacked.get('number_of_observations')
136
145
  temps = unstacked.get('temperature_in_ground')
137
146
 
@@ -294,7 +303,7 @@ class TSP:
294
303
  agg_avg = grouped['temperature_in_ground'].mean().unstack()
295
304
  agg_counts = grouped['number_of_observations'].sum().unstack()
296
305
  times = pd.to_datetime(agg_avg.index, format=freq_fmt)
297
-
306
+
298
307
  # apply masks
299
308
  count_mask = _observation_count_mask(number_of_observations=agg_counts,
300
309
  min_count=min_count)
@@ -434,6 +443,13 @@ class TSP:
434
443
  TSP
435
444
  A TSP object with data aggregated to daily averages
436
445
  """
446
+ # if the data is already daily +/- 1min , just return it
447
+ if np.all(np.isclose(np.diff(self.times).astype('float64'),
448
+ 8.64e13,
449
+ atol=8.64e13 / (24 * 60 ))):
450
+
451
+ return self
452
+
437
453
  t = self.__nly(freq_fmt="%Y%m%d",
438
454
  min_count=min_count,
439
455
  max_gap=max_gap,
@@ -578,7 +594,7 @@ class TSP:
578
594
  try:
579
595
  ncf = make_temperature_base(file, len(self.depths))
580
596
  except NameError:
581
- warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`")
597
+ warnings.warn("Missing required packages. Try installing with `pip install tsp[nc]`", stacklevel=2)
582
598
  return
583
599
 
584
600
  with nc.Dataset(ncf, 'a') as ncd:
@@ -601,7 +617,7 @@ class TSP:
601
617
  try:
602
618
  ncd.setncattr(key, value)
603
619
  except Exception:
604
- warnings.warn(f"Could not set metadata item: {key}")
620
+ warnings.warn(f"Could not set metadata item: {key}", stacklevel=2)
605
621
 
606
622
  def to_json(self, file: str) -> None:
607
623
  """ Write the data to a serialized json file """
@@ -761,7 +777,7 @@ class IndexedTSP(TSP):
761
777
  numpy.ndarray
762
778
  An array of depth indices
763
779
  """
764
- warnings.warn("This TSP uses indices (1,2,3,...) instad of depths. Use set_depths() to use measured depths.")
780
+ warnings.warn("This TSP uses indices (1,2,3,...) instad of depths. Use set_depths() to use measured depths.", stacklevel=2)
765
781
  return self._depths
766
782
 
767
783
  @depths.setter
@@ -799,7 +815,7 @@ def _temporal_gap_mask(grouped: "pd.core.groupby.DataFrameGroupBy", max_gap: int
799
815
  numpy.ndarray
800
816
  boolean array with ``True`` where measurement spacing or range in group does not satisfy tolerances
801
817
  """
802
- max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=0)).apply(lambda x: x.total_seconds())
818
+ max_diff = grouped.time.apply(np.diff).apply(lambda x: np.max(x, initial=np.timedelta64(0))).apply(lambda x: x.total_seconds())
803
819
  max_diff = max_diff.unstack().to_numpy()
804
820
  diff_mask = np.where((max_diff == 0) | (max_diff >= max_gap), True, False)
805
821
 
tsp/dataloggers/FG2.py CHANGED
@@ -75,6 +75,8 @@ class FG2(AbstractReader):
75
75
  self.DATA = self.DATA.drop(["NO"], axis=1)
76
76
  self.DATA = self.drop_hk(self.DATA)
77
77
 
78
+ self.META.update(parse_fg2_meta_lines(self.META['raw']))
79
+
78
80
  return self.DATA
79
81
 
80
82
  def _is_metadata(self, line) -> bool:
@@ -96,3 +98,14 @@ class FG2(AbstractReader):
96
98
 
97
99
  def drop_hk(self, df: "pd.DataFrame") -> "pd.DataFrame":
98
100
  return df.drop([c for c in df if self._is_hk(c)], axis=1)
101
+
102
+
103
+ def parse_fg2_meta_lines(meta: "list[str]") -> dict:
104
+ parsed = dict()
105
+ serial = re.compile(r"LOGGER: \$([\w]{6})")
106
+
107
+ for line in meta:
108
+ if serial.match(line):
109
+ parsed["logger_serial_number"] = serial.match(line).group(1)
110
+
111
+ return parsed
tsp/dataloggers/GP5W.py CHANGED
@@ -83,6 +83,8 @@ class GP5W(AbstractReader):
83
83
  self.DATA = self.DATA.drop(["No"], axis=1)
84
84
  self.DATA = self.drop_hk(self.DATA)
85
85
 
86
+ self.META.update(parse_gp5w_meta_lines(self.META['raw']))
87
+
86
88
  return self.DATA
87
89
 
88
90
  def _is_observation(self, line: str) -> bool:
@@ -99,4 +101,15 @@ class GP5W(AbstractReader):
99
101
  return False
100
102
 
101
103
  def drop_hk(self, df: "pd.DataFrame") -> "pd.DataFrame":
102
- return df.drop([c for c in df if self._is_hk(c)], axis=1)
104
+ return df.drop([c for c in df if self._is_hk(c)], axis=1)
105
+
106
+
107
+ def parse_gp5w_meta_lines(meta: "list[str]") -> dict:
108
+ parsed = dict()
109
+ serial = re.compile(r"Logger: \#([\w]{6})")
110
+
111
+ for line in meta:
112
+ if serial.match(line):
113
+ parsed["logger_serial_number"] = serial.match(line).group(1)
114
+
115
+ return parsed
tsp/dataloggers/RBRXL800.py ADDED
@@ -0,0 +1,190 @@
1
+ import pathlib
2
+ import warnings
3
+ import numpy as np
4
+ import pandas as pd
5
+ import datetime as dt
6
+ from .AbstractReader import AbstractReader
7
+
8
+
9
+ class RBRXL800(AbstractReader):
10
+
11
+ def read(self, file_path: str) -> "pd.DataFrame":
12
+ """
13
+
14
+ Parameters
15
+ ----------
16
+ file
17
+
18
+ Returns
19
+ -------
20
+
21
+ """
22
+ file_extention = pathlib.Path(file_path).suffix.lower()
23
+ if file_extention not in [".dat", ".hex"]:
24
+ raise IOError("Unrecognised file. File is not a .dat or .hex")
25
+
26
+ with open(file_path, "r") as f:
27
+ header_lines = [next(f) for i in range(18)]
28
+ self._parse_meta(header_lines)
29
+
30
+ data_lines = f.readlines()
31
+ if file_extention == ".dat":
32
+ if data_lines[0] == "\n" or len(data_lines[0].split()) == self.META["num channels"] + 2:
33
+ self._read_daily_dat_format(data_lines)
34
+ else:
35
+ if len(data_lines[0].split()) == 1 + self.META["num channels"]:
36
+ self._read_standard_dat_format(data_lines, True)
37
+ elif len(data_lines[0].split()) == self.META["num channels"]:
38
+ self._read_standard_dat_format(data_lines, False)
39
+ else:
40
+ raise RuntimeError("Error: Number of column names and number of columns do not match any"
41
+ "expected pattern.")
42
+
43
+ elif file_extention == ".hex":
44
+ self.META["num bytes"] = int(data_lines[0].split()[-1])
45
+ data_lines = data_lines[1:]
46
+ self._read_standard_hex_format(data_lines)
47
+
48
+ if len(self.DATA.index) != self.META["num samples"]:
49
+ warnings.warn(f"{file_path} Mismatch between number of samples in specified header "
50
+ f"({self.META['num samples']}) and number of samples read {len(self.DATA.index)}. Some "
51
+ "data may be missing")
52
+ return self.DATA
53
+
54
+ def _parse_meta(self, header_lines: list):
55
+ self.META["logger model"] = header_lines[0].split()[1]
56
+ self.META["logger SN"] = header_lines[0].split()[3]
57
+ sample_interval = dt.datetime.strptime(header_lines[5].split()[-1], "%H:%M:%S")
58
+ self.META["sample interval"] = dt.timedelta(hours=sample_interval.hour, minutes=sample_interval.minute,
59
+ seconds=sample_interval.second)
60
+ # try:
61
+ self.META["logging start"] = dt.datetime.strptime(" ".join(header_lines[3].split()[-2:]), "%y/%m/%d %H:%M:%S")
62
+ """
63
+ except ValueError:
64
+ date = header_lines[3].split()[-2]
65
+ if "00" in date.split("/"):
66
+ warnings.warn("Invalid logging start date given in header. Logger may have experienced power issues and"
67
+ "data may be corrupt")"""
68
+
69
+ line_7_info = header_lines[6].split(",")
70
+ self.META["num channels"] = int(line_7_info[0].split()[-1])
71
+ self.META["num samples"] = int(line_7_info[1].split()[-1])
72
+ self.META["precision"] = int(header_lines[9].split("%")[1][-2])
73
+
74
+ self.META["calibration parameters"] = {}
75
+ calibration_start_line = 10
76
+ for i in range(self.META["num channels"]):
77
+ self.META["calibration parameters"][f"channel {i + 1}"] = {}
78
+ line_num = calibration_start_line + i
79
+ raw_calibration = header_lines[line_num].split()
80
+ if raw_calibration[1] != "2":
81
+ raise ValueError(f"Calibration equation #{raw_calibration[1]} currently unsupported.")
82
+ self.META["calibration parameters"][f"channel {i + 1}"]["a0"] = float(raw_calibration[2])
83
+ self.META["calibration parameters"][f"channel {i + 1}"]["a1"] = float(raw_calibration[3])
84
+ self.META["calibration parameters"][f"channel {i + 1}"]["a2"] = float(raw_calibration[4])
85
+ if raw_calibration[5] == "0":
86
+ self.META["calibration parameters"][f"channel {i + 1}"]["a3"] = 1
87
+ else:
88
+ self.META["calibration parameters"][f"channel {i + 1}"]["a3"] = float(raw_calibration[2])
89
+ self.META['raw'] = "".join(header_lines)
90
+ return
91
+
92
+ def _read_daily_dat_format(self, raw_data: list):
93
+ """
94
+
95
+ Parameters
96
+ ----------
97
+ raw_data
98
+
99
+ Returns
100
+ -------
101
+
102
+ """
103
+ self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
104
+ for line in raw_data:
105
+ if line != "\n":
106
+ if len(line) == 20 or len(line.split()) == self.META["num channels"] + 2:
107
+ date_stamp = dt.datetime.strptime(" ".join(line.split()[0:2]), "%Y/%m/%d %H:%M:%S")
108
+ interval_num = 0
109
+ elif len(line.split()) == self.META["num channels"] + 1:
110
+ self.DATA.loc[date_stamp + self.META["sample interval"] * interval_num] = line.split()[1:]
111
+ interval_num += 1
112
+ else:
113
+ self.DATA.loc[date_stamp + self.META["sample interval"] * interval_num] = line.split()
114
+ interval_num += 1
115
+ for col in self.DATA:
116
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
117
+ self.DATA.reset_index(inplace=True)
118
+ self.DATA.rename(columns={"index": "TIME"}, inplace=True)
119
+ return
120
+
121
+ def _read_standard_hex_format(self, raw_data: list):
122
+ byte_list = []
123
+ for line in raw_data:
124
+ eight_bytes = [line[i: i + 4] for i in range(0, len(line), 4)][:-1]
125
+ for byte in eight_bytes:
126
+ byte_list.append(byte)
127
+ byte_num = 0
128
+ self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
129
+ line_num = 0
130
+ prev_line_day = 0
131
+ for line in range(self.META["num samples"]):
132
+ line_time = self.META["logging start"] + self.META["sample interval"] * line_num
133
+ if line_time.day != prev_line_day:
134
+ byte_num += 7
135
+ prev_line_day = line_time.day
136
+ line_bytes = byte_list[byte_num: byte_num + 8]
137
+ line_temps = []
138
+ for channel in range(len(line_bytes)):
139
+ hex_val = line_bytes[channel]
140
+ first_digit = hex_val[0]
141
+ if first_digit == "0":
142
+ data_val = -int(hex_val[1:], 16)
143
+ if first_digit == "2":
144
+ data_val = int(hex_val[1:], 16)
145
+ elif first_digit in ["1", "3"]:
146
+ data_val = np.nan
147
+ if not np.isnan(data_val) and data_val > 0:
148
+ a0 = self.META["calibration parameters"][f"channel {channel + 1}"]["a0"]
149
+ a1 = self.META["calibration parameters"][f"channel {channel + 1}"]["a1"]
150
+ a2 = self.META["calibration parameters"][f"channel {channel + 1}"]["a2"]
151
+ a3 = self.META["calibration parameters"][f"channel {channel + 1}"]["a3"]
152
+ y = a2 * ((2048 * (a3 / data_val)) - 1)
153
+ temp = (a1 / ((a1 / 273.15) - np.log(a0 / y))) - 273.15
154
+ line_temps.append(round(temp, self.META["precision"]))
155
+ else:
156
+ line_temps.append(np.nan)
157
+ self.DATA.loc[line_time] = line_temps
158
+ byte_num += 8
159
+ line_num += 1
160
+ for col in self.DATA:
161
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
162
+ self.DATA.reset_index(inplace=True)
163
+ self.DATA.rename(columns={"index": "TIME"}, inplace=True)
164
+ return
165
+
166
+ def _read_standard_dat_format(self, raw_data: list, line_numbers=False):
167
+ """
168
+
169
+ Parameters
170
+ ----------
171
+ raw_data
172
+ line_numbers
173
+
174
+ Returns
175
+ -------
176
+
177
+ """
178
+ self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
179
+ line_num = 0
180
+ for line in raw_data:
181
+ line_data = line.split()
182
+ if line_numbers:
183
+ line_data = line_data[1:]
184
+ self.DATA.loc[self.META["logging start"] + self.META["sample interval"] * line_num] = line_data
185
+ line_num += 1
186
+ for col in self.DATA:
187
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
188
+ self.DATA.reset_index(inplace=True)
189
+ self.DATA.rename(columns={"index": "TIME"}, inplace=True)
190
+ return
tsp/dataloggers/RBRXR420.py ADDED
@@ -0,0 +1,307 @@
1
+ import sqlite3
2
+ import pathlib
3
+ import warnings
4
+ import numpy as np
5
+ import pandas as pd
6
+ import datetime as dt
7
+ try:
8
+ from pyrsktools import RSK
9
+ except ModuleNotFoundError:
10
+ warnings.warn("Missing pyRSKtools library. .rsk files can not be imported.")
11
+ from .AbstractReader import AbstractReader
12
+
13
+
14
+ class RBRXR420(AbstractReader):
15
+
16
+ def read(self, file_path: str) -> "pd.DataFrame":
17
+ """
18
+
19
+ Parameters
20
+ ----------
21
+ file_path
22
+
23
+ Returns
24
+ -------
25
+
26
+ """
27
+ file_extention = pathlib.Path(file_path).suffix.lower()
28
+ if file_extention in [".dat", ".hex"]:
29
+ with open(file_path, "r") as f:
30
+ first_50 = [next(f) for i in range(50)]
31
+ for line_num in range(len(first_50)):
32
+ if first_50[line_num].lower().startswith("logger start:"):
33
+ header_length = line_num + 1
34
+ break
35
+
36
+ with open(file_path, "r") as f:
37
+ header_lines = [next(f) for i in range(header_length)]
38
+ self._parse_meta(header_lines)
39
+
40
+ data_lines = f.readlines()
41
+ if file_extention == ".dat":
42
+ line_num = 0
43
+ for line_num in range(len(data_lines)):
44
+ if data_lines[line_num] != "\n":
45
+ split_line = data_lines[line_num].split()
46
+ else:
47
+ split_line = ["no data"]
48
+ if split_line[0].lower() == "temp":
49
+ break
50
+ if line_num == len(data_lines) - 1:
51
+ raise RuntimeError("No column names found")
52
+ data_lines = data_lines[line_num:]
53
+ first_line = data_lines[0].split()
54
+ second_line = data_lines[1].split()
55
+
56
+ if len(first_line) == len(second_line):
57
+ self._read_standard_dat_format(data_lines[1:], False)
58
+ elif len(first_line) + 2 == len(second_line):
59
+ try:
60
+ is_datetime = bool(dt.datetime.strptime(" ".join(second_line[:2]), "%Y/%m/%d %H:%M:%S"))
61
+ except ValueError:
62
+ is_datetime = False
63
+ if is_datetime:
64
+ self._read_standard_dat_format(data_lines[1:], True)
65
+ else:
66
+ raise RuntimeError("Error, expected date time with format %Y/%m/%d %H:%M:%S at start of"
67
+ "row.")
68
+ else:
69
+ raise RuntimeError("Error: Number of column names and number of columns do not match any"
70
+ "expected pattern.")
71
+
72
+ else:
73
+ self._read_standard_hex_format(data_lines)
74
+ elif file_extention == ".xls":
75
+ self._read_standard_xls_format(file_path)
76
+ elif file_extention == ".xlsx":
77
+ self._read_standard_xlsx_format(file_path)
78
+ elif file_extention == ".rsk":
79
+ self._read_standard_rsk_format(file_path)
80
+ else:
81
+ raise IOError("Unrecognised file. File is not a .dat, .hex, .xls, .xlsx, or .rsk.")
82
+ return self.DATA
83
+
84
+ def _parse_meta(self, header_lines: list):
85
+ self.META["logger model"] = header_lines[0].split()[1]
86
+ self.META["logger SN"] = header_lines[0].split()[3]
87
+ sample_interval = dt.datetime.strptime(header_lines[5].split()[-1], "%H:%M:%S")
88
+ self.META["sample interval"] = dt.timedelta(hours=sample_interval.hour, minutes=sample_interval.minute,
89
+ seconds=sample_interval.second)
90
+ self.META["logging start"] = dt.datetime.strptime(" ".join(header_lines[3].split()[-2:]),
91
+ "%y/%m/%d %H:%M:%S")
92
+ line_7_info = header_lines[6].split(",")
93
+ self.META["num channels"] = int(line_7_info[0].split()[-1])
94
+ self.META["num samples"] = int(line_7_info[1].split()[-1])
95
+ formatting = header_lines[7].split("%")[1]
96
+ if formatting.endswith("\n"):
97
+ self.META["precision"] = int(formatting[-3])
98
+ else:
99
+ self.META["precision"] = int(formatting[-2])
100
+
101
+ self.META["calibration parameters"] = {}
102
+ calibration_start_line = 8
103
+ for i in range(self.META["num channels"]):
104
+ self.META["calibration parameters"][f"channel {i + 1}"] = {}
105
+ for j in range(4):
106
+ line_num = calibration_start_line + 4 * i + j
107
+ if header_lines[line_num].lower().startswith("calibration"):
108
+ self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)]\
109
+ = float(header_lines[line_num].split()[-1])
110
+ else:
111
+ self.META["calibration parameters"][f"channel {i + 1}"][chr(ord("a") + j)] \
112
+ = float(header_lines[line_num].split()[0])
113
+
114
+ self.META['raw'] = "".join(header_lines)
115
+ return
116
+
117
+ def _read_standard_dat_format(self, raw_data: list, time_stamps: bool = False):
118
+ """
119
+
120
+ Parameters
121
+ ----------
122
+ raw_data
123
+ line_numbers
124
+
125
+ Returns
126
+ -------
127
+
128
+ """
129
+ self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
130
+ line_num = 0
131
+ for line in raw_data:
132
+ line_data = line.split()
133
+ if time_stamps:
134
+ self.DATA.loc[dt.datetime.strptime(" ".join(line_data[:2]), "%Y/%m/%d %H:%M:%S")] = line_data[2:]
135
+ else:
136
+ self.DATA.loc[self.META["logging start"] + self.META["sample interval"] * line_num] = line_data
137
+ line_num += 1
138
+ for col in self.DATA:
139
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
140
+ self.DATA.reset_index(inplace=True)
141
+ self.DATA.rename(columns={"index": "TIME"}, inplace=True)
142
+ return
143
+
144
+ def _read_standard_hex_format(self, raw_data: list):
145
+ """
146
+
147
+ Parameters
148
+ ----------
149
+ raw_data
150
+
151
+ Returns
152
+ -------
153
+
154
+ """
155
+ for line_num in range(len(raw_data)):
156
+ if raw_data[line_num].lower().startswith("number of bytes of data"):
157
+ hex_header_length = line_num + 2
158
+ break
159
+ elif raw_data[line_num].lower().startswith("number of bytes in header"):
160
+ header_bytes = int(raw_data[line_num].split()[-1])
161
+ num_hex_header_values = int(header_bytes / 3)
162
+ hex_vals = []
163
+ raw_data = raw_data[hex_header_length:]
164
+ for line_num in range(len(raw_data)):
165
+ line = raw_data[line_num]
166
+ line_hex_vals = [line[i: i + 6] for i in range(0, len(line), 6)][:-1]
167
+ for hex_val in line_hex_vals:
168
+ hex_vals.append(hex_val)
169
+ hex_vals = hex_vals[num_hex_header_values:]
170
+
171
+ self.DATA = pd.DataFrame(columns=[f"channel {i + 1}" for i in range(self.META["num channels"])])
172
+ line_num = 0
173
+ hex_num = 0
174
+ for line in range(self.META["num samples"]):
175
+ line_time = self.META["logging start"] + self.META["sample interval"] * line_num
176
+ time_hex_vals = hex_vals[hex_num: hex_num + 8]
177
+ line_vals = [int(h, 16) / int("FFFFFF", 16) for h in time_hex_vals]
178
+ line_temps = []
179
+ for channel in range(len(line_vals)):
180
+ val = line_vals[channel]
181
+ if val not in [0, 1]:
182
+ a = self.META["calibration parameters"][f"channel {channel + 1}"]["a"]
183
+ b = self.META["calibration parameters"][f"channel {channel + 1}"]["b"]
184
+ c = self.META["calibration parameters"][f"channel {channel + 1}"]["c"]
185
+ d = self.META["calibration parameters"][f"channel {channel + 1}"]["d"]
186
+ x = np.log((1 / val) - 1)
187
+ temp = 1 / (a + b * x + c * x**2 + d * x**3) - 273.15
188
+ line_temps.append(round(temp, self.META["precision"]))
189
+ else:
190
+ line_temps.append(np.nan)
191
+ self.DATA.loc[line_time] = line_temps
192
+ line_num += 1
193
+ hex_num += 8
194
+ for col in self.DATA:
195
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
196
+ self.DATA.reset_index(inplace=True)
197
+ self.DATA.rename(columns={"index": "TIME"}, inplace=True)
198
+ return
199
+
200
+ def _read_standard_xls_format(self, file_path: str):
201
+ xls = pd.ExcelFile(file_path)
202
+ sheet = xls.sheet_names[0]
203
+ xls.close()
204
+ raw_data = pd.read_excel(file_path, sheet, header=None)
205
+ raw_meta = raw_data.iloc[:5].copy()
206
+ if raw_meta.iloc[0, 0] != "RBR data file":
207
+ raise IOError("Not a valid .xls file")
208
+ meta = {}
209
+ for i, r in raw_meta.iterrows():
210
+ for j in range(0, len(r) - 1, 2):
211
+ if not pd.isna(raw_meta.iloc[i, j]):
212
+ meta[raw_meta.iloc[i, j]] = raw_meta.iloc[i, j + 1]
213
+ self.META["logger model"] = meta["Model:"]
214
+ self.META["logger SN"] = meta["Serial Number:"]
215
+ self.META["sample interval"] = dt.timedelta(seconds=int(meta["Logging sampling period (s):"]))
216
+ self.META["logging start"] = dt.datetime.strptime(meta["Logging start time:"], "%Y/%m/%d")
217
+
218
+ column_names = {}
219
+ for col in raw_data:
220
+ if col == 0:
221
+ col_name = "TIME"
222
+ else:
223
+ col_name = f"channel {col}"
224
+ column_names[col] = col_name
225
+ self.DATA = raw_data.iloc[6:].copy()
226
+ self.DATA.reset_index(drop=True, inplace=True)
227
+ self.DATA.rename(columns=column_names, inplace=True)
228
+ for col in self.DATA:
229
+ if col == "TIME":
230
+ self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%d/%m/%Y %H:%M:%S.%f")
231
+ else:
232
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
233
+ return
234
+
235
+ def _read_standard_xlsx_format(self, file_path: str):
236
+ meta_table = {"Instrument": pd.read_excel(file_path, sheet_name="Metadata", header=9, nrows=1),
237
+ "Schedule": pd.read_excel(file_path, sheet_name="Metadata", header=24, nrows=1),
238
+ "Sampling": pd.read_excel(file_path, sheet_name="Metadata", header=28, nrows=1)}
239
+ self.META["logger model"] = meta_table["Instrument"]["Model"].loc[0]
240
+ self.META["logger SN"] = meta_table["Instrument"]["Serial"].loc[0]
241
+ self.META["sample interval"] = dt.timedelta(seconds=int(meta_table["Sampling"]["Period"].loc[0]))
242
+ self.META["logging start"] = meta_table["Schedule"]["Start time"].loc[0]
243
+
244
+ self.DATA = pd.read_excel(file_path, sheet_name="Data", header=1)
245
+
246
+ column_names = {}
247
+ for col in self.DATA:
248
+ if col == "Time":
249
+ col_name = "TIME"
250
+ elif col == "Temperature":
251
+ col_name = "channel 1"
252
+ else:
253
+ col_name = f"channel {int(col.split('.')[-1]) + 1}"
254
+ column_names[col] = col_name
255
+ self.DATA.rename(columns=column_names, inplace=True)
256
+
257
+ for col in self.DATA:
258
+ if col == "TIME":
259
+ self.DATA["TIME"] = pd.to_datetime(self.DATA["TIME"], format="%Y-%m-%d %H:%M:%S.%f")
260
+ else:
261
+ self.DATA[col] = pd.to_numeric(self.DATA[col], errors='coerce')
262
+ return
263
+
264
+ def _read_standard_rsk_format(self, file_path: str):
265
+ raw_meta = {}
266
+ try:
267
+ with RSK(file_path) as rsk:
268
+ rsk.open()
269
+ rsk.readdata()
270
+ rsk_data = rsk.data
271
+ raw_meta["calibration"] = rsk.calibrations
272
+ raw_meta["instrument"] = rsk.instrument
273
+ raw_meta["schedule"] = rsk.scheduleInfo
274
+ raw_meta["parameter key"] = rsk.parameterKeys
275
+ raw_meta["epoch"] = rsk.epoch
276
+ except NameError:
277
+ raise ModuleNotFoundError("You must install pyRSKtools")
278
+ except sqlite3.OperationalError:
279
+ raise RuntimeError("An error occurred when opening the .rsk file. Try opening the .rsk file in the ruskin\n"
280
+ " software then rerunning the code.")
281
+ self.DATA = pd.DataFrame(rsk_data)
282
+
283
+ self.META["logger model"] = raw_meta["instrument"].model
284
+ self.META["logger SN"] = raw_meta["instrument"].serialID
285
+ self.META["sample interval"] = dt.timedelta(seconds=raw_meta["schedule"].samplingPeriod/1000)
286
+ self.META["logging start"] = raw_meta["epoch"].startTime
287
+ self.META["utc offset"] = [int(float(element.value) * 3600) for element in raw_meta["parameter key"]
288
+ if element.key == "OFFSET_FROM_UTC"][0]
289
+ self.META["calibration parameters"] = {}
290
+ for cal in raw_meta["calibration"]:
291
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"] = {}
292
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["a"] = cal.c[0]
293
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["b"] = cal.c[1]
294
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["c"] = cal.c[2]
295
+ self.META["calibration parameters"][f"channel {cal.channelOrder}"]["d"] = cal.c[3]
296
+
297
+ column_names = {}
298
+ for col in self.DATA:
299
+ if col == "timestamp":
300
+ col_name = "TIME"
301
+ elif col == "temperature":
302
+ col_name = "channel 1"
303
+ else:
304
+ col_name = f"channel {int(col[-1]) + 1}"
305
+ column_names[col] = col_name
306
+ self.DATA.rename(columns=column_names, inplace=True)
307
+ return