tsp 1.7.1__py3-none-any.whl → 1.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tsp might be problematic. Click here for more details.

Files changed (91) hide show
  1. tsp/__init__.py +11 -11
  2. tsp/__meta__.py +1 -1
  3. tsp/core.py +1035 -1010
  4. tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
  5. tsp/data/2023-01-06_755-test.metadata.txt +208 -208
  6. tsp/data/NTGS_example_csv.csv +6 -0
  7. tsp/data/NTGS_example_slash_dates.csv +6 -0
  8. tsp/data/example_geotop.csv +5240 -5240
  9. tsp/data/example_gtnp.csv +1298 -1298
  10. tsp/data/example_permos.csv +8 -0
  11. tsp/data/test_geotop_has_space.txt +5 -0
  12. tsp/dataloggers/AbstractReader.py +43 -43
  13. tsp/dataloggers/FG2.py +110 -110
  14. tsp/dataloggers/GP5W.py +114 -114
  15. tsp/dataloggers/Geoprecision.py +34 -34
  16. tsp/dataloggers/HOBO.py +914 -914
  17. tsp/dataloggers/RBRXL800.py +190 -190
  18. tsp/dataloggers/RBRXR420.py +308 -307
  19. tsp/dataloggers/__init__.py +15 -15
  20. tsp/dataloggers/logr.py +115 -115
  21. tsp/dataloggers/test_files/004448.DAT +2543 -2543
  22. tsp/dataloggers/test_files/004531.DAT +17106 -17106
  23. tsp/dataloggers/test_files/004531.HEX +3587 -3587
  24. tsp/dataloggers/test_files/004534.HEX +3587 -3587
  25. tsp/dataloggers/test_files/010252.dat +1731 -1731
  26. tsp/dataloggers/test_files/010252.hex +1739 -1739
  27. tsp/dataloggers/test_files/010274.hex +1291 -1291
  28. tsp/dataloggers/test_files/010278.hex +3544 -3544
  29. tsp/dataloggers/test_files/012064.dat +1286 -1286
  30. tsp/dataloggers/test_files/012064.hex +1294 -1294
  31. tsp/dataloggers/test_files/012081.hex +3532 -3532
  32. tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
  33. tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
  34. tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
  35. tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
  36. tsp/dataloggers/test_files/CSc_CR1000_1.dat +295 -0
  37. tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
  38. tsp/dataloggers/test_files/GP5W.csv +1121 -1121
  39. tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
  40. tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
  41. tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
  42. tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
  43. tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
  44. tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
  45. tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
  46. tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
  47. tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
  48. tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
  49. tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
  50. tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
  51. tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
  52. tsp/dataloggers/test_files/hobo2.csv +8702 -8702
  53. tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
  54. tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
  55. tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
  56. tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
  57. tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
  58. tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
  59. tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
  60. tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
  61. tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
  62. tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
  63. tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
  64. tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
  65. tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
  66. tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
  67. tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
  68. tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
  69. tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
  70. tsp/dataloggers/test_files/rbr_003.xls +0 -0
  71. tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
  72. tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
  73. tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
  74. tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
  75. tsp/gtnp.py +148 -141
  76. tsp/labels.py +3 -3
  77. tsp/misc.py +90 -90
  78. tsp/physics.py +101 -101
  79. tsp/plots/static.py +374 -305
  80. tsp/readers.py +548 -536
  81. tsp/scratch.py +6 -0
  82. tsp/time.py +45 -45
  83. tsp/tspwarnings.py +15 -0
  84. tsp/utils.py +101 -101
  85. tsp/version.py +1 -1
  86. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/LICENSE +674 -674
  87. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/METADATA +10 -6
  88. tsp-1.7.7.dist-info/RECORD +95 -0
  89. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/WHEEL +5 -5
  90. tsp-1.7.1.dist-info/RECORD +0 -88
  91. {tsp-1.7.1.dist-info → tsp-1.7.7.dist-info}/top_level.txt +0 -0
tsp/dataloggers/HOBO.py CHANGED
@@ -1,914 +1,914 @@
1
- import pandas as pd
2
- import regex as re
3
- import json
4
- import pprint
5
-
6
- from statistics import mode, StatisticsError
7
- from typing import Optional
8
-
9
- from .AbstractReader import AbstractReader
10
-
11
-
12
- DATA_HEADERS = [ # Taken from HOBOware help manual. Not Complete.
13
- "x accel", "y accel", "z accel",
14
- "watt-hours", "kilowatt-hours",
15
- "watts",
16
- "rh", "temp", "wind speed", "wind dir", "soil moisture", "amps", "volts"
17
- ]
18
-
19
- DETAILS_KEYWORDS = ["First Sample Time", "Battery at Launch", "Device Info", "Deployment Info"]
20
- DETAILS_HEADERS = ["Series", "Event Type"]
21
- DETAILS_SUBHEADERS = ["Devices", "Device Info", "Deployment Info", "Series Statistics"]
22
-
23
- # ==== ASSUMPTIONS ====
24
- MAX_HEADER_LINES = 40
25
-
26
- # =====================
27
-
28
-
29
- class HOBO(AbstractReader):
30
-
31
- TZ_REGEX = re.compile(r"GMT\s?[-+]\d\d:\d\d")
32
- MAX_LINES = 200 # How many lines to check for header, date, etc.
33
-
34
- def __init__(self, properties: "Optional[HOBOProperties]"=None):
35
- """A class for reading HOBOWare exports
36
-
37
- Because of the variability of HOBOWare CSV exports, the HOBO reader
38
- relies on on a HOBOProperties configuration object. This can be
39
- configured manually (most reliable) or autodetected from a file.
40
-
41
- Parameters
42
- ----------
43
- properties : HOBOProperties, optional
44
- A :py:class:`~tsp.dataloggers.HOBO.HOBOProperties` object that provides information about how the csv export was configured.. If ``None``, the software will attempt to figure out the configuration properties using :py:meth:`~tsp.dataloggers.HOBO.HOBOProperties.autodetect` , by default ``None``
45
-
46
- Examples
47
- --------
48
-
49
- .. code-block:: python
50
-
51
- from teaspoon.dataloggers import HOBO, HOBOProperties
52
- from pathlib import Path
53
- from pkg_resources import resource_filename
54
-
55
- classic_file = resource_filename("teaspoon", "dataloggers/test_files/hobo_1_AB_classic.csv")
56
- defaults_file = resource_filename("teaspoon", "dataloggers/test_files/hobo_1_AB_defaults.csv")
57
-
58
- # To autodetect HOBOWare Properties:
59
- data = HOBO().read(defaults_file)
60
-
61
- # To manually specify a the HOBOWare configuration, initialize the HOBO reader with a HOBOProperties object
62
- classic_file = resource_filename("teaspoon", "dataloggers/test_files/hobo_1_AB_classic.csv")
63
- classic_properties = HOBOProperties.classic()
64
- hobo = HOBO(classic_properties)
65
- data = hobo.read(classic_file)
66
-
67
- """
68
- super().__init__()
69
- self.tz = None
70
- self.properties = properties
71
-
72
- def read(self, file:str) -> "pd.DataFrame":
73
- """Read a HOBOWare CSV export using the properties
74
-
75
- Parameters
76
- ----------
77
- file : str
78
- Path to HoboWare CSV export
79
-
80
- Returns
81
- -------
82
- pandas.DataFrame
83
- A pandas dataframe with only the data
84
- """
85
- if self.properties is None:
86
- print("Attempting to detect file properties")
87
- self.properties = HOBOProperties.autodetect(file)
88
-
89
- with open(file, encoding="UTF-8") as f: # Get header info
90
- lines = f.readlines()
91
- self._extract_header_from_lines(lines)
92
-
93
- if self.properties.include_plot_details:
94
- self.META['details'] = self._read_details(lines)
95
-
96
- self._set_tz_offset()
97
-
98
- # Read remaining data as pd DataFrame
99
- self.raw_table = self._safe_read(file, delimiter=self.properties.separator,
100
- headerline_i=self.headerline_i)
101
-
102
- time_df = self._create_datetime_column(self.raw_table)
103
- data_df = self._extract_data_columns(self.raw_table)
104
-
105
- self._convert_number_format(data_df)
106
-
107
- self.DATA = pd.concat([time_df, data_df], axis=1)
108
- self.DATA.columns = ["TIME"] + list(data_df.columns)
109
-
110
- return self.DATA
111
-
112
- def _safe_read(self, file, delimiter, headerline_i):
113
- """ handle edge cases when reading csv """
114
- if (self.properties.no_quotes_or_commas
115
- and self.properties.separator == ','
116
- and self.properties.include_logger_serial
117
- and self.properties.include_sensor_serial):
118
- raise IOError("Bad file (can't have comma separators, no quotes in header, and both logger and sensor serial numbers)")
119
- # pattern = re.compile(r"LGR S/N:\s*(?P<serial>\d+),\s*#(?P=serial))")
120
- # check header, replace, handle extra rows with details
121
-
122
- else:
123
- return pd.read_csv(file, delimiter=delimiter,
124
- skiprows=headerline_i, index_col=False)
125
-
126
- def _extract_header_from_lines(self, lines):
127
- """ Get the text and row index for the header row """
128
- for i, line in enumerate(lines):
129
- if self._is_header(line):
130
- self.headerline = line
131
- self.headerline_i = i
132
- break
133
-
134
- if i > self.MAX_LINES:
135
- raise Exception
136
-
137
- def _is_header(self, line):
138
- """ Determine whether a line is a header row """
139
- pattern = self.properties._header_regex()
140
- match = pattern.search(line)
141
- return bool(match)
142
-
143
- def _set_tz_offset(self):
144
- """ Find and set time zone offset """
145
- if self.tz:
146
- return
147
- elif self.properties.include_plot_details and self.properties.no_quotes_or_commas and self.META.get('details'):
148
- tz = self._detect_time_zone_from_details(self.META.get('details'))
149
- else:
150
- tz = self._detect_time_zone_from_header_line(self.headerline)
151
-
152
- self.tz = tz
153
- self.META['tz_offset'] = tz
154
-
155
- def _detect_time_zone_from_header_line(self, line):
156
- """ Extract time zone from header line """
157
- tz_match = self.TZ_REGEX.search(line)
158
- tz = tz_match.group()[-6:].replace(":", "") if tz_match else None
159
- return tz
160
-
161
- def _detect_time_zone_from_details(self, details):
162
- """ Extract time zone from details column as a list of dicts"""
163
- tz_match = self.TZ_REGEX.search(self.META.get('details')[0]['First Sample Time'])
164
- tz = tz_match.group()[-6:].replace(":", "") if tz_match else None
165
- return tz
166
-
167
- def _is_data_header(self, text):
168
- """ Determine whether a string represents a column name with data """
169
- if self.properties.no_quotes_or_commas:
170
- pattern = re.compile(rf"({'|'.join(DATA_HEADERS)}) \(.{{1,5}}\)", re.IGNORECASE)
171
- else:
172
- pattern = re.compile(f"({'|'.join(DATA_HEADERS)}), ", re.IGNORECASE)
173
-
174
- return pattern.findall(text)
175
-
176
- def _is_datetime_header(self, text):
177
- """ Determine whether a string represents a column name for date or time """
178
- return bool(re.search("(Date Time|Date|Time)$", text))
179
-
180
- def _extract_data_columns(self, df):
181
- """ Return a subset of a dataframe containing only data columns """
182
- keep = list()
183
-
184
- for column_name in df.columns:
185
- if self._is_data_header(column_name):
186
- keep.append(column_name)
187
-
188
- return df.loc[:, keep]
189
-
190
- def _create_datetime_column(self, df):
191
- """ Create a pandas datetime Series from a HOBO dataframe """
192
- tzfmt = "%z" if self.tz else ""
193
- tz = self.tz if self.tz else ""
194
-
195
- if self.properties.separate_date_time:
196
- _date_pattern = re.compile("Date")
197
- date_header = next(filter(_date_pattern.search, df.columns))
198
-
199
- _time_pattern = re.compile("Time")
200
- time_header = next(filter(_time_pattern.search, df.columns))
201
-
202
- full_date = df.loc[:, date_header] + df.loc[:, time_header] + tz
203
-
204
- date_fmt = self.properties._date_pattern() + self.properties._time_pattern() + tzfmt
205
- TIME = pd.to_datetime(full_date, format=date_fmt)
206
-
207
- else:
208
- date_time_pattern = re.compile("Date Time")
209
- datetime_header = next(filter(date_time_pattern.search, df.columns))
210
-
211
- full_date = df.loc[:, datetime_header] + tz
212
- date_fmt = self.properties._date_pattern() + tzfmt
213
- TIME = pd.to_datetime(full_date, format=date_fmt)
214
-
215
- return TIME
216
-
217
- def _read_details(self, lines):
218
- """ Read series details from last column (if they are included)."""
219
- meta_pattern = re.compile("(.*?):(.*)$")
220
- details = list()
221
- current = dict()
222
-
223
- for line in lines:
224
- info_column = line.split(self.properties.separator)[-1]
225
- match = meta_pattern.search(info_column)
226
-
227
- if match:
228
- key, value = match.groups()
229
-
230
- if current != {} and key.strip() in DETAILS_HEADERS:
231
- details.append(current)
232
- current = dict()
233
-
234
- current[key.strip()] = value.strip()
235
-
236
- if details != [] and re.search(r"^\s*$", info_column): # Stop once details block is over
237
- break
238
-
239
- return details
240
-
241
- def _convert_number_format(self, df):
242
- """ Convert numeric-style text to strings """
243
- # map(lambda x: self._convert_series_number_format(df[x]), df)
244
- for col in df.columns:
245
- df.loc[:, col] = self._convert_series_number_format(df.loc[:, col])
246
-
247
- def _convert_series_number_format(self, series):
248
- """ Convert pandas series to numeric after """
249
- if hasattr(series, 'str'):
250
- if self.properties.thousands_separator:
251
- series = series.str.replace(self.properties.thousands_separator, "")
252
- if self.properties.decimal_separator != '.':
253
- series = series.str.replace(self.properties.decimal_separator, ".")
254
-
255
- series = series.str.replace(r"(\((\d*\.\d*)\)|(\d*\.\d*)-)", r"-\2", regex=True)
256
-
257
- return pd.to_numeric(series)
258
-
259
-
260
- class HOBOProperties:
261
-
262
- DATE_FORMATS = ["MDY", "YMD", "DMY"]
263
- POS_N_FMT = [1,2,3,4]
264
- NEG_N_FMT = [1,2,3]
265
-
266
- DEFAULTS = {"separator": ",",
267
- "include_line_number": True,
268
- "include_plot_title_in_header": True,
269
- "always_show_fractional_seconds": False,
270
- "separate_date_time": False,
271
- "no_quotes_or_commas": False,
272
- "include_logger_serial": True,
273
- "include_sensor_serial": True,
274
- "date_format": "MDY",
275
- "date_separator": "/",
276
- "time_format_24hr": False,
277
- "positive_number_format": 1,
278
- "negative_number_format": 1,
279
- "include_plot_details": False
280
- }
281
-
282
- CLASSIC = {"separator": "\t",
283
- "include_line_number": False,
284
- "include_plot_title_in_header": False,
285
- "always_show_fractional_seconds": True,
286
- "separate_date_time": False,
287
- "no_quotes_or_commas": True,
288
- "include_logger_serial": False,
289
- "include_sensor_serial": True,
290
- "date_format": "MDY",
291
- "date_separator": "/",
292
- "time_format_24hr": True,
293
- "positive_number_format": 1,
294
- "negative_number_format": 1,
295
- "include_plot_details": False
296
- }
297
-
298
- def __str__(self):
299
- return pprint.pformat(self.get_properties())
300
-
301
- def __init__(self, separator:str=",",
302
- include_line_number:bool=True,
303
- include_plot_title_in_header:bool=True,
304
- always_show_fractional_seconds:bool=False,
305
- separate_date_time:bool=False,
306
- no_quotes_or_commas:bool=False,
307
- include_logger_serial:bool=True,
308
- include_sensor_serial:bool=True,
309
- date_format:str="MDY",
310
- date_separator:str="/",
311
- time_format_24hr:bool=False,
312
- positive_number_format:int=1,
313
- negative_number_format:int=1,
314
- include_plot_details:bool=False):
315
-
316
- r""" An object used to describe the CSV export configuration from HOBOWare.
317
-
318
- Parameters
319
- ----------
320
- separator : str, optional
321
- Whether data are comma, semicolon or tab-separated ``",", ";", "\t"``, by default ``","``
322
- include_line_number : bool, optional
323
- Whether the CSV file has line numbers as the first column, by default True
324
- include_plot_title_in_header : bool, optional
325
- Whether the plot title is included in the CSV header, by default True
326
- always_show_fractional_seconds : bool, optional
327
- Whether fractinoal seconds are always included in the data export, by default False
328
- separate_date_time : bool, optional
329
- Whether date and time are in separate columns, by default False
330
- no_quotes_or_commas : bool, optional
331
- Whether quotes and commas are omitted from the header row, by default False
332
- include_logger_serial : bool, optional
333
- Whether logger serial number is included, by default True
334
- include_sensor_serial : bool, optional
335
- Whether sensor serial number is included, by default True
336
- date_format : str, optional
337
- Chosen from ``"MDY", "YMD", "DMY"``, by default "MDY"
338
- date_separator : str, optional
339
- What token separates year, month, and day in the date. Chosen from ``"/", "-"``, by default ``"/"``
340
- time_format_24hr : bool, optional
341
- Whether 24 hour time format is used, by default False
342
- positive_number_format : int, optional
343
- Format for positive numbers. See Notes, by default 1
344
- negative_number_format : int, optional
345
- Format for negative numbers. See Notes, by default 1
346
- include_plot_details : bool, optional
347
- Whether plot details are included in the file, by default False
348
-
349
- Notes
350
- -----
351
- Integers are used to define the positive and negative number formats. The corresponding formats are in the tables below:
352
-
353
- Positive number format
354
-
355
- ===== ======== =================== =================
356
- value example thousands separator decimal separator
357
- ===== ======== =================== =================
358
- 1 1,234.56 comma period
359
- 2 1 234,56 space comma
360
- 3 1.234,56 period comma
361
- 4 1.234 56 period space
362
- ===== ======== =================== =================
363
-
364
- Negative number format
365
-
366
- ===== ======== =================== =================
367
- value example prefix suffix
368
- ===== ======== =================== =================
369
- 1 -123 minus sign (\-) None
370
- 2 123- None minus sign (\-)
371
- 3 \(123\) ( )
372
- ===== ======== =================== =================
373
-
374
- Examples
375
- --------
376
-
377
- .. code-block:: python
378
-
379
- from teaspoon.dataloggers import HOBOProperties
380
- from pkg_resources import resource_filename
381
- from pathlib import Path
382
-
383
- # Autodetect file structure
384
- hobo_file = resource_filename("teaspoon", "dataloggers/test_files/hobo_1_AB_classic.csv")
385
- P = HOBOProperties.autodetect(hobo_file)
386
- print(P)
387
-
388
- # HOBOWare 'default' format
389
- print(HOBOProperties.defaults())
390
-
391
- # HOBOWare 'classic' format
392
- print(HOBOProperties.classic())
393
-
394
- # Custom format (unspecified properties are defaults)
395
- P = HOBOProperties(date_separator=",",
396
- date_format="YMD",
397
- include_line_number=True,
398
- include_plot_details=False,
399
- no_quotes_or_commas=False,
400
- separate_date_time=False)
401
- print(P)
402
-
403
- # Save custom properties
404
- savepath = Path(Path.home(), "custom_hobo_properties.json")
405
- P.write(savepath)
406
-
407
- # Read from a saved file
408
- Q = HOBOProperties.from_file(savepath) # Read from a file
409
- print(Q)
410
-
411
- """
412
-
413
- self.separator = separator
414
- self.include_line_number = include_line_number
415
- self.include_plot_title_in_header = include_plot_title_in_header
416
- self.always_show_fractional_seconds = always_show_fractional_seconds
417
- self.separate_date_time = separate_date_time
418
- self.no_quotes_or_commas = no_quotes_or_commas
419
- self.include_logger_serial = include_logger_serial
420
- self.include_sensor_serial = include_sensor_serial
421
- self.date_format = date_format
422
- self.date_separator = date_separator
423
- self.time_format_24hr = time_format_24hr
424
- self.positive_number_format = positive_number_format
425
- self.negative_number_format = negative_number_format
426
- self.include_plot_details = include_plot_details
427
-
428
- @classmethod
429
- def defaults(cls):
430
- """ Create a HOBO Properties object using HOBOWare defaults"""
431
- hobo_properties = cls(**cls.DEFAULTS)
432
- return hobo_properties
433
-
434
- @classmethod
435
- def classic(cls):
436
- """ Create a HOBO Properties object using HOBOWare 'classic' settings """
437
- hobo_properties = cls(**cls.CLASSIC)
438
- return hobo_properties
439
-
440
- @classmethod
441
- def from_file(cls, file):
442
- """ Create a HOBO Properties object from a text file """
443
- data = cls.read(file)
444
- hobo_properties = cls(**data)
445
-
446
- return hobo_properties
447
-
448
- @classmethod
449
- def autodetect(cls, file, n_lines=400):
450
- """ Automatically detect properties from a file """
451
- print("Detecting file properties, this may take some time...")
452
-
453
- with open(file, encoding="UTF-8") as f:
454
- lines = f.readlines()
455
- lines = lines[:n_lines] + lines[n_lines::1000]
456
-
457
- thou_sep, deci_sep, col_sep, negative_open, negative_term = cls._parse_number_format(lines)
458
-
459
- hobo = cls(separator=cls._detect_separator(lines),
460
- include_line_number=cls._detect_line_number(lines),
461
- # include_plot_title_in_header=True,
462
- always_show_fractional_seconds=cls._detect_always_show_fractional_seconds(lines),
463
- separate_date_time=cls._detect_separate_date_time(lines),
464
- no_quotes_or_commas=cls._detect_no_quotes_or_commas(lines),
465
- # include_logger_serial=True,
466
- # include_sensor_serial=True,
467
- date_format=cls._detect_date_format(lines),
468
- date_separator=cls._detect_date_separator(lines),
469
- time_format_24hr=cls._detect_time_format_24hr(lines),
470
- positive_number_format=cls._evaluate_positive_number_format(thou_sep, deci_sep),
471
- negative_number_format=cls._evaluate_negative_number_format(negative_open, negative_term),
472
- include_plot_details=cls._detect_include_plot_details(lines))
473
-
474
- if hobo.positive_number_format is None:
475
- hobo.thousands_separator = thou_sep
476
- hobo.decimal_separator = deci_sep
477
-
478
- return hobo
479
-
480
- def _date_pattern(self):
481
- """ Return the appropriate strptime string to read dates from a HOBO file."""
482
- if self.date_format not in self.DATE_FORMATS:
483
- raise ValueError(f"Incorrect date pattern. Choose from {self.DATE_FORMATS}")
484
-
485
- if self.date_format == "YMD":
486
- pattern = "%y{0}%m{0}%d".format(self.date_separator)
487
-
488
- elif self.date_format == "MDY":
489
- pattern = "%m{0}%d{0}%y".format(self.date_separator)
490
-
491
- elif self.date_format == "DMY":
492
- pattern = "%d{0}%m{0}%y".format(self.date_separator)
493
-
494
- if not self.separate_date_time:
495
- pattern += f" {self._time_pattern()}"
496
-
497
- return pattern
498
-
499
- def _time_pattern(self):
500
- """ Return the appropriate strptime string to read time from a HOBO file."""
501
-
502
- if self.time_format_24hr:
503
- fmt = "%H:%M:%S"
504
- else:
505
- fmt = "%I:%M:%S %p"
506
-
507
- if self.always_show_fractional_seconds:
508
- fmt = fmt.replace("S", "S.%f")
509
-
510
- return fmt
511
-
512
- def _header_regex(self):
513
- """ Return the regular expression to match a header row. """
514
- if self.separate_date_time:
515
-
516
- if self.no_quotes_or_commas:
517
- return re.compile(f"Date{self.separator}Time")
518
- else:
519
- return re.compile(f'Date"{self.separator}"Time')
520
- else:
521
- return re.compile("Date Time")
522
-
523
- @staticmethod
524
- def read(file):
525
- """ Read HOBO file properties from a text file."""
526
- with open(file) as json_file:
527
- data = json.load(json_file)
528
- return data
529
-
530
- def write(self, file):
531
- """ Write HOBO properties to a text file."""
532
- with open(file, 'w') as json_file:
533
- json.dump(self.get_properties(), json_file)
534
-
535
- def get_properties(self):
536
- """ Create dictionary-formatted properties """
537
- return {x: getattr(self, x) for x in self.DEFAULTS.keys()}
538
-
539
- @staticmethod
540
- def _detect_date_separator(lines):
541
- """ Detect the 'date_separator' property from a file."""
542
-
543
- pattern = re.compile(r"(\d{2})(.)(\d{2}).(\d{2}).\d{2}:\d{2}:\d{2}")
544
- date_sep = list()
545
-
546
- for line in lines:
547
- match = pattern.search(line)
548
-
549
- if match:
550
- date_sep.append(match[2])
551
-
552
- return mode(date_sep)
553
-
554
- @staticmethod
555
- def _detect_separator(lines):
556
- """ Detect the 'separator' property from a file."""
557
- pattern = re.compile(r"\d{2}.\d{2}.\d{2}.\d{2}:\d{2}:\d{2}")
558
- choices = [",", ";", "\t"]
559
- splits = {key:[] for key in choices}
560
-
561
- for line in lines:
562
- match = pattern.search(line)
563
-
564
- if match:
565
- for sep in choices:
566
- columns = len(line.split(sep))
567
-
568
- if columns == 1:
569
- choices.remove(sep)
570
-
571
- if len(splits[sep]) != 0 and splits[sep][-1] != columns:
572
- choices.remove(sep)
573
-
574
- splits[sep].append(columns)
575
-
576
- if len(choices) < 1:
577
- raise RuntimeError("No possible separators")
578
-
579
- elif len(choices) == 1:
580
- return(choices[0])
581
-
582
- else: # Two or more choices remaining? Use first occurring separator
583
- pattern_2 = re.compile(f"({'|'.join(choices)})")
584
- for line in lines:
585
- match = pattern.search(line)
586
-
587
- if match:
588
- return pattern_2.search(line)[0]
589
-
590
- @staticmethod
591
- def _detect_date_format(lines):
592
- """ Detect whether dates are MDY, YMD, or DMY.
593
-
594
- Based on heuristics and the assumption of evenly distributed sampling at
595
- frequency greater than monthly.
596
- """
597
-
598
- pattern = re.compile(r"(\d{2}).(\d{2}).(\d{2}).\d{2}:\d{2}:\d{2}")
599
-
600
- p1 = list()
601
- p2 = list()
602
- p3 = list()
603
-
604
- for line in lines:
605
- match = pattern.search(line)
606
-
607
- if match:
608
- p1.append(int(match[1]))
609
- p2.append(int(match[2]))
610
- p3.append(int(match[3]))
611
-
612
- if max(p2) > 12: # Day in middle slot
613
- fmt = "MDY"
614
-
615
- else:
616
- if len(set(p1)) > len(set(p3)): # Which is more 'diverse'
617
- fmt = "DMY"
618
-
619
- else:
620
- fmt = "YMD"
621
-
622
- return fmt
623
-
624
- @staticmethod
625
- def _detect_separate_date_time(lines):
626
- """ Look for one of two patterns """
627
- separate = re.compile("Date[^ ].*Time")
628
- combined = re.compile("Date Time")
629
-
630
- sep_match = len(list(filter(separate.search, lines)))
631
- com_match = len(list(filter(combined.search, lines)))
632
-
633
- if sep_match + com_match > 1:
634
- raise ValueError("Duplicate Date or Time headers")
635
-
636
- if sep_match == 1:
637
- return True # True, they are separate
638
-
639
- elif com_match == 1:
640
- return False # False, they are not separate
641
-
642
- else:
643
- raise ValueError("Could not find Date, Time headers")
644
-
645
- @staticmethod
646
- def _detect_time_format_24hr(lines):
647
- """ Look for AM/PM string
648
- - Header rows up top will not include AM/PM strings
649
- - "plot details" may contain AM/PM strings
650
- """
651
- pattern = re.compile(r" (AM|PM).")
652
- matches = list(filter(pattern.search, lines))
653
- if (len(matches) < MAX_HEADER_LINES):
654
- return True
655
- else:
656
- return False
657
-
658
- @staticmethod
659
- def _detect_always_show_fractional_seconds(lines):
660
- """ Once you find a fractional second, check if all subsequent lines have them"""
661
- detected = False
662
- pattern = re.compile(r"(\d{2}:\d{2}:\d{2}\.\d|^[^\d]*$)") # decimal seconds OR no numbers.
663
-
664
- iterate = iter(lines)
665
-
666
- while not detected: # Get to the first matching line
667
- try:
668
- line = next(iterate)
669
- except StopIteration:
670
- return False # ran through all lines
671
-
672
- if pattern.search(line):
673
- detected = True
674
-
675
- for remaining_line in iterate: # All subsequent lines must match
676
- if not pattern.search(remaining_line):
677
- return False
678
-
679
- return True
680
-
681
- @staticmethod
682
- def _detect_include_plot_details(lines):
683
- """ Look for obvious plot details text. """
684
-
685
- options = "|".join(DETAILS_KEYWORDS)
686
- pattern = re.compile(rf"({options})")
687
- matches = list(filter(pattern.search, lines))
688
-
689
- if len(matches) > 3:
690
- return True
691
- else:
692
- return False
693
-
694
- @staticmethod
695
- def _detect_line_number(lines):
696
- """ Detect whether a line number column is present """
697
- pattern = re.compile(r"^([0-9]+)[^-/0-9]")
698
- last = None
699
-
700
- for line in lines:
701
- match = pattern.search(line)
702
-
703
- if match:
704
- if last is not None and int(match[1]) < last:
705
- return False
706
-
707
- last = int(match[1])
708
-
709
- if last is None:
710
- return False
711
-
712
- else:
713
- return True
714
-
715
- @staticmethod
716
- def _detect_no_quotes_or_commas(lines):
717
- """ Detect whether the 'no quotes or commas' parameter is enabled """
718
- header = re.compile('"Date')
719
- for line in lines:
720
- if header.search(line):
721
- return False
722
-
723
- return True
724
-
725
- @staticmethod
726
- def _parse_number_format(lines):
727
- """ Use regex magic to extract the characters used for various separators """
728
- pattern = re.compile(r""" (?P<sep>[\t,;]) # Column separator
729
- ( # Group for one data column
730
- (?P<neg1>[-\(])? # Possible opening negative sign
731
- (\d{1,3} # millions, billions or more, etc.
732
- (?P<thou>
733
- [ ,\.] # Separated by a thousands delimiter
734
- )
735
- )? # Zero or one times
736
- (\d{3}(?P=thou))* # 'Sandwiched' digit triplets using same thousands separator
737
- \d{1,3} # Hundreds, tens, ones
738
- (?P<decimal>
739
- [\., ] # Separated by a decimal delimiter
740
- )
741
- \d+ # Decimal digits (assume at least 1)
742
- (?P<neg2>[-\)])? # Possible terminating negative sign
743
- (?P=sep) # The same column separator
744
- )+ # Repeated for each data column
745
- """, re.VERBOSE)
746
-
747
- thousands = list()
748
- decimals = list()
749
- neg1 = list()
750
- neg2 = list()
751
- sep = list()
752
-
753
- for line in lines:
754
-
755
- match = pattern.search(line)
756
-
757
- if match:
758
- thousands += match.captures("thou")
759
- decimals += match.captures("decimal")
760
- neg1 += match.captures("neg1") if match.captures("neg1") else []
761
- neg2 += match.captures("neg2") if match.captures("neg2") else []
762
- sep += match.captures("sep")
763
-
764
- deci_sep = mode(decimals)
765
- col_sep = mode(sep)
766
-
767
- try:
768
- thou_sep = mode(thousands)
769
- except StatisticsError:
770
- thou_sep = None
771
-
772
- try:
773
- negative_open = mode(neg1)
774
- except StatisticsError:
775
- negative_open = None
776
-
777
- try:
778
- negative_term = mode(neg2)
779
- except StatisticsError:
780
- negative_term = None
781
-
782
- return thou_sep, deci_sep, col_sep, negative_open, negative_term
783
-
784
- @staticmethod
785
- def _evaluate_positive_number_format(thou_sep, deci_sep):
786
- """ Detect what format positive numbers are in
787
- | 1 | 1,234.56 | comma, period |
788
- | 2 | 1 234,56 | space, comma |
789
- | 3 | 1.234,56 | period, comma |
790
- | 4 | 1.234 56 | period, space |
791
- """
792
-
793
- if thou_sep == "," and deci_sep == ".":
794
- return 1
795
- elif thou_sep == " " and deci_sep == ",":
796
- return 2
797
- elif thou_sep == "." and deci_sep == ",":
798
- return 3
799
- elif thou_sep == " " and deci_sep == ".":
800
- return 4
801
- elif thou_sep is None:
802
- if deci_sep == ".":
803
- return 1
804
- elif deci_sep == " ":
805
- return 4
806
- elif deci_sep is None:
807
- if thou_sep == ",":
808
- return 1
809
- elif thou_sep == " ":
810
- return 2
811
-
812
- else:
813
- return None
814
-
815
- @staticmethod
816
- def _evaluate_negative_number_format(negative_open:str, negative_terminator:str) -> int:
817
- """
818
- Determine what the negative number format is
819
-
820
- | 1 | -123 | -, None |
821
- | 2 | 123- | None, - |
822
- | 3 | (123) | (, ) |
823
- """
824
- if negative_open == "-" and negative_terminator is None:
825
- return 1
826
- elif negative_open is None and negative_terminator == "-":
827
- return 2
828
- elif negative_open == "(" and negative_terminator == ")":
829
- return 3
830
-
831
- @property
832
- def thousands_separator(self) -> str:
833
- """Get thousands separator
834
-
835
- Returns
836
- -------
837
- str
838
- Thousands separator character
839
- """
840
- if hasattr(self, '_thousands_separator'):
841
- return self._thousands_separator
842
- elif self.positive_number_format == 1:
843
- return ","
844
- elif self.positive_number_format == 2:
845
- return " "
846
- elif self.positive_number_format == 3:
847
- return "."
848
- elif self.positive_number_format == 4:
849
- return "."
850
- else:
851
- return None
852
-
853
- @thousands_separator.setter
854
- def thousands_separator(self, val):
855
- if self.positive_number_format is not None:
856
- raise AttributeError("Can't set thousands separator explicitly if positive_number_format is defined")
857
- else:
858
- self._thousands_separator = val
859
-
860
- @property
861
- def decimal_separator(self) -> str:
862
- """Return the decimal separator
863
-
864
- Returns
865
- -------
866
- str
867
- decimal separator character
868
- """
869
- if hasattr(self, '_decimal_separator'):
870
- return self._decimal_separator
871
- elif self.positive_number_format == 1:
872
- return "."
873
- elif self.positive_number_format == 2:
874
- return ","
875
- elif self.positive_number_format == 3:
876
- return ","
877
- elif self.positive_number_format == 4:
878
- return " "
879
- else:
880
- return None
881
-
882
- @decimal_separator.setter
883
- def decimal_separator(self, val):
884
- if self.positive_number_format is not None:
885
- raise AttributeError("Can't set decimal separator explicitly if positive_number_format is defined")
886
- else:
887
- self._decimal_separator = val
888
-
889
- @property
890
- def positive_number_format(self) -> int:
891
- """Get the positive number format.
892
-
893
- Returns
894
- -------
895
- int
896
- Positive number format as an index
897
- """
898
- return self._positive_number_format
899
-
900
- @positive_number_format.setter
901
- def positive_number_format(self, val):
902
- if val not in self.POS_N_FMT + [None]:
903
- raise ValueError(f"Positive number format must be in {self.POS_N_FMT} (Not {val})")
904
-
905
- self._positive_number_format = val
906
-
907
- if val == 1:
908
- self._thousands_separator, self._decimal_separator = (",", ".")
909
- elif val == 2:
910
- self._thousands_separator, self._decimal_separator = (" ", ",")
911
- elif val == 3:
912
- self._thousands_separator, self._decimal_separator = (".", ",")
913
- elif val == 4:
914
- self._thousands_separator, self._decimal_separator = (".", " ")
1
+ import pandas as pd
2
+ import regex as re
3
+ import json
4
+ import pprint
5
+
6
+ from statistics import mode, StatisticsError
7
+ from typing import Optional
8
+
9
+ from .AbstractReader import AbstractReader
10
+
11
+
12
# Measurement names that identify a data column (joined into a regex
# alternation by HOBO._is_data_header, so the order is significant).
# Taken from HOBOware help manual. Not Complete.
DATA_HEADERS = [
    "x accel",
    "y accel",
    "z accel",
    "watt-hours",
    "kilowatt-hours",
    "watts",
    "rh",
    "temp",
    "wind speed",
    "wind dir",
    "soil moisture",
    "amps",
    "volts",
]

# Text whose presence suggests that the export includes 'plot details'
DETAILS_KEYWORDS = [
    "First Sample Time",
    "Battery at Launch",
    "Device Info",
    "Deployment Info",
]
# Keys that start a new record when the details column is parsed
DETAILS_HEADERS = ["Series", "Event Type"]
DETAILS_SUBHEADERS = [
    "Devices",
    "Device Info",
    "Deployment Info",
    "Series Statistics",
]

# ==== ASSUMPTIONS ====
# Fewer AM/PM matches than this are treated as a 24-hour time format
MAX_HEADER_LINES = 40

# =====================
27
+
28
+
29
class HOBO(AbstractReader):

    TZ_REGEX = re.compile(r"GMT\s?[-+]\d\d:\d\d")
    MAX_LINES = 200  # How many lines to check for header, date, etc.

    def __init__(self, properties: "Optional[HOBOProperties]"=None):
        """A class for reading HOBOWare exports

        Because of the variability of HOBOWare CSV exports, the HOBO reader
        relies on a HOBOProperties configuration object. This can be
        configured manually (most reliable) or autodetected from a file.

        Parameters
        ----------
        properties : HOBOProperties, optional
            A :py:class:`~tsp.dataloggers.HOBO.HOBOProperties` object that provides information about how the csv export was configured. If ``None``, the software will attempt to figure out the configuration properties using :py:meth:`~tsp.dataloggers.HOBO.HOBOProperties.autodetect` , by default ``None``

        Examples
        --------

        .. code-block:: python

            from tsp.dataloggers import HOBO, HOBOProperties
            from pathlib import Path
            from pkg_resources import resource_filename

            classic_file = resource_filename("tsp", "dataloggers/test_files/hobo_1_AB_classic.csv")
            defaults_file = resource_filename("tsp", "dataloggers/test_files/hobo_1_AB_defaults.csv")

            # To autodetect HOBOWare Properties:
            data = HOBO().read(defaults_file)

            # To manually specify the HOBOWare configuration, initialize the HOBO reader with a HOBOProperties object
            classic_file = resource_filename("tsp", "dataloggers/test_files/hobo_1_AB_classic.csv")
            classic_properties = HOBOProperties.classic()
            hobo = HOBO(classic_properties)
            data = hobo.read(classic_file)

        """
        super().__init__()
        self.tz = None
        self.properties = properties

    def read(self, file:str) -> "pd.DataFrame":
        """Read a HOBOWare CSV export using the properties

        Parameters
        ----------
        file : str
            Path to HoboWare CSV export

        Returns
        -------
        pandas.DataFrame
            A pandas dataframe with only the data: a ``TIME`` column followed
            by the recognised data columns.
        """
        if self.properties is None:
            print("Attempting to detect file properties")
            self.properties = HOBOProperties.autodetect(file)

        with open(file, encoding="UTF-8") as f:  # Get header info
            lines = f.readlines()

        self._extract_header_from_lines(lines)

        if self.properties.include_plot_details:
            self.META['details'] = self._read_details(lines)

        self._set_tz_offset()

        # Read remaining data as pd DataFrame
        self.raw_table = self._safe_read(file, delimiter=self.properties.separator,
                                         headerline_i=self.headerline_i)

        time_df = self._create_datetime_column(self.raw_table)
        data_df = self._extract_data_columns(self.raw_table)

        self._convert_number_format(data_df)

        self.DATA = pd.concat([time_df, data_df], axis=1)
        self.DATA.columns = ["TIME"] + list(data_df.columns)

        return self.DATA

    def _safe_read(self, file, delimiter, headerline_i):
        """ Read the table with pandas, rejecting a layout that cannot be parsed.

        Raises
        ------
        IOError
            If the header has no quotes, the separator is a comma, and both
            logger and sensor serial numbers are included.
        """
        if (self.properties.no_quotes_or_commas
                and self.properties.separator == ','
                and self.properties.include_logger_serial
                and self.properties.include_sensor_serial):
            # TODO: check header, replace, handle extra rows with details
            raise IOError("Bad file (can't have comma separators, no quotes in header, and both logger and sensor serial numbers)")

        return pd.read_csv(file, delimiter=delimiter,
                           skiprows=headerline_i, index_col=False)

    def _extract_header_from_lines(self, lines):
        """ Get the text and row index for the header row

        Sets ``self.headerline`` and ``self.headerline_i``.

        Raises
        ------
        ValueError
            If no header row is found, either because the search limit
            (``MAX_LINES``) was exceeded or because the lines ran out.
        """
        for i, line in enumerate(lines):
            if self._is_header(line):
                self.headerline = line
                self.headerline_i = i
                return

            if i > self.MAX_LINES:
                break

        # Fail here with a clear message rather than later with an
        # AttributeError on the missing ``headerline``.
        raise ValueError(f"Could not find a header row (searched at most {self.MAX_LINES + 2} lines)")

    def _is_header(self, line):
        """ Determine whether a line is a header row """
        pattern = self.properties._header_regex()
        return bool(pattern.search(line))

    def _set_tz_offset(self):
        """ Find and set time zone offset (no-op if ``self.tz`` is already set) """
        if self.tz:
            return
        elif self.properties.include_plot_details and self.properties.no_quotes_or_commas and self.META.get('details'):
            tz = self._detect_time_zone_from_details(self.META.get('details'))
        else:
            tz = self._detect_time_zone_from_header_line(self.headerline)

        self.tz = tz
        self.META['tz_offset'] = tz

    def _tz_from_text(self, text):
        """ Return the offset in ``text`` as e.g. ``-0700``, or None if absent """
        tz_match = self.TZ_REGEX.search(text)
        if not tz_match:
            return None
        # The last six characters of the match are the signed HH:MM offset
        return tz_match.group()[-6:].replace(":", "")

    def _detect_time_zone_from_header_line(self, line):
        """ Extract time zone from header line """
        return self._tz_from_text(line)

    def _detect_time_zone_from_details(self, details):
        """ Extract time zone from details column as a list of dicts"""
        return self._tz_from_text(details[0]['First Sample Time'])

    def _is_data_header(self, text):
        """ Determine whether a string represents a column name with data

        Returns the (possibly empty) list of matches, so the result is truthy
        exactly when ``text`` looks like a data column name.
        """
        if self.properties.no_quotes_or_commas:
            pattern = re.compile(rf"({'|'.join(DATA_HEADERS)}) \(.{{1,5}}\)", re.IGNORECASE)
        else:
            pattern = re.compile(f"({'|'.join(DATA_HEADERS)}), ", re.IGNORECASE)

        return pattern.findall(text)

    def _is_datetime_header(self, text):
        """ Determine whether a string represents a column name for date or time """
        return bool(re.search("(Date Time|Date|Time)$", text))

    def _extract_data_columns(self, df):
        """ Return a subset of a dataframe containing only data columns """
        keep = [column_name for column_name in df.columns
                if self._is_data_header(column_name)]

        # Explicit copy so the columns can be reassigned without touching
        # (or warning about) the raw table.
        return df.loc[:, keep].copy()

    def _create_datetime_column(self, df):
        """ Create a pandas datetime Series from a HOBO dataframe """
        tzfmt = "%z" if self.tz else ""
        tz = self.tz if self.tz else ""

        if self.properties.separate_date_time:
            _date_pattern = re.compile("Date")
            date_header = next(filter(_date_pattern.search, df.columns))

            _time_pattern = re.compile("Time")
            time_header = next(filter(_time_pattern.search, df.columns))

            # Date and time are concatenated without a space; the format
            # string below is built the same way.
            full_date = df.loc[:, date_header] + df.loc[:, time_header] + tz

            date_fmt = self.properties._date_pattern() + self.properties._time_pattern() + tzfmt
            TIME = pd.to_datetime(full_date, format=date_fmt)

        else:
            date_time_pattern = re.compile("Date Time")
            datetime_header = next(filter(date_time_pattern.search, df.columns))

            full_date = df.loc[:, datetime_header] + tz
            date_fmt = self.properties._date_pattern() + tzfmt
            TIME = pd.to_datetime(full_date, format=date_fmt)

        return TIME

    def _read_details(self, lines):
        """ Read series details from last column (if they are included).

        Returns a list of dicts. A new dict is started whenever a key listed
        in ``DETAILS_HEADERS`` is met while the current dict is not empty.
        """
        meta_pattern = re.compile("(.*?):(.*)$")
        details = list()
        current = dict()

        for line in lines:
            info_column = line.split(self.properties.separator)[-1]
            match = meta_pattern.search(info_column)

            if match:
                key, value = match.groups()

                if current != {} and key.strip() in DETAILS_HEADERS:
                    details.append(current)
                    current = dict()

                current[key.strip()] = value.strip()

            if details != [] and re.search(r"^\s*$", info_column):  # Stop once details block is over
                break

        return details

    def _convert_number_format(self, df):
        """ Convert numeric-style text columns of ``df`` to numbers, in place """
        for col in df.columns:
            # Plain column assignment replaces the column, so the new numeric
            # dtype is kept; ``df.loc[:, col] = ...`` may write into the
            # existing object-dtype column instead.
            df[col] = self._convert_series_number_format(df[col])

    def _convert_series_number_format(self, series):
        """ Convert a pandas series to numeric after normalising its number format """
        if hasattr(series, 'str'):
            thousands = self.properties.thousands_separator
            decimal = self.properties.decimal_separator

            # Separators are literal characters ("." must not act as a regex)
            if thousands:
                series = series.str.replace(thousands, "", regex=False)
            if decimal and decimal != '.':
                series = series.str.replace(decimal, ".", regex=False)

            # Negative formats "(1.5)" and "1.5-" both become "-1.5"
            series = series.str.replace(r"\((\d*\.\d*)\)", r"-\1", regex=True)
            series = series.str.replace(r"(\d*\.\d*)-", r"-\1", regex=True)

        return pd.to_numeric(series)
258
+
259
+
260
+ class HOBOProperties:
261
+
262
+ DATE_FORMATS = ["MDY", "YMD", "DMY"]
263
+ POS_N_FMT = [1,2,3,4]
264
+ NEG_N_FMT = [1,2,3]
265
+
266
+ DEFAULTS = {"separator": ",",
267
+ "include_line_number": True,
268
+ "include_plot_title_in_header": True,
269
+ "always_show_fractional_seconds": False,
270
+ "separate_date_time": False,
271
+ "no_quotes_or_commas": False,
272
+ "include_logger_serial": True,
273
+ "include_sensor_serial": True,
274
+ "date_format": "MDY",
275
+ "date_separator": "/",
276
+ "time_format_24hr": False,
277
+ "positive_number_format": 1,
278
+ "negative_number_format": 1,
279
+ "include_plot_details": False
280
+ }
281
+
282
+ CLASSIC = {"separator": "\t",
283
+ "include_line_number": False,
284
+ "include_plot_title_in_header": False,
285
+ "always_show_fractional_seconds": True,
286
+ "separate_date_time": False,
287
+ "no_quotes_or_commas": True,
288
+ "include_logger_serial": False,
289
+ "include_sensor_serial": True,
290
+ "date_format": "MDY",
291
+ "date_separator": "/",
292
+ "time_format_24hr": True,
293
+ "positive_number_format": 1,
294
+ "negative_number_format": 1,
295
+ "include_plot_details": False
296
+ }
297
+
298
+ def __str__(self):
299
+ return pprint.pformat(self.get_properties())
300
+
301
+ def __init__(self, separator:str=",",
302
+ include_line_number:bool=True,
303
+ include_plot_title_in_header:bool=True,
304
+ always_show_fractional_seconds:bool=False,
305
+ separate_date_time:bool=False,
306
+ no_quotes_or_commas:bool=False,
307
+ include_logger_serial:bool=True,
308
+ include_sensor_serial:bool=True,
309
+ date_format:str="MDY",
310
+ date_separator:str="/",
311
+ time_format_24hr:bool=False,
312
+ positive_number_format:int=1,
313
+ negative_number_format:int=1,
314
+ include_plot_details:bool=False):
315
+
316
+ r""" An object used to describe the CSV export configuration from HOBOWare.
317
+
318
+ Parameters
319
+ ----------
320
+ separator : str, optional
321
+ Whether data are comma, semicolon or tab-separated ``",", ";", "\t"``, by default ``","``
322
+ include_line_number : bool, optional
323
+ Whether the CSV file has line numbers as the first column, by default True
324
+ include_plot_title_in_header : bool, optional
325
+ Whether the plot title is included in the CSV header, by default True
326
+ always_show_fractional_seconds : bool, optional
327
+ Whether fractinoal seconds are always included in the data export, by default False
328
+ separate_date_time : bool, optional
329
+ Whether date and time are in separate columns, by default False
330
+ no_quotes_or_commas : bool, optional
331
+ Whether quotes and commas are omitted from the header row, by default False
332
+ include_logger_serial : bool, optional
333
+ Whether logger serial number is included, by default True
334
+ include_sensor_serial : bool, optional
335
+ Whether sensor serial number is included, by default True
336
+ date_format : str, optional
337
+ Chosen from ``"MDY", "YMD", "DMY"``, by default "MDY"
338
+ date_separator : str, optional
339
+ What token separates year, month, and day in the date. Chosen from ``"/", "-"``, by default ``"/"``
340
+ time_format_24hr : bool, optional
341
+ Whether 24 hour time format is used, by default False
342
+ positive_number_format : int, optional
343
+ Format for positive numbers. See Notes, by default 1
344
+ negative_number_format : int, optional
345
+ Format for negative numbers. See Notes, by default 1
346
+ include_plot_details : bool, optional
347
+ Whether plot details are included in the file, by default False
348
+
349
+ Notes
350
+ -----
351
+ Integers are used to define the positive and negative number formats. The corresponding formats are in the tables below:
352
+
353
+ Positive number format
354
+
355
+ ===== ======== =================== =================
356
+ value example thousands separator decimal separator
357
+ ===== ======== =================== =================
358
+ 1 1,234.56 comma period
359
+ 2 1 234,56 space comma
360
+ 3 1.234,56 period comma
361
+ 4 1.234 56 period space
362
+ ===== ======== =================== =================
363
+
364
+ Negative number format
365
+
366
+ ===== ======== =================== =================
367
+ value example prefix suffix
368
+ ===== ======== =================== =================
369
+ 1 -123 minus sign (\-) None
370
+ 2 123- None minus sign (\-)
371
+ 3 \(123\) ( )
372
+ ===== ======== =================== =================
373
+
374
+ Examples
375
+ --------
376
+
377
+ .. code-block:: python
378
+
379
+ from teaspoon.dataloggers import HOBOProperties
380
+ from pkg_resources import resource_filename
381
+ from pathlib import Path
382
+
383
+ # Autodetect file structure
384
+ hobo_file = resource_filename("teaspoon", "dataloggers/test_files/hobo_1_AB_classic.csv")
385
+ P = HOBOProperties.autodetect(hobo_file)
386
+ print(P)
387
+
388
+ # HOBOWare 'default' format
389
+ print(HOBOProperties.defaults())
390
+
391
+ # HOBOWare 'classic' format
392
+ print(HOBOProperties.classic())
393
+
394
+ # Custom format (unspecified properties are defaults)
395
+ P = HOBOProperties(date_separator=",",
396
+ date_format="YMD",
397
+ include_line_number=True,
398
+ include_plot_details=False,
399
+ no_quotes_or_commas=False,
400
+ separate_date_time=False)
401
+ print(P)
402
+
403
+ # Save custom properties
404
+ savepath = Path(Path.home(), "custom_hobo_properties.json")
405
+ P.write(savepath)
406
+
407
+ # Read from a saved file
408
+ Q = HOBOProperties.from_file(savepath) # Read from a file
409
+ print(Q)
410
+
411
+ """
412
+
413
+ self.separator = separator
414
+ self.include_line_number = include_line_number
415
+ self.include_plot_title_in_header = include_plot_title_in_header
416
+ self.always_show_fractional_seconds = always_show_fractional_seconds
417
+ self.separate_date_time = separate_date_time
418
+ self.no_quotes_or_commas = no_quotes_or_commas
419
+ self.include_logger_serial = include_logger_serial
420
+ self.include_sensor_serial = include_sensor_serial
421
+ self.date_format = date_format
422
+ self.date_separator = date_separator
423
+ self.time_format_24hr = time_format_24hr
424
+ self.positive_number_format = positive_number_format
425
+ self.negative_number_format = negative_number_format
426
+ self.include_plot_details = include_plot_details
427
+
428
+ @classmethod
429
+ def defaults(cls):
430
+ """ Create a HOBO Properties object using HOBOWare defaults"""
431
+ hobo_properties = cls(**cls.DEFAULTS)
432
+ return hobo_properties
433
+
434
+ @classmethod
435
+ def classic(cls):
436
+ """ Create a HOBO Properties object using HOBOWare 'classic' settings """
437
+ hobo_properties = cls(**cls.CLASSIC)
438
+ return hobo_properties
439
+
440
+ @classmethod
441
+ def from_file(cls, file):
442
+ """ Create a HOBO Properties object from a text file """
443
+ data = cls.read(file)
444
+ hobo_properties = cls(**data)
445
+
446
+ return hobo_properties
447
+
448
+ @classmethod
449
+ def autodetect(cls, file, n_lines=400):
450
+ """ Automatically detect properties from a file """
451
+ print("Detecting file properties, this may take some time...")
452
+
453
+ with open(file, encoding="UTF-8") as f:
454
+ lines = f.readlines()
455
+ lines = lines[:n_lines] + lines[n_lines::1000]
456
+
457
+ thou_sep, deci_sep, col_sep, negative_open, negative_term = cls._parse_number_format(lines)
458
+
459
+ hobo = cls(separator=cls._detect_separator(lines),
460
+ include_line_number=cls._detect_line_number(lines),
461
+ # include_plot_title_in_header=True,
462
+ always_show_fractional_seconds=cls._detect_always_show_fractional_seconds(lines),
463
+ separate_date_time=cls._detect_separate_date_time(lines),
464
+ no_quotes_or_commas=cls._detect_no_quotes_or_commas(lines),
465
+ # include_logger_serial=True,
466
+ # include_sensor_serial=True,
467
+ date_format=cls._detect_date_format(lines),
468
+ date_separator=cls._detect_date_separator(lines),
469
+ time_format_24hr=cls._detect_time_format_24hr(lines),
470
+ positive_number_format=cls._evaluate_positive_number_format(thou_sep, deci_sep),
471
+ negative_number_format=cls._evaluate_negative_number_format(negative_open, negative_term),
472
+ include_plot_details=cls._detect_include_plot_details(lines))
473
+
474
+ if hobo.positive_number_format is None:
475
+ hobo.thousands_separator = thou_sep
476
+ hobo.decimal_separator = deci_sep
477
+
478
+ return hobo
479
+
480
+ def _date_pattern(self):
481
+ """ Return the appropriate strptime string to read dates from a HOBO file."""
482
+ if self.date_format not in self.DATE_FORMATS:
483
+ raise ValueError(f"Incorrect date pattern. Choose from {self.DATE_FORMATS}")
484
+
485
+ if self.date_format == "YMD":
486
+ pattern = "%y{0}%m{0}%d".format(self.date_separator)
487
+
488
+ elif self.date_format == "MDY":
489
+ pattern = "%m{0}%d{0}%y".format(self.date_separator)
490
+
491
+ elif self.date_format == "DMY":
492
+ pattern = "%d{0}%m{0}%y".format(self.date_separator)
493
+
494
+ if not self.separate_date_time:
495
+ pattern += f" {self._time_pattern()}"
496
+
497
+ return pattern
498
+
499
+ def _time_pattern(self):
500
+ """ Return the appropriate strptime string to read time from a HOBO file."""
501
+
502
+ if self.time_format_24hr:
503
+ fmt = "%H:%M:%S"
504
+ else:
505
+ fmt = "%I:%M:%S %p"
506
+
507
+ if self.always_show_fractional_seconds:
508
+ fmt = fmt.replace("S", "S.%f")
509
+
510
+ return fmt
511
+
512
+ def _header_regex(self):
513
+ """ Return the regular expression to match a header row. """
514
+ if self.separate_date_time:
515
+
516
+ if self.no_quotes_or_commas:
517
+ return re.compile(f"Date{self.separator}Time")
518
+ else:
519
+ return re.compile(f'Date"{self.separator}"Time')
520
+ else:
521
+ return re.compile("Date Time")
522
+
523
+ @staticmethod
524
+ def read(file):
525
+ """ Read HOBO file properties from a text file."""
526
+ with open(file) as json_file:
527
+ data = json.load(json_file)
528
+ return data
529
+
530
+ def write(self, file):
531
+ """ Write HOBO properties to a text file."""
532
+ with open(file, 'w') as json_file:
533
+ json.dump(self.get_properties(), json_file)
534
+
535
+ def get_properties(self):
536
+ """ Create dictionary-formatted properties """
537
+ return {x: getattr(self, x) for x in self.DEFAULTS.keys()}
538
+
539
+ @staticmethod
540
+ def _detect_date_separator(lines):
541
+ """ Detect the 'date_separator' property from a file."""
542
+
543
+ pattern = re.compile(r"(\d{2})(.)(\d{2}).(\d{2}).\d{2}:\d{2}:\d{2}")
544
+ date_sep = list()
545
+
546
+ for line in lines:
547
+ match = pattern.search(line)
548
+
549
+ if match:
550
+ date_sep.append(match[2])
551
+
552
+ return mode(date_sep)
553
+
554
+ @staticmethod
555
+ def _detect_separator(lines):
556
+ """ Detect the 'separator' property from a file."""
557
+ pattern = re.compile(r"\d{2}.\d{2}.\d{2}.\d{2}:\d{2}:\d{2}")
558
+ choices = [",", ";", "\t"]
559
+ splits = {key:[] for key in choices}
560
+
561
+ for line in lines:
562
+ match = pattern.search(line)
563
+
564
+ if match:
565
+ for sep in choices:
566
+ columns = len(line.split(sep))
567
+
568
+ if columns == 1:
569
+ choices.remove(sep)
570
+
571
+ if len(splits[sep]) != 0 and splits[sep][-1] != columns:
572
+ choices.remove(sep)
573
+
574
+ splits[sep].append(columns)
575
+
576
+ if len(choices) < 1:
577
+ raise RuntimeError("No possible separators")
578
+
579
+ elif len(choices) == 1:
580
+ return(choices[0])
581
+
582
+ else: # Two or more choices remaining? Use first occurring separator
583
+ pattern_2 = re.compile(f"({'|'.join(choices)})")
584
+ for line in lines:
585
+ match = pattern.search(line)
586
+
587
+ if match:
588
+ return pattern_2.search(line)[0]
589
+
590
+ @staticmethod
591
+ def _detect_date_format(lines):
592
+ """ Detect whether dates are MDY, YMD, or DMY.
593
+
594
+ Based on heuristics and the assumption of evenly distributed sampling at
595
+ frequency greater than monthly.
596
+ """
597
+
598
+ pattern = re.compile(r"(\d{2}).(\d{2}).(\d{2}).\d{2}:\d{2}:\d{2}")
599
+
600
+ p1 = list()
601
+ p2 = list()
602
+ p3 = list()
603
+
604
+ for line in lines:
605
+ match = pattern.search(line)
606
+
607
+ if match:
608
+ p1.append(int(match[1]))
609
+ p2.append(int(match[2]))
610
+ p3.append(int(match[3]))
611
+
612
+ if max(p2) > 12: # Day in middle slot
613
+ fmt = "MDY"
614
+
615
+ else:
616
+ if len(set(p1)) > len(set(p3)): # Which is more 'diverse'
617
+ fmt = "DMY"
618
+
619
+ else:
620
+ fmt = "YMD"
621
+
622
+ return fmt
623
+
624
+ @staticmethod
625
+ def _detect_separate_date_time(lines):
626
+ """ Look for one of two patterns """
627
+ separate = re.compile("Date[^ ].*Time")
628
+ combined = re.compile("Date Time")
629
+
630
+ sep_match = len(list(filter(separate.search, lines)))
631
+ com_match = len(list(filter(combined.search, lines)))
632
+
633
+ if sep_match + com_match > 1:
634
+ raise ValueError("Duplicate Date or Time headers")
635
+
636
+ if sep_match == 1:
637
+ return True # True, they are separate
638
+
639
+ elif com_match == 1:
640
+ return False # False, they are not separate
641
+
642
+ else:
643
+ raise ValueError("Could not find Date, Time headers")
644
+
645
+ @staticmethod
646
+ def _detect_time_format_24hr(lines):
647
+ """ Look for AM/PM string
648
+ - Header rows up top will not include AM/PM strings
649
+ - "plot details" may contain AM/PM strings
650
+ """
651
+ pattern = re.compile(r" (AM|PM).")
652
+ matches = list(filter(pattern.search, lines))
653
+ if (len(matches) < MAX_HEADER_LINES):
654
+ return True
655
+ else:
656
+ return False
657
+
658
+ @staticmethod
659
+ def _detect_always_show_fractional_seconds(lines):
660
+ """ Once you find a fractional second, check if all subsequent lines have them"""
661
+ detected = False
662
+ pattern = re.compile(r"(\d{2}:\d{2}:\d{2}\.\d|^[^\d]*$)") # decimal seconds OR no numbers.
663
+
664
+ iterate = iter(lines)
665
+
666
+ while not detected: # Get to the first matching line
667
+ try:
668
+ line = next(iterate)
669
+ except StopIteration:
670
+ return False # ran through all lines
671
+
672
+ if pattern.search(line):
673
+ detected = True
674
+
675
+ for remaining_line in iterate: # All subsequent lines must match
676
+ if not pattern.search(remaining_line):
677
+ return False
678
+
679
+ return True
680
+
681
+ @staticmethod
682
+ def _detect_include_plot_details(lines):
683
+ """ Look for obvious plot details text. """
684
+
685
+ options = "|".join(DETAILS_KEYWORDS)
686
+ pattern = re.compile(rf"({options})")
687
+ matches = list(filter(pattern.search, lines))
688
+
689
+ if len(matches) > 3:
690
+ return True
691
+ else:
692
+ return False
693
+
694
+ @staticmethod
695
+ def _detect_line_number(lines):
696
+ """ Detect whether a line number column is present """
697
+ pattern = re.compile(r"^([0-9]+)[^-/0-9]")
698
+ last = None
699
+
700
+ for line in lines:
701
+ match = pattern.search(line)
702
+
703
+ if match:
704
+ if last is not None and int(match[1]) < last:
705
+ return False
706
+
707
+ last = int(match[1])
708
+
709
+ if last is None:
710
+ return False
711
+
712
+ else:
713
+ return True
714
+
715
+ @staticmethod
716
+ def _detect_no_quotes_or_commas(lines):
717
+ """ Detect whether the 'no quotes or commas' parameter is enabled """
718
+ header = re.compile('"Date')
719
+ for line in lines:
720
+ if header.search(line):
721
+ return False
722
+
723
+ return True
724
+
725
+ @staticmethod
726
+ def _parse_number_format(lines):
727
+ """ Use regex magic to extract the characters used for various separators """
728
+ pattern = re.compile(r""" (?P<sep>[\t,;]) # Column separator
729
+ ( # Group for one data column
730
+ (?P<neg1>[-\(])? # Possible opening negative sign
731
+ (\d{1,3} # millions, billions or more, etc.
732
+ (?P<thou>
733
+ [ ,\.] # Separated by a thousands delimiter
734
+ )
735
+ )? # Zero or one times
736
+ (\d{3}(?P=thou))* # 'Sandwiched' digit triplets using same thousands separator
737
+ \d{1,3} # Hundreds, tens, ones
738
+ (?P<decimal>
739
+ [\., ] # Separated by a decimal delimiter
740
+ )
741
+ \d+ # Decimal digits (assume at least 1)
742
+ (?P<neg2>[-\)])? # Possible terminating negative sign
743
+ (?P=sep) # The same column separator
744
+ )+ # Repeated for each data column
745
+ """, re.VERBOSE)
746
+
747
+ thousands = list()
748
+ decimals = list()
749
+ neg1 = list()
750
+ neg2 = list()
751
+ sep = list()
752
+
753
+ for line in lines:
754
+
755
+ match = pattern.search(line)
756
+
757
+ if match:
758
+ thousands += match.captures("thou")
759
+ decimals += match.captures("decimal")
760
+ neg1 += match.captures("neg1") if match.captures("neg1") else []
761
+ neg2 += match.captures("neg2") if match.captures("neg2") else []
762
+ sep += match.captures("sep")
763
+
764
+ deci_sep = mode(decimals)
765
+ col_sep = mode(sep)
766
+
767
+ try:
768
+ thou_sep = mode(thousands)
769
+ except StatisticsError:
770
+ thou_sep = None
771
+
772
+ try:
773
+ negative_open = mode(neg1)
774
+ except StatisticsError:
775
+ negative_open = None
776
+
777
+ try:
778
+ negative_term = mode(neg2)
779
+ except StatisticsError:
780
+ negative_term = None
781
+
782
+ return thou_sep, deci_sep, col_sep, negative_open, negative_term
783
+
784
+ @staticmethod
785
+ def _evaluate_positive_number_format(thou_sep, deci_sep):
786
+ """ Detect what format positive numbers are in
787
+ | 1 | 1,234.56 | comma, period |
788
+ | 2 | 1 234,56 | space, comma |
789
+ | 3 | 1.234,56 | period, comma |
790
+ | 4 | 1.234 56 | period, space |
791
+ """
792
+
793
+ if thou_sep == "," and deci_sep == ".":
794
+ return 1
795
+ elif thou_sep == " " and deci_sep == ",":
796
+ return 2
797
+ elif thou_sep == "." and deci_sep == ",":
798
+ return 3
799
+ elif thou_sep == " " and deci_sep == ".":
800
+ return 4
801
+ elif thou_sep is None:
802
+ if deci_sep == ".":
803
+ return 1
804
+ elif deci_sep == " ":
805
+ return 4
806
+ elif deci_sep is None:
807
+ if thou_sep == ",":
808
+ return 1
809
+ elif thou_sep == " ":
810
+ return 2
811
+
812
+ else:
813
+ return None
814
+
815
+ @staticmethod
816
+ def _evaluate_negative_number_format(negative_open:str, negative_terminator:str) -> int:
817
+ """
818
+ Determine what the negative number format is
819
+
820
+ | 1 | -123 | -, None |
821
+ | 2 | 123- | None, - |
822
+ | 3 | (123) | (, ) |
823
+ """
824
+ if negative_open == "-" and negative_terminator is None:
825
+ return 1
826
+ elif negative_open is None and negative_terminator == "-":
827
+ return 2
828
+ elif negative_open == "(" and negative_terminator == ")":
829
+ return 3
830
+
831
@property
def thousands_separator(self) -> str:
    """Character used to group thousands.

    A stored ``_thousands_separator`` takes precedence; otherwise the
    character is looked up from ``positive_number_format``.

    Returns
    -------
    str
        Thousands separator character, or None when nothing is stored and
        ``positive_number_format`` is not one of 1-4.
    """
    try:
        return self._thousands_separator
    except AttributeError:
        pass

    # Format index -> thousands separator (formats 3 and 4 both use a period).
    for code, char in ((1, ","), (2, " "), (3, "."), (4, ".")):
        if self.positive_number_format == code:
            return char
    return None

@thousands_separator.setter
def thousands_separator(self, val):
    # An explicit separator is only accepted while no positive number format
    # is defined.
    if self.positive_number_format is None:
        self._thousands_separator = val
        return
    raise AttributeError("Can't set thousands separator explicitly if positive_number_format is defined")
859
+
860
@property
def decimal_separator(self) -> str:
    """Character that separates the integer and fractional parts.

    A stored ``_decimal_separator`` takes precedence; otherwise the
    character is looked up from ``positive_number_format``.

    Returns
    -------
    str
        Decimal separator character, or None when nothing is stored and
        ``positive_number_format`` is not one of 1-4.
    """
    try:
        return self._decimal_separator
    except AttributeError:
        pass

    # Format index -> decimal separator (formats 2 and 3 both use a comma).
    for code, char in ((1, "."), (2, ","), (3, ","), (4, " ")):
        if self.positive_number_format == code:
            return char
    return None

@decimal_separator.setter
def decimal_separator(self, val):
    # An explicit separator is only accepted while no positive number format
    # is defined.
    if self.positive_number_format is None:
        self._decimal_separator = val
        return
    raise AttributeError("Can't set decimal separator explicitly if positive_number_format is defined")
888
+
889
@property
def positive_number_format(self) -> int:
    """Index of the positive number format currently in use.

    Returns
    -------
    int
        The value stored in ``_positive_number_format``.
    """
    current = self._positive_number_format
    return current
899
+
900
+ @positive_number_format.setter
901
+ def positive_number_format(self, val):
902
+ if val not in self.POS_N_FMT + [None]:
903
+ raise ValueError(f"Positive number format must be in {self.POS_N_FMT} (Not {val})")
904
+
905
+ self._positive_number_format = val
906
+
907
+ if val == 1:
908
+ self._thousands_separator, self._decimal_separator = (",", ".")
909
+ elif val == 2:
910
+ self._thousands_separator, self._decimal_separator = (" ", ",")
911
+ elif val == 3:
912
+ self._thousands_separator, self._decimal_separator = (".", ",")
913
+ elif val == 4:
914
+ self._thousands_separator, self._decimal_separator = (".", " ")