tsp 1.7.7__py3-none-any.whl → 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsp/__init__.py +11 -11
- tsp/__meta__.py +1 -1
- tsp/concatenation.py +153 -0
- tsp/core.py +1162 -1035
- tsp/data/2023-01-06_755-test-Dataset_2031-Constant_Over_Interval-Hourly-Ground_Temperature-Thermistor_Automated.timeserie.csv +4 -4
- tsp/data/2023-01-06_755-test.metadata.txt +208 -208
- tsp/data/NTGS_example_csv.csv +6 -6
- tsp/data/NTGS_example_slash_dates.csv +6 -6
- tsp/data/example_geotop.csv +5240 -5240
- tsp/data/example_gtnp.csv +1298 -1298
- tsp/data/example_permos.csv +7 -7
- tsp/data/test_geotop_has_space.txt +5 -5
- tsp/dataloggers/AbstractReader.py +43 -43
- tsp/dataloggers/FG2.py +110 -110
- tsp/dataloggers/GP5W.py +114 -114
- tsp/dataloggers/Geoprecision.py +34 -34
- tsp/dataloggers/HOBO.py +914 -914
- tsp/dataloggers/RBRXL800.py +190 -190
- tsp/dataloggers/RBRXR420.py +308 -308
- tsp/dataloggers/__init__.py +15 -15
- tsp/dataloggers/logr.py +115 -115
- tsp/dataloggers/test_files/004448.DAT +2543 -2543
- tsp/dataloggers/test_files/004531.DAT +17106 -17106
- tsp/dataloggers/test_files/004531.HEX +3587 -3587
- tsp/dataloggers/test_files/004534.HEX +3587 -3587
- tsp/dataloggers/test_files/010252.dat +1731 -1731
- tsp/dataloggers/test_files/010252.hex +1739 -1739
- tsp/dataloggers/test_files/010274.hex +1291 -1291
- tsp/dataloggers/test_files/010278.hex +3544 -3544
- tsp/dataloggers/test_files/012064.dat +1286 -1286
- tsp/dataloggers/test_files/012064.hex +1294 -1294
- tsp/dataloggers/test_files/012081.hex +3532 -3532
- tsp/dataloggers/test_files/07B1592.DAT +1483 -1483
- tsp/dataloggers/test_files/07B1592.HEX +1806 -1806
- tsp/dataloggers/test_files/07B4450.DAT +2234 -2234
- tsp/dataloggers/test_files/07B4450.HEX +2559 -2559
- tsp/dataloggers/test_files/FG2_399.csv +9881 -9881
- tsp/dataloggers/test_files/GP5W.csv +1121 -1121
- tsp/dataloggers/test_files/GP5W_260.csv +1884 -1884
- tsp/dataloggers/test_files/GP5W_270.csv +2210 -2210
- tsp/dataloggers/test_files/H08-030-08_HOBOware.csv +998 -998
- tsp/dataloggers/test_files/RBR_01.dat +1046 -1046
- tsp/dataloggers/test_files/RBR_02.dat +2426 -2426
- tsp/dataloggers/test_files/RSTDT2055.csv +2152 -2152
- tsp/dataloggers/test_files/U23-001_HOBOware.csv +1001 -1001
- tsp/dataloggers/test_files/hobo-negative-2.txt +6396 -6396
- tsp/dataloggers/test_files/hobo-negative-3.txt +5593 -5593
- tsp/dataloggers/test_files/hobo-positive-number-1.txt +1000 -1000
- tsp/dataloggers/test_files/hobo-positive-number-2.csv +1003 -1003
- tsp/dataloggers/test_files/hobo-positive-number-3.csv +1133 -1133
- tsp/dataloggers/test_files/hobo-positive-number-4.csv +1209 -1209
- tsp/dataloggers/test_files/hobo2.csv +8702 -8702
- tsp/dataloggers/test_files/hobo_1_AB.csv +21732 -21732
- tsp/dataloggers/test_files/hobo_1_AB_Details.txt +133 -133
- tsp/dataloggers/test_files/hobo_1_AB_classic.csv +4373 -4373
- tsp/dataloggers/test_files/hobo_1_AB_defaults.csv +21732 -21732
- tsp/dataloggers/test_files/hobo_1_AB_minimal.txt +1358 -1358
- tsp/dataloggers/test_files/hobo_1_AB_var2.csv +3189 -3189
- tsp/dataloggers/test_files/hobo_1_AB_var3.csv +2458 -2458
- tsp/dataloggers/test_files/logR_ULogC16-32_1.csv +106 -106
- tsp/dataloggers/test_files/logR_ULogC16-32_2.csv +100 -100
- tsp/dataloggers/test_files/mon_3_Ta_2010-08-18_2013-02-08.txt +21724 -21724
- tsp/dataloggers/test_files/rbr_001.dat +1133 -1133
- tsp/dataloggers/test_files/rbr_001.hex +1139 -1139
- tsp/dataloggers/test_files/rbr_001_no_comment.dat +1132 -1132
- tsp/dataloggers/test_files/rbr_001_no_comment.hex +1138 -1138
- tsp/dataloggers/test_files/rbr_002.dat +1179 -1179
- tsp/dataloggers/test_files/rbr_002.hex +1185 -1185
- tsp/dataloggers/test_files/rbr_003.hex +1292 -1292
- tsp/dataloggers/test_files/rbr_003.xls +0 -0
- tsp/dataloggers/test_files/rbr_xl_001.DAT +1105 -1105
- tsp/dataloggers/test_files/rbr_xl_002.DAT +1126 -1126
- tsp/dataloggers/test_files/rbr_xl_003.DAT +4622 -4622
- tsp/dataloggers/test_files/rbr_xl_003.HEX +3587 -3587
- tsp/gtnp.py +148 -148
- tsp/labels.py +3 -3
- tsp/misc.py +90 -90
- tsp/physics.py +101 -101
- tsp/plots/static.py +373 -373
- tsp/readers.py +548 -548
- tsp/time.py +45 -45
- tsp/tspwarnings.py +14 -14
- tsp/utils.py +101 -101
- tsp/version.py +1 -1
- {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/METADATA +30 -23
- tsp-1.8.1.dist-info/RECORD +94 -0
- {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/WHEEL +5 -5
- {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info/licenses}/LICENSE +674 -674
- tsp/dataloggers/test_files/CSc_CR1000_1.dat +0 -295
- tsp/scratch.py +0 -6
- tsp-1.7.7.dist-info/RECORD +0 -95
- {tsp-1.7.7.dist-info → tsp-1.8.1.dist-info}/top_level.txt +0 -0
tsp/gtnp.py
CHANGED
|
@@ -1,148 +1,148 @@
|
|
|
1
|
-
from datetime import timezone, timedelta, tzinfo
|
|
2
|
-
from typing import Optional
|
|
3
|
-
from collections import OrderedDict
|
|
4
|
-
|
|
5
|
-
import re
|
|
6
|
-
import warnings
|
|
7
|
-
|
|
8
|
-
from tsp.time import get_utc_offset
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class GtnpMetadata:
|
|
12
|
-
def __init__(self, filepath):
|
|
13
|
-
"""A class to read GTN-P metadata files
|
|
14
|
-
|
|
15
|
-
Parameters
|
|
16
|
-
----------
|
|
17
|
-
filepath : str
|
|
18
|
-
Path to GTN-P *.metadata.txt file.
|
|
19
|
-
"""
|
|
20
|
-
self.filepath = filepath
|
|
21
|
-
self._dict = OrderedDict()
|
|
22
|
-
self._read()
|
|
23
|
-
self._parse()
|
|
24
|
-
|
|
25
|
-
def _read(self):
|
|
26
|
-
try:
|
|
27
|
-
with open(self.filepath, 'r') as f:
|
|
28
|
-
self._raw = f.readlines()
|
|
29
|
-
|
|
30
|
-
except UnicodeDecodeError:
|
|
31
|
-
warnings.warn("Couldn't read file with utf-8 encoding. Metadata might be corrupted.")
|
|
32
|
-
with open(self.filepath, 'r', errors='ignore') as f:
|
|
33
|
-
self._raw = f.readlines()
|
|
34
|
-
|
|
35
|
-
@property
|
|
36
|
-
def raw(self) -> 'list[str]':
|
|
37
|
-
return self._raw
|
|
38
|
-
|
|
39
|
-
@raw.setter
|
|
40
|
-
def raw(self, value):
|
|
41
|
-
raise ValueError("Cannot set")
|
|
42
|
-
|
|
43
|
-
@property
|
|
44
|
-
def parsed(self) -> dict:
|
|
45
|
-
return self._dict
|
|
46
|
-
|
|
47
|
-
def _parse(self):
|
|
48
|
-
lines = [line for line in self._raw] # Make a copy in case we need to use fallback plan
|
|
49
|
-
|
|
50
|
-
try:
|
|
51
|
-
self._dict = OrderedDict()
|
|
52
|
-
recursively_build_metadata(lines, self._dict)
|
|
53
|
-
|
|
54
|
-
except Exception:
|
|
55
|
-
print("Couldn't build nested dictionary. Fallback to simple dictionary.")
|
|
56
|
-
self._dict = OrderedDict()
|
|
57
|
-
self._parse_dict()
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def _parse_dict(self) -> None:
|
|
61
|
-
pattern = re.compile(r"^([^:]+):\s*(.*)$")
|
|
62
|
-
|
|
63
|
-
for line in self._raw:
|
|
64
|
-
result = pattern.match(line)
|
|
65
|
-
if result:
|
|
66
|
-
key, value = result.groups()
|
|
67
|
-
|
|
68
|
-
if value.strip() != "":
|
|
69
|
-
self._dict[key] = value.strip()
|
|
70
|
-
|
|
71
|
-
def get_timezone(self) -> Optional[tzinfo]:
|
|
72
|
-
try:
|
|
73
|
-
zone = self._dict['Timezone']
|
|
74
|
-
except KeyError:
|
|
75
|
-
return None
|
|
76
|
-
|
|
77
|
-
if zone == 'UTC':
|
|
78
|
-
return timezone.utc
|
|
79
|
-
elif isinstance(zone, str):
|
|
80
|
-
seconds = get_utc_offset(zone.strip())
|
|
81
|
-
tz = timezone(timedelta(seconds=seconds))
|
|
82
|
-
return tz
|
|
83
|
-
|
|
84
|
-
def get_latitude(self) -> Optional[float]:
|
|
85
|
-
try:
|
|
86
|
-
return float(self._dict['Latitude'])
|
|
87
|
-
except KeyError:
|
|
88
|
-
return None
|
|
89
|
-
|
|
90
|
-
def get_longitude(self) -> Optional[float]:
|
|
91
|
-
try:
|
|
92
|
-
return float(self._dict['Longitude'])
|
|
93
|
-
except KeyError:
|
|
94
|
-
return None
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def recursively_build_metadata(lines: list, odict: OrderedDict, depth:int=0) -> None:
|
|
98
|
-
""" A recursive function to build an OrderedDict from a list of lines.
|
|
99
|
-
|
|
100
|
-
The function expects lines to be in the format:
|
|
101
|
-
Key: Value
|
|
102
|
-
Key: Value
|
|
103
|
-
Key:
|
|
104
|
-
Subkey: Multi line Subvalue
|
|
105
|
-
Multi line Subvalue
|
|
106
|
-
Multi line Subvalue
|
|
107
|
-
Subkey: Subvalue
|
|
108
|
-
Subkey:
|
|
109
|
-
Subsubkey: Subsubvalue
|
|
110
|
-
|
|
111
|
-
Parameters
|
|
112
|
-
----------
|
|
113
|
-
lines : list
|
|
114
|
-
A list of lines from a metadata file.
|
|
115
|
-
odict : OrderedDict
|
|
116
|
-
An OrderedDict to build.
|
|
117
|
-
depth : int, optional
|
|
118
|
-
The depth of the OrderedDict, by default 0
|
|
119
|
-
|
|
120
|
-
"""
|
|
121
|
-
pattern = re.compile(r"^(\t*)([^:]+):\s*(.*)$")
|
|
122
|
-
|
|
123
|
-
while lines:
|
|
124
|
-
line = lines.pop(0)
|
|
125
|
-
result = pattern.match(line)
|
|
126
|
-
|
|
127
|
-
if result:
|
|
128
|
-
tabs, key, value = result.groups()
|
|
129
|
-
|
|
130
|
-
if len(tabs) < depth: # Un-indent, return to previous level
|
|
131
|
-
lines.insert(0, line)
|
|
132
|
-
return
|
|
133
|
-
|
|
134
|
-
if value.strip() != "": # Valid key:value pair
|
|
135
|
-
odict[key] = value.strip()
|
|
136
|
-
|
|
137
|
-
else: # Empty value, recurse
|
|
138
|
-
odict[key] = OrderedDict()
|
|
139
|
-
recursively_build_metadata(lines, odict[key], depth=depth+1)
|
|
140
|
-
|
|
141
|
-
else: # Multi-line value
|
|
142
|
-
try:
|
|
143
|
-
odict[next(reversed(odict))] = odict[next(reversed(odict))] + line
|
|
144
|
-
except StopIteration: # If no key:value pair has been added yet
|
|
145
|
-
continue
|
|
146
|
-
except TypeError: # If the value is not a string
|
|
147
|
-
continue
|
|
148
|
-
continue
|
|
1
|
+
from datetime import timezone, timedelta, tzinfo
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from collections import OrderedDict
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import warnings
|
|
7
|
+
|
|
8
|
+
from tsp.time import get_utc_offset
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GtnpMetadata:
    """Reader for GTN-P ``*.metadata.txt`` files.

    The file is read once at construction time and parsed into an
    ``OrderedDict``. A nested dictionary is attempted first (see
    ``recursively_build_metadata``); if that fails for any reason a flat
    ``key: value`` dictionary is built instead.
    """

    def __init__(self, filepath):
        """Read and parse a GTN-P metadata file.

        Parameters
        ----------
        filepath : str
            Path to GTN-P *.metadata.txt file.
        """
        self.filepath = filepath
        self._dict = OrderedDict()
        self._read()
        self._parse()

    def _read(self):
        """Load the raw lines of the file into ``self._raw``.

        The file is decoded as UTF-8 explicitly so the result does not depend
        on the platform's default encoding. If decoding fails, a warning is
        issued and the file is re-read with undecodable bytes dropped.
        """
        try:
            with open(self.filepath, 'r', encoding='utf-8') as f:
                self._raw = f.readlines()

        except UnicodeDecodeError:
            warnings.warn("Couldn't read file with utf-8 encoding. Metadata might be corrupted.")
            with open(self.filepath, 'r', encoding='utf-8', errors='ignore') as f:
                self._raw = f.readlines()

    @property
    def raw(self) -> 'list[str]':
        """The unparsed lines of the metadata file (read-only)."""
        return self._raw

    @raw.setter
    def raw(self, value):
        raise ValueError("Cannot set")

    @property
    def parsed(self) -> dict:
        """The parsed metadata dictionary (nested, or flat after a fallback)."""
        return self._dict

    def _parse(self):
        """Populate ``self._dict`` from ``self._raw``.

        The nested parser consumes the list it is given, so it works on a
        copy; ``self._raw`` stays intact for the flat fallback parser.
        """
        lines = list(self._raw)

        try:
            self._dict = OrderedDict()
            recursively_build_metadata(lines, self._dict)

        except Exception:
            # Deliberate best-effort: any failure of the nested parser falls
            # back to the flat parser. Reported through `warnings`, like the
            # decoding problem in `_read`, rather than printed to stdout.
            warnings.warn("Couldn't build nested dictionary. Fallback to simple dictionary.")
            self._dict = OrderedDict()
            self._parse_dict()

    def _parse_dict(self) -> None:
        """Flat fallback parser: store every ``key: value`` line with a non-empty value."""
        pattern = re.compile(r"^([^:]+):\s*(.*)$")

        for line in self._raw:
            result = pattern.match(line)
            if result is None:
                continue

            key, value = result.groups()
            value = value.strip()
            if value != "":
                self._dict[key] = value

    def get_timezone(self) -> Optional[tzinfo]:
        """Return the time zone stored under the top-level ``Timezone`` key.

        Returns
        -------
        tzinfo or None
            ``timezone.utc`` when the value is ``UTC``; otherwise a fixed-offset
            ``timezone`` built from ``get_utc_offset``. ``None`` when the key is
            missing or its value is not a string (e.g. a nested dictionary).
        """
        zone = self._dict.get('Timezone')
        if not isinstance(zone, str):
            return None

        # Values assembled by the nested parser can carry trailing whitespace
        # (continuation/blank lines are appended verbatim), so strip before
        # comparing against 'UTC', not only before computing the offset.
        zone = zone.strip()
        if zone == 'UTC':
            return timezone.utc

        seconds = get_utc_offset(zone)
        return timezone(timedelta(seconds=seconds))

    def _get_float(self, key: str) -> Optional[float]:
        """Return the top-level value for ``key`` as a float, or ``None`` if the key is absent."""
        try:
            return float(self._dict[key])
        except KeyError:
            return None

    def get_latitude(self) -> Optional[float]:
        """Return the top-level ``Latitude`` value as a float, or ``None`` if absent."""
        return self._get_float('Latitude')

    def get_longitude(self) -> Optional[float]:
        """Return the top-level ``Longitude`` value as a float, or ``None`` if absent."""
        return self._get_float('Longitude')
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def recursively_build_metadata(lines: list, odict: OrderedDict, depth: int = 0) -> None:
    """Build a nested OrderedDict from tab-indented ``Key: Value`` lines.

    The function expects lines to be in the format:
    Key: Value
    Key: Value
    Key:
        Subkey: Multi line Subvalue
        Multi line Subvalue
        Multi line Subvalue
        Subkey: Subvalue
        Subkey:
            Subsubkey: Subsubvalue

    Nesting is determined by the number of leading tab characters. A key with
    an empty value opens a nested OrderedDict. A line that is not of the form
    ``key: value`` is appended verbatim to the most recently added string
    value at the current level; it is dropped if there is no such value.

    Parameters
    ----------
    lines : list
        A list of lines from a metadata file. The list is consumed in place:
        on return it holds only the lines that were not processed (lines less
        indented than ``depth``).
    odict : OrderedDict
        An OrderedDict to build. Modified in place.
    depth : int, optional
        The depth of the OrderedDict, by default 0

    """
    # Local import: only OrderedDict is imported from collections at file level.
    from collections import deque

    pattern = re.compile(r"^(\t*)([^:]+):\s*(.*)$")

    # A deque gives O(1) pops/push-backs at the front; doing the same on a
    # list (pop(0)/insert(0, ...)) is O(n) per line.
    queue = deque(lines)

    def build_level(target, level):
        """Consume queued lines into `target` until a line indented less than `level`."""
        while queue:
            line = queue.popleft()
            result = pattern.match(line)

            if result is None:  # Multi-line value
                try:
                    last_key = next(reversed(target))
                    target[last_key] = target[last_key] + line
                except StopIteration:  # No key:value pair has been added yet
                    pass
                except TypeError:  # The previous value is not a string
                    pass
                continue

            tabs, key, value = result.groups()

            if len(tabs) < level:  # Un-indent: hand the line back to the caller's level
                queue.appendleft(line)
                return

            value = value.strip()
            if value != "":  # Valid key:value pair
                target[key] = value
            else:  # Empty value: the following, deeper lines belong to this key
                target[key] = OrderedDict()
                build_level(target[key], level + 1)

    try:
        build_level(odict, depth)
    finally:
        # Preserve the in-place consumption contract of the `lines` argument.
        lines[:] = queue
|
tsp/labels.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
HOURLY = 60 * 60
|
|
2
|
-
DAILY = HOURLY * 24
|
|
3
|
-
MONTHLY = DAILY * 31
|
|
1
|
+
# Nominal aggregation periods. NOTE(review): the values look like durations
# in seconds (60 * 60 per hour) -- confirm against callers.
HOURLY = 3600
DAILY = 24 * HOURLY
MONTHLY = 31 * DAILY  # nominal month of 31 days
YEARLY = 365 * DAILY  # nominal year of 365 days
|
tsp/misc.py
CHANGED
|
@@ -1,90 +1,90 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import re
|
|
4
|
-
|
|
5
|
-
import tsp.labels as lbl
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def _is_depth_column(col_name, pattern) -> bool:
|
|
9
|
-
return bool(re.search(pattern, col_name))
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def completeness(df: pd.DataFrame, f1, f2) -> pd.DataFrame:
|
|
13
|
-
""" Calculate completeness of an aggregated dataframe
|
|
14
|
-
Parameters
|
|
15
|
-
----------
|
|
16
|
-
df : pd.DataFrame
|
|
17
|
-
Dataframe with temporal index and values equal to the number of observations
|
|
18
|
-
in aggregation period
|
|
19
|
-
f1 : str
|
|
20
|
-
Aggregation period of data from which df is aggregated
|
|
21
|
-
f2 : str
|
|
22
|
-
Aggregation period of df
|
|
23
|
-
|
|
24
|
-
Returns
|
|
25
|
-
-------
|
|
26
|
-
pd.DataFrame : Dataframe with completeness values as a decimal fraction [0,1]
|
|
27
|
-
"""
|
|
28
|
-
# df must have temporal index
|
|
29
|
-
C = None
|
|
30
|
-
if f1 == lbl.HOURLY:
|
|
31
|
-
if f2 == lbl.DAILY:
|
|
32
|
-
C = df / 24
|
|
33
|
-
|
|
34
|
-
elif f1 == lbl.DAILY:
|
|
35
|
-
if f2 == lbl.MONTHLY:
|
|
36
|
-
C = df / E_day_in_month(df)
|
|
37
|
-
elif f2 == lbl.YEARLY:
|
|
38
|
-
C = df / E_day_in_year(df)
|
|
39
|
-
|
|
40
|
-
elif f1 == lbl.MONTHLY:
|
|
41
|
-
if f2 == lbl.YEARLY:
|
|
42
|
-
cnt = 12
|
|
43
|
-
|
|
44
|
-
elif isinstance(f1, float) and isinstance(f1, float):
|
|
45
|
-
R = f2 / f1
|
|
46
|
-
C = df / R
|
|
47
|
-
|
|
48
|
-
if C is None:
|
|
49
|
-
raise ValueError(f"Unknown aggregation period {f1} or {f2}")
|
|
50
|
-
|
|
51
|
-
return C
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def df_has_period(f, *args, **kwargs):
|
|
55
|
-
df = args[0] if args[0] else kwargs.get('df')
|
|
56
|
-
if not isinstance(df.index, pd.PeriodIndex):
|
|
57
|
-
raise ValueError("Index must be a PeriodIndex")
|
|
58
|
-
return f(*args, **kwargs)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
#@df_has_period
|
|
62
|
-
def E_day_in_year(df: "pd.DataFrame") -> "pd.DataFrame":
|
|
63
|
-
""" Expected number of daily observations per year """
|
|
64
|
-
leap = df.index.to_period().is_leap_year
|
|
65
|
-
days = np.atleast_2d(np.where(leap, 366, 365)).transpose()
|
|
66
|
-
result = pd.DataFrame(index=df.index,
|
|
67
|
-
columns=df.columns,
|
|
68
|
-
data=np.repeat(np.atleast_2d(days), df.shape[1], axis=1))
|
|
69
|
-
return result
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
#@df_has_period
|
|
73
|
-
def E_month_in_year(df: "pd.DataFrame") -> "pd.DataFrame":
|
|
74
|
-
""" Expected number of monthly observations per year """
|
|
75
|
-
result = pd.DataFrame(index=df.index,
|
|
76
|
-
columns=df.columns,
|
|
77
|
-
data=12)
|
|
78
|
-
return result
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
#@df_has_period
|
|
82
|
-
def E_day_in_month(df: "pd.DataFrame") -> "pd.DataFrame":
|
|
83
|
-
""" Expected number of daily observations per month """
|
|
84
|
-
nday = df.index.to_period().days_in_month
|
|
85
|
-
result = pd.DataFrame(index=df.index,
|
|
86
|
-
columns=df.columns,
|
|
87
|
-
data=np.repeat(np.atleast_2d(nday).transpose(), df.shape[1], axis=1))
|
|
88
|
-
return result
|
|
89
|
-
|
|
90
|
-
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
import tsp.labels as lbl
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _is_depth_column(col_name, pattern) -> bool:
|
|
9
|
+
return bool(re.search(pattern, col_name))
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def completeness(df: pd.DataFrame, f1, f2) -> pd.DataFrame:
    """ Calculate completeness of an aggregated dataframe

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with temporal index and values equal to the number of observations
        in aggregation period
    f1 : number
        Aggregation period of data from which df is aggregated. Compared
        against the ``tsp.labels`` constants; any other real number is used
        in the ratio ``f2 / f1``.
    f2 : number
        Aggregation period of df

    Returns
    -------
    pd.DataFrame : Dataframe with completeness values as a decimal fraction [0,1]

    Raises
    ------
    ValueError
        If the combination of ``f1`` and ``f2`` is not supported.
    """
    # df must have temporal index
    numeric = (int, float, np.integer, np.floating)

    C = None
    if f1 == lbl.HOURLY:
        if f2 == lbl.DAILY:
            C = df / 24

    elif f1 == lbl.DAILY:
        # Calendar-aware denominators: months and years vary in length.
        if f2 == lbl.MONTHLY:
            C = df / E_day_in_month(df)
        elif f2 == lbl.YEARLY:
            C = df / E_day_in_year(df)

    elif f1 == lbl.MONTHLY:
        if f2 == lbl.YEARLY:
            # Previously this branch only set an unused local, so the
            # monthly -> yearly case always raised ValueError.
            C = df / 12

    elif isinstance(f1, numeric) and isinstance(f2, numeric):
        # Generic case: expected number of observations is the period ratio.
        # Both periods are checked (the original tested f1 twice).
        C = df / (f2 / f1)

    if C is None:
        raise ValueError(f"Unknown aggregation period {f1} or {f2}")

    return C
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def df_has_period(f, *args, **kwargs):
    """Call ``f(*args, **kwargs)`` after checking the dataframe has a PeriodIndex.

    The dataframe is taken from the first positional argument if there is
    one, otherwise from the ``df`` keyword argument.

    Parameters
    ----------
    f : callable
        Function to call once the check has passed.
    *args, **kwargs
        Arguments forwarded unchanged to ``f``.

    Returns
    -------
    Whatever ``f`` returns.

    Raises
    ------
    ValueError
        If no dataframe was supplied or its index is not a ``pd.PeriodIndex``.
    """
    # Test for the presence of a positional argument, not its truthiness:
    # the truth value of a DataFrame is ambiguous and raises ValueError.
    df = args[0] if args else kwargs.get('df')
    if df is None or not isinstance(df.index, pd.PeriodIndex):
        raise ValueError("Index must be a PeriodIndex")
    return f(*args, **kwargs)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
#@df_has_period
|
|
62
|
+
def E_day_in_year(df: "pd.DataFrame") -> "pd.DataFrame":
    """ Expected number of daily observations per year

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe whose index exposes ``is_leap_year`` (DatetimeIndex or
        PeriodIndex).

    Returns
    -------
    pd.DataFrame : Same index and columns as ``df``; every value in a row is
        366 if the row's index entry falls in a leap year, otherwise 365.
    """
    # Read the leap-year flag directly from the index. Converting with
    # to_period() first needs a known/inferable frequency and does not exist
    # on a PeriodIndex, so it fails for indexes this handles fine.
    leap = np.asarray(df.index.is_leap_year, dtype=bool)
    days = np.where(leap, 366, 365).reshape(-1, 1)  # column vector, one row per index entry
    result = pd.DataFrame(index=df.index,
                          columns=df.columns,
                          data=np.repeat(days, df.shape[1], axis=1))
    return result
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
#@df_has_period
|
|
73
|
+
def E_month_in_year(df: "pd.DataFrame") -> "pd.DataFrame":
    """ Expected number of monthly observations per year

    Returns a dataframe with the same index and columns as ``df`` in which
    every value is 12.
    """
    months_per_year = 12
    return pd.DataFrame(data=months_per_year, columns=df.columns, index=df.index)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
#@df_has_period
|
|
82
|
+
def E_day_in_month(df: "pd.DataFrame") -> "pd.DataFrame":
    """ Expected number of daily observations per month

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe whose index exposes ``days_in_month`` (DatetimeIndex or
        PeriodIndex).

    Returns
    -------
    pd.DataFrame : Same index and columns as ``df``; every value in a row is
        the number of days in the month of that row's index entry.
    """
    # Read days_in_month directly from the index. Converting with to_period()
    # first needs a known/inferable frequency and does not exist on a
    # PeriodIndex, so it fails for indexes this handles fine.
    nday = np.asarray(df.index.days_in_month, dtype=int).reshape(-1, 1)
    result = pd.DataFrame(index=df.index,
                          columns=df.columns,
                          data=np.repeat(nday, df.shape[1], axis=1))
    return result
|
|
89
|
+
|
|
90
|
+
|