tadc 2.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tadc/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .run import *
tadc/control_data.py ADDED
@@ -0,0 +1,97 @@
1
+ """CO-OPS API data retrieval functions for CO-OPS Datums Calculator"""
2
+
3
+ from datetime import datetime, date, time, timedelta
4
+ import numpy as np
5
+ import pandas as pd
6
+ import requests
7
+
8
+ from . import tides as tf
9
+
10
+
11
def Get_Monthly_Means(Control_Station_ID, Begin_Month, Begin_Year, End_Month, End_Year, Conversion):
    """Retrieve the control station's monthly means using the CO-OPS data API.

    Args:
        Control_Station_ID: Station identifier placed in the ``station`` query field.
        Begin_Month, Begin_Year: First month of the request (anything ``int()`` accepts
            for the month, e.g. ``5``, ``'5'`` or ``'05'``).
        End_Month, End_Year: Last month of the request; the request runs through the
            last day of that month as reported by ``tf.Last_Day_In_Month``.
        Conversion: Factor every returned value is multiplied by.

    Returns:
        A list with one entry per month, each a list of floats ordered as
        ``[highest, MHHW, MHW, MSL, MLW, MLLW, lowest]``.

    Raises:
        KeyError: If the JSON response has no ``'data'`` entry.
    """
    columns = ['highest', 'MHHW', 'MHW', 'MSL', 'MLW', 'MLLW', 'lowest']

    # Normalise the months to integers before formatting so that an already
    # zero-padded string such as '05' is not padded a second time ('005').
    begin_month = int(Begin_Month)
    end_month = int(End_Month)
    end_days = tf.Last_Day_In_Month(int(End_Year), end_month)

    begin_date = f'{Begin_Year}{begin_month:02d}01'
    end_date = f'{End_Year}{end_month:02d}{end_days}'

    url = ('https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?'
           + 'begin_date=' + begin_date + '&end_date=' + end_date
           + '&station=' + str(Control_Station_ID)
           + '&product=monthly_mean&datum=stnd&units=metric&time_zone=gmt&application=TADC&format=json')
    r = requests.get(url)

    MM = pd.DataFrame(r.json()['data'])
    MM[columns] = MM[columns].astype(float) * Conversion
    # Convert to the list of lists format needed by run.py #
    return MM[columns].values.tolist()
31
+
32
+
33
def Get_High_Lows(Control_Station_ID, Start_DT, End_DT, gmt_offset, Conversion):
    """Retrieve control station high and low tides using the CO-OPS data API.

    Args:
        Control_Station_ID: Station identifier placed in the ``station`` query field.
        Start_DT, End_DT: Start and end of the wanted period, in the subordinate
            (short-term) station's time.
        gmt_offset: Hours added to ``Start_DT``/``End_DT`` before the request (which is
            made with ``time_zone=gmt``) and subtracted again from the returned times.
        Conversion: Factor every returned water level is multiplied by.

    Returns:
        A list of ``[time, value, type]`` lists, one per high/low, where ``type`` is
        the API's ``ty`` field with spaces removed.

    Raises:
        KeyError: If a JSON response has no ``'data'`` entry.
    """
    # If subordinate (short-term) station time is not in gmt, apply the time offset.
    # (New objects are bound here; the caller's datetimes are not modified.)
    offset = timedelta(hours=gmt_offset)
    Start_DT = Start_DT + offset
    End_DT = End_DT + offset

    # Requests longer than a year are split into roughly equal pieces.
    span = End_DT - Start_DT
    if span > timedelta(days=365):
        chunks = pd.date_range(Start_DT, End_DT, periods=int(np.ceil(span.days / 365)) + 1)
    else:
        chunks = (Start_DT, End_DT)

    url1 = 'https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?'
    url3 = '&product=High_low&datum=stnd&units=metric&time_zone=gmt&application=TADC&format=json'

    hl_chunks = []
    for start_dt, end_dt in zip(chunks[:-1], chunks[1:]):
        url2 = ('begin_date=' + start_dt.strftime('%Y%m%d')
                + '&end_date=' + end_dt.strftime('%Y%m%d')
                + '&station=' + str(Control_Station_ID))
        r = requests.get(url1 + url2 + url3)
        hl_chunks.append(pd.DataFrame(r.json()['data']))

    HL = pd.concat(hl_chunks, ignore_index=True)
    # Consecutive chunks are requested by calendar date and share their boundary
    # day, so the tides of that day can be returned by both requests. Keep one copy.
    HL = HL.drop_duplicates(subset=['t', 'ty']).reset_index(drop=True)

    HL['t'] = pd.to_datetime(HL['t']) - offset
    HL['v'] = HL['v'].astype(float) * Conversion
    HL['ty'] = HL['ty'].str.replace(' ', '', regex=False)
    # Convert to the list of lists format needed by run.py #
    return HL[['t', 'v', 'ty']].values.tolist()
65
+
66
+
67
def Get_Accepted_Datums(Station_ID, Conversion):
    """Retrieve the accepted control station datums using the CO-OPS metadata API.

    Args:
        Station_ID: Station identifier inserted into the metadata URL.
        Conversion: Factor applied to every datum except ``LWI`` and ``HWI``, which
            are returned as received.

    Returns:
        A list of 14 values ordered as ``MHHW, MHW, DTL, MTL, MSL, MLW, MLLW, GT, MN,
        DHQ, DLQ, NAVD88, LWI, HWI``. A datum the station does not report is ``nan``.

    Raises:
        KeyError: If the JSON response has no ``'datums'`` entry.
    """
    datum_names = ['MHHW', 'MHW', 'DTL', 'MTL', 'MSL', 'MLW', 'MLLW',
                   'GT', 'MN', 'DHQ', 'DLQ', 'NAVD88', 'LWI', 'HWI']
    unscaled = ('LWI', 'HWI')

    url = 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/' + str(Station_ID) + '/datums.json?units=metric'
    r = requests.get(url)
    datums = pd.DataFrame(r.json()['datums'])

    # An empty/null datum list produces a frame without a 'name' column; treat
    # that like every individual datum being missing instead of raising KeyError.
    if datums.empty:
        return [np.nan] * len(datum_names)

    SD = []
    for datum in datum_names:
        matches = datums.loc[datums['name'] == datum, 'value'].values
        if len(matches) == 0:
            SD.append(np.nan)
        elif datum in unscaled:
            SD.append(matches[0])
        else:
            SD.append(matches[0] * Conversion)
    return SD
84
+
85
+
86
def Get_SubMethod(Station_ID):
    """Choose the datum computation method from the control station's longitude.

    Looks the station up in the CO-OPS metadata API and returns ``'Standard'`` when
    its ``lng`` is below -100 (West coast/Pacific) and ``'Modified'`` otherwise
    (East Coast/Gulf Coast/Caribbean Island).
    """
    url = 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/' + str(Station_ID) + '.json?units=metric'
    station = requests.get(url).json()['stations'][0]
    # NOTE(review): a station with a positive (east) longitude also takes the
    # 'Modified' branch - confirm that is intended for western Pacific stations.
    return 'Standard' if station['lng'] < -100 else 'Modified'
96
+
97
+
@@ -0,0 +1,76 @@
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+
6
class Out:
    """Daily extreme water levels together with a percentile helper and a plot.

    Attributes:
        daily_maxs: The daily extremes frame when ``extremes_type`` is ``'max'``, else ``None``.
        daily_mins: The daily extremes frame when ``extremes_type`` is ``'min'``, else ``None``.
        datum: Datum name used in labels (``'input datum'`` when ``'Input'`` was given).
        units: Unit text used in labels.
        input_file: Input file name used in the plot title.
    """

    def __init__(self, daily_extremes, extremes_type, datum, units, input_file):
        """Store the results.

        Args:
            daily_extremes: DataFrame with ``'time'`` and ``'elevation'`` columns.
            extremes_type: ``'max'`` or ``'min'``.
            datum: Datum the elevations are relative to.
            units: Unit text for axis labels.
            input_file: Name shown in the plot title.

        Raises:
            ValueError: If ``extremes_type`` is neither ``'max'`` nor ``'min'``
                (previously this left ``daily_maxs``/``daily_mins`` undefined).
        """
        if extremes_type not in ('max', 'min'):
            raise ValueError("extremes_type must be 'max' or 'min', got " + repr(extremes_type))
        self.__daily_extremes = daily_extremes
        self.daily_maxs = daily_extremes if extremes_type == 'max' else None
        self.daily_mins = daily_extremes if extremes_type == 'min' else None
        self.datum = 'input datum' if datum == 'Input' else datum
        self.units = units
        self.input_file = input_file

    def percentile(self, prctile):
        """Return the ``prctile``-th percentile (0-100) of the daily extreme elevations."""
        return self.__daily_extremes['elevation'].quantile(prctile/100)

    def plot(self, prctile=None):
        """Plot the daily extremes and, optionally, a horizontal percentile line.

        Args:
            prctile: Percentile (0-100) to draw as a dashed line; ``None`` draws none.

        Returns:
            The matplotlib figure.
        """
        times = self.__daily_extremes['time']
        is_max = self.daily_maxs is not None

        fig, ax = plt.subplots(1, figsize=(9, 5))
        ax.tick_params(axis='both', labelsize=8)
        ax.grid(True, linestyle='--')
        # Label the series by what it actually holds (it used to read 'Daily max'
        # for daily minima as well).
        ax.plot(times, self.__daily_extremes['elevation'], '-o',
                label='Daily max' if is_max else 'Daily min', zorder=2)
        ax.set_ylabel('Elevation ('+self.units+' above '+self.datum+')', fontsize=8)
        if prctile is not None:
            prctile_elev = self.percentile(prctile)
            # Freeze the x-limits so the percentile line spans them without rescaling.
            ax.set_xlim(ax.get_xlim())
            ax.plot(ax.get_xlim(), [prctile_elev, prctile_elev], 'k--',
                    label=str(prctile)+' percentile', zorder=3)
        ax.legend(fontsize=8)
        if is_max:
            ax.set_title('Daily Maximum Water Levels for '+self.input_file, fontsize=8)
        else:
            ax.set_title('Daily Minimum Water Levels for '+self.input_file, fontsize=8)

        first_time = times.iloc[0]
        last_time = times.iloc[-1]
        total_dt = last_time - first_time
        # Eight evenly spaced ticks with a quarter-tick margin on both sides. A
        # zero-length record has no usable tick spacing, so keep the defaults then.
        if total_dt > pd.Timedelta(0):
            ticks = pd.date_range(first_time, last_time, freq=total_dt/8)
            ax.set_xlim(first_time - (total_dt/8/4), last_time + (total_dt/8/4))
            ax.set_xticks(ticks)
        fig.autofmt_xdate()
        fig.show()
        return fig
50
+
51
+
52
def run(extremes_type, datum, data, datums, units, input_file):
    """Compute the daily maximum or minimum water level of a time series.

    Args:
        extremes_type: ``'max'`` or ``'min'``.
        datum: Key of ``datums`` the elevations are referenced to (``'Input'`` keeps
            the input datum).
        data: DataFrame whose first column holds timestamps and second column holds
            water levels; ``-99999.99`` is treated as missing.
        datums: Mapping of datum name to elevation. An ``'Input'`` entry of 0 is
            written into it (side effect kept from the original implementation).
        units: Unit text passed on to the result.
        input_file: File name passed on to the result.

    Returns:
        An ``Out`` wrapping a frame with ``time``, ``elevation`` and ``completeness``
        columns, one row per day that has at least one valid value.

    Raises:
        ValueError: If ``extremes_type`` is neither ``'max'`` nor ``'min'``.
    """
    if extremes_type not in ('max', 'min'):
        raise ValueError("extremes_type must be 'max' or 'min', got " + repr(extremes_type))

    datums['Input'] = 0

    # Get timestamps into a usable format #
    data = data.rename(columns={data.columns[0]: 'time', data.columns[1]: 'val'})
    data['time'] = pd.to_datetime(data['time'])
    data = data.replace(-99999.99, np.nan)

    # Put the data onto the wanted datum #
    data_dwant = pd.DataFrame({'time': data['time'], 'val': data['val'] - datums[datum]})
    data_dwant = data_dwant.set_index('time')

    # Sampling interval in hours. total_seconds() keeps whole days, which
    # `.seconds` drops (a daily interval used to become 0 and divide by zero).
    interval_hrs = (data_dwant.index[1] - data_dwant.index[0]).total_seconds() / 3600

    # Percentage of the expected samples present on each day.
    # NOTE(review): size() counts rows whose value is NaN as present - confirm that
    # this is the intended definition of completeness.
    n = data_dwant.groupby(data_dwant.index.date)['val'].size()
    per_complete = n / (24 / interval_hrs) * 100

    # Calc daily extremes. Days holding only missing values have no extreme, and
    # idxmax/idxmin would return NaN for them and break the lookup, so they are
    # left out before grouping.
    valid = data_dwant.dropna(subset=['val'])
    valid_by_day = valid.groupby(valid.index.date)['val']
    dmi = valid_by_day.idxmax() if extremes_type == 'max' else valid_by_day.idxmin()

    dm = data_dwant.loc[dmi].reset_index()
    dm = dm.rename(columns={'val': 'elevation'})
    # Align completeness with the days that produced a row.
    dm['completeness'] = per_complete.reindex(dmi.index).values

    return Out(dm, extremes_type, datum, units, input_file)
tadc/filter_defs.py ADDED
@@ -0,0 +1,18 @@
1
+ #
2
+ # Filter definitions for Datums Calculator Tide picker
3
+ #
4
+ #
5
+ from scipy.signal import butter, filtfilt
6
+
7
+ #Butterworth digital filter design.
8
def butter_lowpass(cutOff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    Args:
        cutOff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Filter order.

    Returns:
        The ``(b, a)`` coefficient pair returned by ``scipy.signal.butter``.
    """
    nyquist = 0.5 * fs
    # butter() expects the cutoff as a fraction of the Nyquist frequency.
    return butter(order, cutOff / nyquist, btype='low', analog=False)
13
+
14
def butter_lowpass_filter(data, cutOff, fs, order=4):
    """Low-pass filter ``data`` with a Butterworth filter applied via ``filtfilt``.

    Args:
        data: Sequence of samples to filter.
        cutOff: Cutoff frequency, in the same units as ``fs``.
        fs: Sampling frequency.
        order: Filter order handed to ``butter_lowpass``.

    Returns:
        The filtered samples as returned by ``scipy.signal.filtfilt``.
    """
    numerator, denominator = butter_lowpass(cutOff, fs, order=order)
    return filtfilt(numerator, denominator, data)
18
+
@@ -0,0 +1,211 @@
1
+ from datetime import datetime, timedelta
2
+ import matplotlib.pyplot as plt
3
+ import matplotlib.dates as mdates
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+
8
class Out:
    """Inundation analysis results together with the figures that summarise them.

    Attributes:
        inundations: DataFrame of inundation events (one row per event).
        datum: Datum name used in labels (``'input datum'`` when ``'Input'`` was given).
        units: Unit text used in labels and to pick the elevation bin width.
        input_file: Input file name used in the plot titles.
    """

    # Bin width of the elevation-frequency histogram per unit name. Unit names
    # are compared case-insensitively.
    _ELEVATION_BIN_SIZES = {
        'feet': 0.1, 'ft': 0.1,
        'inches': 1, 'in': 1,
        'meters': 0.05, 'm': 0.05,
        'centimeters': 5, 'cm': 5,
        'millimeters': 50, 'mm': 50,
    }

    def __init__(self, inundations, datum, units, input_file, fun_inps):
        """Store the results.

        Args:
            inundations: DataFrame of inundation events.
            datum: Datum the threshold is relative to.
            units: Unit text.
            input_file: Name shown in the plot titles.
            fun_inps: Dict with the ``'threshold'``, ``'threshold_datum'``, ``'data'``
                and ``'datums'`` the analysis was run with; needed for plotting.
        """
        self.inundations = inundations
        self.datum = 'input datum' if datum == 'Input' else datum
        self.units = units
        self.input_file = input_file
        self.__fun_inps = fun_inps

    def _elevation_bin_size(self):
        """Return the elevation histogram bin width for ``self.units``.

        Raises:
            ValueError: If the units are not recognised.
        """
        key = str(self.units).strip().lower()
        if key not in self._ELEVATION_BIN_SIZES:
            raise ValueError('Unsupported units ' + repr(self.units) + '; expected one of: '
                             + ', '.join(sorted(self._ELEVATION_BIN_SIZES)))
        return self._ELEVATION_BIN_SIZES[key]

    def _summary_title(self, heading):
        """Build the multi-line title shared by all figures, starting with ``heading``."""
        times = self.__fun_inps['data']['time']
        first_time = times.iloc[0]
        last_time = times.iloc[-1]
        total_duration = self.inundations['Duration (hours)'].sum()
        record_hours = (last_time - first_time).total_seconds()/60/60
        return (heading + self.input_file + '\n' +
                'Threshold = ' + str(self.__fun_inps['threshold']) + ' ' + self.units + ' above ' + self.datum + '\n' +
                'Time range = ' + first_time.strftime('%Y-%m-%d') + ' to ' + last_time.strftime('%Y-%m-%d') + '\n' +
                'Results: ' + str(len(self.inundations)) + ' Inundations. Total Duration = ' + str(round(total_duration, 2)) + ' hours ' +
                '(' + str(round(total_duration/record_hours*100, 2)) + '%)')

    def _plot_frequency(self, values, bin_size, heading, xlabel):
        """Draw a histogram of ``values`` with a reverse-cumulative percentage line.

        Returns the figure.
        """
        bins = np.arange(0, values.max()+bin_size, bin_size)
        labels = [
            f"{start if i == 0 else start + 0.001:.3f} - {end:.3f}"
            for i, (start, end) in enumerate(zip(bins[:-1], bins[1:]))
        ]
        # NOTE(review): with right=True the first bin excludes values equal to 0
        # although its label starts at 0.000 - confirm whether such events should count.
        bin_vals = pd.cut(values, bins, right=True, labels=labels)
        bin_counts = bin_vals.value_counts(sort=False)
        # Share of events falling in this bin or any higher one.
        reverse_cumsum = bin_counts.sort_index(ascending=False).cumsum().sort_index()
        reverse_percentages = (reverse_cumsum / bin_counts.sum()) * 100

        fig, ax = plt.subplots(1, figsize=(9, 5))
        ax_pct = ax.twinx()
        fig.subplots_adjust(bottom=0.2)
        ax.tick_params(axis='x', rotation=25)
        ax.tick_params(axis='both', labelsize=8)
        bin_counts.plot.bar(ax=ax, width=0.9, edgecolor='black', color='royalblue',
                            rot=45, label='Frequency', zorder=2)
        ax.set_xlabel(xlabel, fontsize=8)
        ax.set_ylabel('Frequency', fontsize=8)
        ax.set_ylim(0, bin_counts.max()+0.5)
        reverse_percentages.plot.line(ax=ax_pct, color='red', marker='o', linewidth=2,
                                      label='Percentage of Inundations', zorder=2)
        ax_pct.set_ylim(0, 105)
        ax_pct.set_ylabel('Percentage of Inundation', fontsize=8)
        ax_pct.tick_params(axis='both', labelsize=8)
        ax.grid(True, linestyle='--')
        ax.set_title(self._summary_title(heading),
                     fontsize=8, fontweight='normal', loc='left', ha='left')
        # One legend for both axes.
        handles1, labels1 = ax.get_legend_handles_labels()
        handles2, labels2 = ax_pct.get_legend_handles_labels()
        ax.legend(handles1 + handles2, labels1 + labels2, loc='upper right', fontsize=8)
        fig.show()
        return fig

    def plot(self):
        """Create and show the four summary figures.

        Returns:
            ``[fig_picks, fig_d_vs_h, fig_f_of_elev, fig_f_of_dur]``: the time series
            with threshold crossings, maximum elevation against duration, and the
            frequency histograms of elevation and of duration.

        Raises:
            ValueError: If ``self.units`` is not a recognised unit name.
        """
        # Resolve the bin width first so unsupported units fail before any figure
        # is created (this used to surface as an unbound local after two figures).
        elevation_bin_size = self._elevation_bin_size()

        data = self.__fun_inps['data']
        threshold = self.__fun_inps['threshold']
        times = data['time']
        first_time = times.iloc[0]
        last_time = times.iloc[-1]

        # Figure 1: water level on the threshold datum with the crossings marked.
        fig_picks, ax = plt.subplots(1, figsize=(9, 5))
        ax.plot(times, data['val'] - self.__fun_inps['datums'][self.__fun_inps['threshold_datum']], zorder=2, label='Data')
        ax.plot(ax.get_xlim(), [threshold, threshold], 'k--', zorder=3, label='Threshold')
        ax.set_ylabel('Elevation ('+self.units+' above '+self.datum+')', fontsize=8)
        ax.grid(True, linestyle='--')
        ax.tick_params(axis='both', labelsize=8)
        ax.plot(self.inundations['Period Start'], np.tile(threshold, len(self.inundations)), 'ks', markerfacecolor='r', zorder=2, label='Threshold crossing')
        ax.plot(self.inundations['Period End'], np.tile(threshold, len(self.inundations)), 'ks', markerfacecolor='r', zorder=2)
        ax.legend(fontsize=8)
        # Eight evenly spaced ticks with a quarter-tick margin on both sides.
        total_dt = last_time - first_time
        ticks = pd.date_range(first_time, last_time, freq=total_dt/8)
        ax.set_xlim(first_time - (total_dt/8/4), last_time + (total_dt/8/4))
        ax.set_xticks(ticks)
        fig_picks.autofmt_xdate()
        ax.set_title(self._summary_title('Inundation History for '),
                     fontsize=8, fontweight='normal', loc='left', ha='left')
        fig_picks.show()

        # Figure 2: maximum elevation against duration of each inundation.
        fig_d_vs_h, axx = plt.subplots(1)
        axx.plot(self.inundations['Duration (hours)'], self.inundations['Maximum Elevation Above Threshold'], 'ko', markerfacecolor='gray', zorder=2)
        axx.grid(True, linestyle='--')
        axx.set_xlabel('Duration of Inundation (Hours)', fontsize=8)
        axx.set_ylabel('Maximum Elevation ('+self.units+') Above Threshold', fontsize=8)
        axx.tick_params(axis='both', labelsize=8)
        axx.set_ylim(0, axx.get_ylim()[-1])
        axx.set_title(self._summary_title('Maximum Elevation vs. Duration of Inundation for '),
                      fontsize=8, fontweight='normal', loc='left', ha='left')
        fig_d_vs_h.show()

        # Figures 3 and 4: frequency histograms (duration uses 1-hour bins).
        fig_f_of_elev = self._plot_frequency(
            self.inundations['Maximum Elevation Above Threshold'], elevation_bin_size,
            'Frequency of Elevations for ', 'Inundation Exceedance ('+self.units+')')
        fig_f_of_dur = self._plot_frequency(
            self.inundations['Duration (hours)'], 1,
            'Frequency of Durations for ', 'Inundation Exceedance (Hours)')
        return [fig_picks, fig_d_vs_h, fig_f_of_elev, fig_f_of_dur]
144
+
145
+
146
def run(threshold, threshold_datum, data, datums, high_lows, units, input_file):
    """Find the periods during which the water level exceeds a threshold.

    Args:
        threshold: Elevation above ``threshold_datum`` that counts as inundation.
        threshold_datum: Key of ``datums`` the threshold is referenced to
            (``'Input'`` keeps the input datum).
        data: DataFrame whose first column holds timestamps and second column holds
            water levels; ``-99999.99`` is treated as missing.
        datums: Mapping of datum name to elevation; must contain ``'MHHW'``. An
            ``'Input'`` entry of 0 is written into it (side effect kept from the
            original implementation; the returned object also references it).
        high_lows: DataFrame with ``'time'`` and ``'tide type'`` columns, used to
            label each peak with the tide found within 30 minutes of it.
        units: Unit text passed on to the result.
        input_file: File name passed on to the result.

    Returns:
        An ``Out`` holding one row per complete inundation, or an empty
        ``pandas.DataFrame`` when there is no complete inundation.
    """
    datums['Input'] = 0

    # Get timestamps into a usable format #
    data = data.rename(columns={data.columns[0]: 'time', data.columns[1]: 'val'})
    data['time'] = pd.to_datetime(data['time'])
    data = data.replace(-99999.99, np.nan)

    # Put the data onto the threshold datum and onto MHHW #
    data_mhhw = pd.DataFrame({'time': data['time'], 'val': data['val']-datums['MHHW']})
    data_dwant = pd.DataFrame({'time': data['time'], 'val': data['val']-datums[threshold_datum]})

    # Positions where the level rises above / falls back to the threshold.
    # Comparisons against a missing neighbour are False, so nothing is detected
    # at the first sample or directly after a gap.
    previous = data_dwant['val'].shift(1)
    up_crosses = np.where((data_dwant['val'] > threshold) & (previous <= threshold))[0]
    down_crosses = np.where((data_dwant['val'] <= threshold) & (previous > threshold))[0]

    # Separate threshold exceedances into temporally separate groups #
    # Walk the crossings in time order and close each inundation at the first
    # down-crossing after its up-crossing. A down-crossing with no open
    # up-crossing (record starts above the threshold) and an up-crossing that is
    # never closed (record ends above it, or a gap follows) are incomplete events
    # and are skipped. The former index-juggling loop lost, duplicated or crashed
    # on events in exactly those situations.
    crossings = sorted([(int(pos), True) for pos in up_crosses] +
                       [(int(pos), False) for pos in down_crosses])
    spans = []
    open_start = None
    for pos, is_up in crossings:
        if is_up:
            open_start = pos
        elif open_start is not None:
            spans.append((open_start, pos))
            open_start = None

    if not spans:
        return pd.DataFrame()

    # For each exceedance, get the first, last, and peak times and format to nice DataFrame #
    rows = []
    for group_start, group_end in spans:
        # The group ends at the last sample above the threshold (end is exclusive).
        group = data_dwant.iloc[group_start:group_end]
        up_cross_time = group['time'].iloc[0]
        down_cross_time = group['time'].iloc[-1]
        peak_time = group['time'].iloc[group['val'].argmax()]

        # Label the peak with the nearest high/low if one lies within 30 minutes.
        offsets = (high_lows['time']-peak_time.replace(tzinfo=None)).abs()
        if len(offsets) > 0 and offsets.min() < timedelta(minutes=30):
            tide_type = high_lows.iloc[offsets.argmin()]['tide type']
        else:
            tide_type = 'Unknown'

        rows.append(pd.DataFrame({
            'Peak Date/Time': peak_time,
            'Period Start': [up_cross_time],
            'Period End': down_cross_time,
            'Duration (hours)': (down_cross_time - up_cross_time).total_seconds()/60/60,
            'Maximum Elevation Above Threshold': group['val'].max() - threshold,
            'Maximum Elevation (MHHW)': data_mhhw['val'][data_mhhw['time'] == peak_time].values[0],
            'Tide Type': tide_type}))
    inundations = pd.concat(rows, ignore_index=True)

    return Out(inundations, threshold_datum, units, input_file,
               {'threshold': threshold, 'threshold_datum': threshold_datum, 'data': data, 'datums': datums})
208
+
209
+
210
+
211
+
tadc/qa.py ADDED
@@ -0,0 +1,71 @@
1
+ import datetime
2
+ import logging
3
+ import numpy as np
4
+ import pandas as pd
5
+ import requests
6
logger = logging.getLogger(__name__)


class Assurances:
    """Quality-assurance steps that repair a water level time series in place.

    Attributes:
        ts: The time series DataFrame; each step replaces or updates it.
        resample_minutes: Wanted sample spacing in minutes, or ``None`` to resample
            only when the spacing is uneven.
    """

    def __init__(self, ts, resample_minutes):
        self.ts = ts
        self.resample_minutes = resample_minutes

    def assure_no_unreadable_values(self):
        """Name the columns ``time``/``val`` and make ``val`` numeric.

        Values that cannot be read as a float become NaN and are reported in one
        warning. The values are converted one by one instead of parsing the text
        of a conversion error, so several different unreadable values, or values
        containing ':' or spaces, are handled as well.
        """
        ts = self.ts.rename(columns={self.ts.columns[0]:'time',self.ts.columns[1]:'val'})
        raw = ts['val']
        if pd.api.types.is_numeric_dtype(raw):
            ts['val'] = raw.astype(float)
        else:
            unreadable = []

            def to_float(value):
                if pd.isna(value):
                    return np.nan
                try:
                    return float(value)
                except (TypeError, ValueError):
                    unreadable.append(str(value))
                    return np.nan

            ts['val'] = raw.map(to_float).astype(float)
            if unreadable:
                distinct = list(dict.fromkeys(unreadable))
                logger.warning('WARNING: Unreadable value found in data: ' + ', '.join(distinct) + '. Replacing all occurrences with NaNs.')
        self.ts = ts

    @staticmethod
    def _resample(ts, interval_want, interval_mean):
        """Put ``ts`` onto a regular grid of ``interval_want`` using the nearest sample.

        Grid points with no sample within ``interval_mean`` stay empty, which
        preserves gaps.
        """
        if interval_want < interval_mean:
            logger.warning('WARNING: Input resampling rate is higher than data sampling rate. This may result in unstable behavior. Consider resampling to a lower rate.')
        ti = pd.date_range(ts['time'].iloc[0],ts['time'].iloc[-1],freq=interval_want)
        # Reinterpolate being careful to preserve gaps #
        return ts.set_index('time').reindex(ti,method='nearest',tolerance=interval_mean).reset_index().rename(columns={'index':'time'})

    def assure_even_temporal_spacing(self):
        """Remove duplicate timestamps and resample when needed.

        Resampling happens when the spacing is uneven (to the mean spacing floored
        to the minute, unless ``resample_minutes`` is set) or whenever
        ``resample_minutes`` is set.
        """
        ts = self.ts
        ts['time'] = pd.to_datetime(ts['time'])
        ts = ts.groupby('time').first().reset_index() # Remove duplicates #
        time_diffs_all = ts['time'].diff()
        interval_mean = time_diffs_all.mean().floor('min')
        # nunique() on the Series behaves the same in every pandas version, unlike
        # calling dropna() on the array returned by unique().
        is_uneven = time_diffs_all.dropna().nunique() > 1

        if self.resample_minutes is None:
            if not is_uneven:
                self.ts = ts
                return
            interval_want = interval_mean
        else:
            interval_want = pd.Timedelta(minutes = self.resample_minutes)

        # total_seconds() keeps whole days, which `.seconds` drops.
        spacing = str(round(interval_want.total_seconds()/60, 2))
        if is_uneven:
            logger.warning('WARNING: Input timeseries has uneven temporal spacing. Re-interpolating to a spacing of ' + spacing + ' minutes per sample.')
        else:
            logger.warning('WARNING: Re-interpolating to a spacing of ' + spacing + ' minutes per sample.')
        self.ts = self._resample(ts, interval_want, interval_mean)

    def assure_flatlines_are_gaps(self):
        """Replace flatlined stretches of ``val`` with NaN.

        A sample is flat when it differs from the previous one by less than 0.001.
        Only runs of at least two consecutive flat samples are treated, and the
        last sample of each run is kept.
        """
        is_flatline = self.ts['val'].diff().abs() < 0.001
        # Number the runs of equal flags and measure their length.
        consecutive_groups = is_flatline.ne(is_flatline.shift()).cumsum()
        group_sizes = consecutive_groups.groupby(consecutive_groups).transform('size')
        is_long_flatline = is_flatline & (group_sizes > 1)
        # The final sample of a run is followed by a non-flat one; keep it.
        is_end_of_run = is_long_flatline & is_long_flatline.ne(is_long_flatline.shift(-1))
        is_long_flatline_final = is_long_flatline & ~is_end_of_run
        if is_long_flatline_final.sum() > 0:
            self.ts.loc[is_long_flatline_final,'val'] = np.nan
            logger.warning('WARNING: Flatlines detected. Treating flatlines as missing data.')
64
+
65
+
66
def run(ts, resample_minutes):
    """Apply every quality-assurance step to ``ts`` and return the repaired series.

    The steps run in a fixed order: unreadable values, temporal spacing, flatlines.
    """
    checker = Assurances(ts, resample_minutes)
    steps = (
        checker.assure_no_unreadable_values,
        checker.assure_even_temporal_spacing,
        checker.assure_flatlines_are_gaps,
    )
    for step in steps:
        step()
    return checker.ts
tadc/qc.py ADDED
@@ -0,0 +1,54 @@
1
+ import datetime
2
+ import logging
3
+ import numpy as np
4
+ import pandas as pd
5
+ import requests
6
+ from scipy.signal import periodogram
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
class Tests:
    """Quality-control checks for an input time series and its control station.

    Attributes:
        ts: The input DataFrame; renamed to ``time``/``val`` columns by
            ``check_csv_format`` and parsed by ``check_date_format``.
        control_station_id: CO-OPS station id of the control station, or ``None``.
        subordinate_lat, subordinate_lon: Position of the subordinate station in
            degrees.
    """

    def __init__(self, ts, control_station_id, subordinate_lat, subordinate_lon):
        self.ts = ts
        self.control_station_id = control_station_id
        self.subordinate_lat = subordinate_lat
        self.subordinate_lon = subordinate_lon

    def check_csv_format(self):
        """Require exactly two columns and name them ``time`` and ``val``.

        Raises:
            RuntimeError: If the input does not have exactly two columns.
        """
        if len(self.ts.columns) != 2:
            raise RuntimeError("Input csv file must contain two columns: time and water level")
        self.ts = self.ts.rename(columns={self.ts.columns[0]:'time',self.ts.columns[1]:'val'})

    def check_date_format(self):
        """Parse the ``time`` column as datetimes.

        Raises:
            ValueError: If the timestamps cannot be interpreted.
        """
        try:
            self.ts['time'] = pd.to_datetime(self.ts['time'])
        except ValueError as err:
            # Keep the parser's own message available as the cause.
            raise ValueError("Timestamps could not be interpreted.") from err

    def check_control_station_distance(self):
        """Warn when the control station is more than 10 km from the subordinate station.

        Does nothing when no control station id was given.
        """
        if self.control_station_id is None:
            return
        r = requests.get('https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/' + str(self.control_station_id) + '.json?units=english')
        station = r.json()['stations'][0]
        d = self._haversine(self.subordinate_lat, self.subordinate_lon, station['lat'], station['lng'])
        if d > 10:
            # Logger.warn is a deprecated alias that newer Python versions no
            # longer provide; warning() is the supported spelling.
            logger.warning('WARNING: Control station is ' + str(round(d,2)) + ' km from subordinate station.')

    @staticmethod
    def _haversine(lat1, lon1, lat2, lon2):
        """Return the great-circle distance in km between two points given in degrees."""
        lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
        R = 6371  # Earth radius in km
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
        # Rounding can push `a` marginally outside [0, 1], where arcsin is undefined.
        a = np.clip(a, 0.0, 1.0)
        c = 2 * np.arcsin(np.sqrt(a))
        return R * c
48
+
49
+
50
def run(ts, control_station_id, subordinate_lat, subordinate_lon):
    """Run every quality-control check on the input; problems are raised or logged.

    Nothing is returned.
    """
    checks = Tests(ts, control_station_id, subordinate_lat, subordinate_lon)
    for check in (checks.check_csv_format,
                  checks.check_date_format,
                  checks.check_control_station_distance):
        check()