PyPI - tadc - Versions diffs - 2.3.5__py3-none-any.whl - Mend

tadc 2.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

tadc/__init__.py +1 -0
tadc/control_data.py +97 -0
tadc/daily_extremes_analysis.py +76 -0
tadc/filter_defs.py +18 -0
tadc/inundation_analysis.py +211 -0
tadc/qa.py +71 -0
tadc/qc.py +54 -0
tadc/run.py +1433 -0
tadc/tides.py +627 -0
tadc-2.3.5.dist-info/METADATA +116 -0
tadc-2.3.5.dist-info/RECORD +14 -0
tadc-2.3.5.dist-info/WHEEL +5 -0
tadc-2.3.5.dist-info/licenses/LICENSE.txt +1 -0
tadc-2.3.5.dist-info/top_level.txt +1 -0

tadc/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .run import *

tadc/control_data.py ADDED Viewed

@@ -0,0 +1,97 @@
+"""CO-OPS API data retrieval functions for CO-OPS Datums Calculator"""
+from datetime import datetime, date, time, timedelta
+import numpy as np
+import pandas as pd
+import requests
+from . import tides as tf
+def Get_Monthly_Means(Control_Station_ID, Begin_Month, Begin_Year, End_Month, End_Year, Conversion):
+    """Retrieve a control station's monthly means from the CO-OPS data API.
+
+    Requests the monthly_mean product (station datum, metric units, GMT) from
+    the first day of Begin_Month/Begin_Year through the last day of
+    End_Month/End_Year.
+
+    Args:
+        Control_Station_ID: Station identifier; inserted into the URL via str().
+        Begin_Month, Begin_Year: First month of the request (int or numeric string).
+        End_Month, End_Year: Last month of the request (int or numeric string).
+        Conversion: Factor that every returned value is multiplied by.
+
+    Returns:
+        A list with one entry per record returned by the API, each of the form
+        [highest, MHHW, MHW, MSL, MLW, MLLW, lowest] as floats.
+
+    Raises:
+        KeyError: If the JSON response has no 'data' member.
+    """
+    value_columns = ['highest','MHHW','MHW','MSL','MLW','MLLW','lowest']
+    # NOTE(review): assumes Last_Day_In_Month returns the (two-digit) number of days in the month - confirm in tides.py
+    end_days = tf.Last_Day_In_Month(int(End_Year),int(End_Month))
+    # Pad through int() so 3, '3' and '03' all become '03'; prefixing '0' to the raw
+    # string turned an already padded '03' into '003' and produced an invalid date.
+    begin_date = str(Begin_Year) + f"{int(Begin_Month):02d}" + '01'
+    end_date = str(End_Year) + f"{int(End_Month):02d}" + str(end_days)
+    url1 = 'https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?'
+    url2 = 'begin_date=' + begin_date + '&end_date=' + end_date + '&station=' + str(Control_Station_ID)
+    url3 = '&product=monthly_mean&datum=stnd&units=metric&time_zone=gmt&application=TADC&format=json'
+    r = requests.get(url1 + url2 + url3)
+    MM = pd.DataFrame(r.json()['data'])
+    MM[value_columns] = MM[value_columns].astype(float) * Conversion
+    # List-of-lists format needed by run.py
+    return MM[value_columns].values.tolist()
+def Get_High_Lows(Control_Station_ID, Start_DT, End_DT, gmt_offset, Conversion):
+    """Retrieve control station high and low tides from the CO-OPS data API.
+
+    The requested window is shifted by gmt_offset hours before querying (the
+    API is queried with time_zone=gmt) and the returned times are shifted
+    back by the same amount. Windows longer than 365 days are split into
+    equal chunks of at most roughly one year, each fetched separately.
+
+    Args:
+        Control_Station_ID: Station identifier; inserted into the URL via str().
+        Start_DT, End_DT: Start and end of the window (datetime-like).
+        gmt_offset: Offset in hours between the subordinate station's time and GMT.
+        Conversion: Factor that every water level is multiplied by.
+
+    Returns:
+        A list of [time, value, type] lists, one per tide, where type is the
+        API's 'ty' code with spaces removed.
+
+    Raises:
+        KeyError: If a JSON response has no 'data' member.
+    """
+    Start_DT += timedelta(hours=gmt_offset)
+    End_DT += timedelta(hours=gmt_offset)
+    if End_DT - Start_DT > timedelta(days=365):
+        chunks = pd.date_range(Start_DT, End_DT, periods=int(np.ceil((End_DT - Start_DT).days/365))+1)
+    else:
+        chunks = (Start_DT, End_DT)
+    url1 = 'https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?'
+    url3 = '&product=High_low&datum=stnd&units=metric&time_zone=gmt&application=TADC&format=json'
+    hl_chunks = []
+    for start_dt, end_dt in zip(chunks[:-1], chunks[1:]):
+        url2 = 'begin_date=' + start_dt.strftime('%Y%m%d') + '&end_date=' + end_dt.strftime('%Y%m%d') + '&station=' + str(Control_Station_ID)
+        r = requests.get(url1 + url2 + url3)
+        hl_chunks.append(pd.DataFrame(r.json()['data']))
+    # Consecutive chunks share their boundary date (requests are date-granular), so the
+    # tides of that day come back in both responses; keep a single copy of each record.
+    HL = pd.concat(hl_chunks,ignore_index=True).drop_duplicates().reset_index(drop=True)
+    HL['t'] = pd.to_datetime(HL['t']) - timedelta(hours=gmt_offset)
+    HL['v']  = HL['v'].astype(float) * Conversion
+    HL['ty'] = [tide_type.replace(' ','') for tide_type in HL['ty']]
+    # List-of-lists format needed by run.py; the column subset is selected once, not per row
+    selected = HL[['t','v','ty']]
+    HL_lists = [selected.iloc[i].values.tolist() for i in range(len(selected))]
+    return HL_lists
+def Get_Accepted_Datums(Station_ID, Conversion):
+    """Retrieve a station's accepted datums from the CO-OPS metadata API.
+
+    Returns a list ordered as MHHW, MHW, DTL, MTL, MSL, MLW, MLLW, GT, MN,
+    DHQ, DLQ, NAVD88, LWI, HWI. Datums missing from the response are np.nan.
+    Every value except LWI and HWI is multiplied by Conversion.
+    """
+    wanted = ('MHHW','MHW','DTL','MTL','MSL','MLW','MLLW','GT','MN','DHQ','DLQ','NAVD88','LWI','HWI')
+    # These two entries are passed through without applying Conversion
+    unscaled = ('LWI','HWI')
+    endpoint = 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/' + str(Station_ID) + '/datums.json?units=metric'
+    table = pd.DataFrame(requests.get(endpoint).json()['datums'])
+    SD = []
+    for name in wanted:
+        matches = table.loc[table['name'] == name,'value'].values
+        if len(matches) == 0:
+            SD.append(np.nan)
+        elif name in unscaled:
+            SD.append(matches[0])
+        else:
+            SD.append(matches[0] * Conversion)
+    return SD
+def Get_SubMethod(Station_ID):
+    """Choose the datum computation method from the control station's longitude.
+
+    Looks the station up in the CO-OPS metadata API and returns 'Standard'
+    when its longitude is west of -100 degrees (West Coast/Pacific stations)
+    and 'Modified' otherwise (East Coast/Gulf Coast/Caribbean stations).
+    """
+    endpoint = 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/' + str(Station_ID) + '.json?units=metric'
+    station = requests.get(endpoint).json()['stations'][0]
+    return 'Standard' if station['lng'] < -100 else 'Modified'

tadc/daily_extremes_analysis.py ADDED Viewed

@@ -0,0 +1,76 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+class Out:
+    """Daily-extremes result with percentile and plotting helpers.
+
+    Attributes are set in __init__:
+    daily_maxs / daily_mins hold the extremes table for the analysed type and
+    None for the other; datum is the datum label ('input datum' when the
+    analysis ran on the 'Input' datum); units and input_file are used in
+    plot labels only.
+    """
+    def __init__(self, daily_extremes, extremes_type, datum, units, input_file):
+        # Table with 'time' and 'elevation' columns (as read by percentile() and plot())
+        self.__daily_extremes = daily_extremes
+        if extremes_type == 'max':
+            self.daily_maxs = daily_extremes
+            self.daily_mins = None
+        elif extremes_type == 'min':
+            self.daily_mins = daily_extremes
+            self.daily_maxs = None
+        if datum == 'Input':
+            self.datum = 'input datum'
+        else:
+            self.datum = datum
+        self.units = units
+        self.input_file = input_file
+    def percentile(self, prctile):
+        """Return the prctile-th percentile (0-100) of the daily extreme elevations."""
+        return self.__daily_extremes['elevation'].quantile(prctile/100)
+    def plot(self, prctile=None):
+        """Plot the daily extremes, optionally with a horizontal percentile line.
+
+        Shows the figure and returns it.
+        """
+        is_max = self.daily_maxs is not None
+        times = self.__daily_extremes['time']
+        fig,ax = plt.subplots(1,figsize=(9,5))
+        ax.tick_params(axis='both',labelsize=8)
+        ax.grid('on',linestyle='--')
+        # Label follows the analysed extreme; it used to read 'Daily max' for minima too
+        ax.plot(times,self.__daily_extremes['elevation'],'-o',label='Daily max' if is_max else 'Daily min',zorder=2)
+        ax.set_ylabel('Elevation ('+self.units+' above '+self.datum+')',fontsize=8)
+        if prctile is not None:
+            prctile_elev = self.percentile(prctile)
+            # Freeze the x-limits so drawing the percentile line does not rescale the axis
+            ax.set_xlim(ax.get_xlim())
+            ax.plot(ax.get_xlim(),[prctile_elev,prctile_elev],'k--',label=str(prctile)+' percentile',zorder=3)
+            ax.legend(fontsize=8)
+        if is_max:
+            ax.set_title('Daily Maximum Water Levels for '+self.input_file,fontsize=8)
+        else:
+            ax.set_title('Daily Minimum Water Levels for '+self.input_file,fontsize=8)
+        # Eight evenly spaced ticks, with a quarter-tick margin on either side
+        total_dt = times.iloc[-1] - times.iloc[0]
+        ticks = pd.date_range(times.iloc[0],
+                              times.iloc[-1],
+                              freq=total_dt/8)
+        ax.set_xlim(times.iloc[0] - (total_dt/8/4),
+                    times.iloc[-1] + (total_dt/8/4))
+        ax.set_xticks(ticks)
+        fig.autofmt_xdate()
+        fig.show()
+        return fig
+def run(extremes_type, datum, data, datums, units, input_file):
+    """Compute daily maximum or minimum water levels relative to a datum.
+
+    Args:
+        extremes_type: 'max' or 'min'.
+        datum: Key into datums to reference elevations to; 'Input' leaves
+            the values on the datum of the input data.
+        data: DataFrame whose first column is time and second is water level;
+            -99999.99 is treated as missing.
+        datums: Mapping of datum name to elevation. It is not modified.
+        units: Unit label, used for plotting only.
+        input_file: Input file name, used for plot titles only.
+
+    Returns:
+        An Out instance whose table has columns 'time', 'elevation' and
+        'completeness' (samples found per day as a percentage of the number
+        expected from the spacing of the first two samples).
+
+    Raises:
+        ValueError: If extremes_type is neither 'max' nor 'min'.
+    """
+    import copy
+    if extremes_type not in ('max', 'min'):
+        raise ValueError("extremes_type must be 'max' or 'min', got " + repr(extremes_type))
+    # Add the 'Input' pseudo-datum to a copy so the caller's table is left untouched
+    datums = copy.copy(datums)
+    datums['Input'] = 0
+    # Get timestamps into a usable format #
+    data = data.rename(columns={data.columns[0]:'time',data.columns[1]:'val'})
+    data['time'] = pd.to_datetime(data['time'])
+    data = data.replace(-99999.99, np.nan)
+    # Put the data onto the requested datum #
+    data_dwant = pd.DataFrame({'time':data['time'],'val':data['val']-datums[datum]})
+    data_dwant = data_dwant.set_index('time')
+    # total_seconds() rather than .seconds, which wraps at one day (0 for a daily series)
+    interval_hrs = (data_dwant.index[1] - data_dwant.index[0]).total_seconds()/3600
+    by_day = data_dwant.groupby(data_dwant.index.date)['val']
+    per_complete = by_day.size() / (24 / interval_hrs) * 100
+    # Timestamp of each day's extreme #
+    dmi = by_day.idxmax() if extremes_type == 'max' else by_day.idxmin()
+    dm = data_dwant.loc[dmi].reset_index()
+    dm = dm.rename(columns={'val':'elevation'})
+    dm['completeness'] = per_complete.values
+    return Out(dm, extremes_type, datum, units, input_file)

tadc/filter_defs.py ADDED Viewed

@@ -0,0 +1,18 @@
+#
+# Filter definitions for Datums Calculator Tide picker
+#
+#
+from scipy.signal import butter, filtfilt
+#Butterworth digital filter design.
+def butter_lowpass(cutOff, fs, order=5):
+    """Design a digital low-pass Butterworth filter.
+
+    cutOff and fs (sampling rate) must be in the same units. Returns the
+    (b, a) numerator/denominator coefficients produced by scipy's butter.
+    """
+    # The cutoff is expressed as a fraction of the Nyquist frequency (half the sampling rate)
+    return butter(order, cutOff / (0.5 * fs), btype='low', analog = False)
+def butter_lowpass_filter(data, cutOff, fs, order=4):
+    """Low-pass data with a Butterworth filter applied forwards and backwards (filtfilt).
+
+    cutOff and fs (sampling rate) must be in the same units; the cutoff is
+    normalised by the Nyquist frequency before the filter is designed.
+    """
+    numerator, denominator = butter(order, cutOff / (0.5 * fs), btype='low', analog = False)
+    return filtfilt(numerator, denominator, data)

tadc/inundation_analysis.py ADDED Viewed

@@ -0,0 +1,211 @@
+from datetime import datetime, timedelta
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+import numpy as np
+import pandas as pd
+class Out:
+    """Inundation-analysis result with plotting support.
+
+    inundations is the per-event table; datum is the threshold datum label
+    ('input datum' when the analysis ran on the 'Input' datum); units and
+    input_file are used for labels. The inputs of the analysis (keys
+    'threshold', 'threshold_datum', 'data', 'datums') are kept privately for
+    plotting.
+    """
+    # Histogram bin width for peak elevations, keyed by lower-cased unit label
+    _ELEVATION_BIN_SIZES = {
+        'feet': 0.1, 'ft': 0.1,
+        'inches': 1, 'in': 1,
+        'meters': 0.05, 'm': 0.05,
+        'centimeters': 5, 'cm': 5,
+        'millimeters': 50, 'mm': 50,
+    }
+    def __init__(self, inundations, datum, units, input_file, fun_inps):
+        self.inundations = inundations
+        if datum == 'Input':
+            self.datum = 'input datum'
+        else:
+            self.datum = datum
+        self.units = units
+        self.input_file = input_file
+        self.__fun_inps = fun_inps
+    def _title(self, heading):
+        """Return the multi-line figure title: heading + file, threshold, time range, summary."""
+        times = self.__fun_inps['data']['time']
+        first, last = times.iloc[0], times.iloc[-1]
+        total_hours = self.inundations['Duration (hours)'].sum()
+        record_hours = (last - first).total_seconds()/60/60
+        return (heading + self.input_file + '\n' +
+                'Threshold = ' + str(self.__fun_inps['threshold']) + ' ' + self.units + ' above ' + self.datum + '\n' +
+                'Time range =  ' + datetime.strftime(first,'%Y-%m-%d') + ' to ' + datetime.strftime(last,'%Y-%m-%d') + '\n' +
+                'Results: ' + str(len(self.inundations)) + ' Inundations. Total Duration = ' + str(round(total_hours,2)) + ' hours ' +
+                '(' + str(round(total_hours/record_hours*100,2)) + '%)')
+    def _plot_exceedance_histogram(self, values, bin_size, xlabel, heading):
+        """Draw a frequency histogram of values with the percentage of events at or above each bin."""
+        bins = np.arange(0,values.max()+bin_size,bin_size)
+        # Bins are right-closed, so every label after the first starts just above the previous edge
+        labels = [
+            f"{start if i == 0 else start + 0.001:.3f} - {end:.3f}"
+            for i, (start, end) in enumerate(zip(bins[:-1], bins[1:]))
+        ]
+        bin_vals = pd.cut(values,bins,right=True,labels=labels)
+        bin_counts = bin_vals.value_counts(sort=False)
+        # Cumulate from the highest bin downwards: share of events reaching at least each bin
+        reverse_cumsum = bin_counts.sort_index(ascending=False).cumsum().sort_index()
+        reverse_percentages = (reverse_cumsum / bin_counts.sum()) * 100
+        fig,ax_count = plt.subplots(1,figsize=(9,5))
+        ax_pct = ax_count.twinx()
+        fig.subplots_adjust(bottom=0.2)
+        ax_count.tick_params(axis='x',rotation=25)
+        ax_count.tick_params(axis='both',labelsize=8)
+        bin_counts.plot.bar(ax=ax_count, width=0.9, edgecolor='black', color='royalblue',
+                            rot=45, label='Frequency',zorder=2)
+        ax_count.set_xlabel(xlabel,fontsize=8)
+        ax_count.set_ylabel('Frequency',fontsize=8)
+        ax_count.set_ylim(0,bin_counts.max()+0.5)
+        reverse_percentages.plot.line(ax=ax_pct, color='red', marker='o', linewidth=2,
+                                      label='Percentage of Inundations',zorder=2)
+        ax_pct.set_ylim(0,105)
+        ax_pct.set_ylabel('Percentage of Inundation',fontsize=8)
+        ax_pct.tick_params(axis='both',labelsize=8)
+        ax_count.grid('on',linestyle='--')
+        ax_count.set_title(self._title(heading),
+              fontsize=8, fontweight='normal', loc='left', ha='left')
+        handles1, labels1 = ax_count.get_legend_handles_labels()
+        handles2, labels2 = ax_pct.get_legend_handles_labels()
+        ax_count.legend(handles1 + handles2, labels1 + labels2, loc='upper right',fontsize=8)
+        fig.show()
+        return fig
+    def plot(self):
+        """Show and return the four summary figures.
+
+        Returns [time series with threshold crossings, peak elevation vs.
+        duration scatter, elevation histogram, duration histogram].
+
+        Raises:
+            ValueError: If units is not a recognised unit label (checked
+                before any figure is created).
+        """
+        # Resolve the bin width first so an unknown unit fails before any figure is shown
+        # (it used to surface as an unbound local after two figures were already up).
+        try:
+            elevation_bin = self._ELEVATION_BIN_SIZES[str(self.units).lower()]
+        except KeyError:
+            raise ValueError('Unsupported units for inundation plots: ' + repr(self.units)) from None
+        inps = self.__fun_inps
+        times = inps['data']['time']
+        threshold = inps['threshold']
+        fig_picks,ax = plt.subplots(1,figsize=(9,5))
+        ax.plot(times,inps['data']['val'] - inps['datums'][inps['threshold_datum']],zorder=2,label='Data')
+        ax.plot(ax.get_xlim(),[threshold,threshold],'k--',zorder=3,label='Threshold')
+        ax.set_ylabel('Elevation ('+self.units+' above '+self.datum+')',fontsize=8)
+        ax.grid('on',linestyle='--')
+        ax.tick_params(axis='both',labelsize=8)
+        ax.plot(self.inundations['Period Start'],np.tile(threshold,len(self.inundations)),'ks',markerfacecolor='r',zorder=2,label='Threshold crossing')
+        ax.plot(self.inundations['Period End'],np.tile(threshold,len(self.inundations)),'ks',markerfacecolor='r',zorder=2)
+        ax.legend(fontsize=8)
+        # Eight evenly spaced ticks, with a quarter-tick margin on either side
+        total_dt = times.iloc[-1] - times.iloc[0]
+        ticks = pd.date_range(times.iloc[0],
+                              times.iloc[-1],
+                              freq=total_dt/8)
+        ax.set_xlim(times.iloc[0] - (total_dt/8/4),
+                    times.iloc[-1] + (total_dt/8/4))
+        ax.set_xticks(ticks)
+        fig_picks.autofmt_xdate()
+        ax.set_title(self._title('Inundation History for '),
+              fontsize=8, fontweight='normal', loc='left', ha='left')
+        fig_picks.show()
+        fig_d_vs_h,axx = plt.subplots(1)
+        axx.plot(self.inundations['Duration (hours)'],self.inundations['Maximum Elevation Above Threshold'],'ko',markerfacecolor='gray',zorder=2)
+        axx.grid('on',linestyle='--')
+        axx.set_xlabel('Duration of Inundation (Hours)',fontsize=8)
+        axx.set_ylabel('Maximum Elevation ('+self.units+') Above Threshold',fontsize=8)
+        axx.tick_params(axis='both',labelsize=8)
+        axx.set_ylim(0,axx.get_ylim()[-1])
+        axx.set_title(self._title('Maximum Elevation vs. Duration of Inundation for '),
+                      fontsize=8, fontweight='normal', loc='left', ha='left')
+        fig_d_vs_h.show()
+        fig_f_of_elev = self._plot_exceedance_histogram(
+            self.inundations['Maximum Elevation Above Threshold'], elevation_bin,
+            'Inundation Exceedance ('+self.units+')', 'Frequency of Elevations for ')
+        # Durations are always binned by the hour
+        fig_f_of_dur = self._plot_exceedance_histogram(
+            self.inundations['Duration (hours)'], 1,
+            'Inundation Exceedance (Hours)', 'Frequency of Durations for ')
+        return [fig_picks, fig_d_vs_h, fig_f_of_elev, fig_f_of_dur]
+def run(threshold, threshold_datum, data, datums, high_lows, units, input_file):
+    """Find the periods during which the water level exceeds a threshold.
+
+    Args:
+        threshold: Elevation above threshold_datum that counts as inundation.
+        threshold_datum: Key into datums; 'Input' leaves the values on the
+            datum of the input data.
+        data: DataFrame whose first column is time and second is water level;
+            -99999.99 is treated as missing.
+        datums: Mapping of datum name to elevation; must contain 'MHHW'.
+            It is not modified.
+        high_lows: DataFrame with 'time' and 'tide type' columns, used to
+            label each inundation peak.
+        units: Unit label, used for plotting only.
+        input_file: Input file name, used for plot titles only.
+
+    Returns:
+        An Out instance wrapping one row per inundation, or an empty
+        DataFrame when no complete inundation (an up-crossing followed by a
+        down-crossing) is found.
+    """
+    import copy
+    # Add the 'Input' pseudo-datum to a copy so the caller's table is left untouched
+    datums = copy.copy(datums)
+    datums['Input'] = 0
+    # Get timestamps into a usable format #
+    data = data.rename(columns={data.columns[0]:'time',data.columns[1]:'val'})
+    data['time'] = pd.to_datetime(data['time'])
+    data = data.replace(-99999.99, np.nan)
+    # Put the data onto the threshold datum and onto MHHW #
+    data_mhhw = pd.DataFrame({'time':data['time'],'val':data['val']-datums['MHHW']})
+    data_dwant = pd.DataFrame({'time':data['time'],'val':data['val']-datums[threshold_datum]})
+    # Positions where the level rises above / falls back to the threshold #
+    previous = data_dwant['val'].shift(1)
+    up_crosses = np.where((data_dwant['val']>threshold) & (previous<=threshold))[0]
+    down_crosses = np.where((data_dwant['val']<=threshold) & (previous>threshold))[0]
+    # Pair every up-crossing with the first down-crossing after it, provided that comes
+    # before the next up-crossing. Crossings without a partner (record starts or ends
+    # above the threshold, or a crossing hidden by a gap) are skipped, so a trailing
+    # incomplete event can neither crash the run nor duplicate the previous event.
+    exceedance_groups = []
+    next_ups = np.append(up_crosses[1:], len(data_dwant))
+    for group_start, next_up in zip(up_crosses, next_ups):
+        k = np.searchsorted(down_crosses, group_start, side='right')
+        if k < len(down_crosses) and down_crosses[k] < next_up:
+            # The slice stops before the down-crossing sample: the group is the samples above threshold
+            exceedance_groups.append(data_dwant.iloc[group_start:down_crosses[k]])
+    if not exceedance_groups:
+        return pd.DataFrame()
+    # For each exceedance, get the first, last, and peak times and format to nice DataFrame #
+    rows = []
+    for group in exceedance_groups:
+        up_cross_time = group['time'].iloc[0]
+        down_cross_time = group['time'].iloc[-1]
+        peak_time = group['time'].iloc[group['val'].argmax()]
+        # Label the peak with the tide type of a picked high/low within 30 minutes of it
+        offsets = (high_lows['time']-peak_time.replace(tzinfo=None)).abs()
+        if offsets.min()<timedelta(minutes=30):
+            tide_type = high_lows.iloc[offsets.argmin()]['tide type']
+        else:
+            tide_type = 'Unknown'
+        rows.append({'Peak Date/Time':peak_time,
+                     'Period Start':up_cross_time,
+                     'Period End':down_cross_time,
+                     'Duration (hours)':(down_cross_time - up_cross_time).total_seconds()/60/60,
+                     'Maximum Elevation Above Threshold':group['val'].max() - threshold,
+                     'Maximum Elevation (MHHW)':data_mhhw['val'][data_mhhw['time'] == peak_time].values[0],
+                     'Tide Type':tide_type})
+    # Build the table once instead of concatenating row by row
+    inundations = pd.DataFrame(rows)
+    return Out(inundations, threshold_datum, units, input_file, {'threshold' : threshold , 'threshold_datum' : threshold_datum , 'data' : data , 'datums' : datums})

tadc/qa.py ADDED Viewed

@@ -0,0 +1,71 @@
+import datetime
+import logging
+import numpy as np
+import pandas as pd
+import requests
+logger = logging.getLogger(__name__)
+class Assurances:
+    """Quality-assurance fixes applied to a (time, water level) time series.
+
+    ts is the DataFrame being repaired (first column time, second column
+    water level); each assure_* method updates it. resample_minutes is the
+    requested sample spacing in minutes, or None to keep or derive the
+    spacing from the data.
+    """
+    def __init__(self, ts, resample_minutes):
+        self.ts = ts
+        self.resample_minutes = resample_minutes
+    @staticmethod
+    def _parses_as_float(value):
+        """Return True if float() accepts value."""
+        try:
+            float(value)
+        except (TypeError, ValueError):
+            return False
+        return True
+    @staticmethod
+    def _resample(ts, interval_want, interval_mean):
+        """Put ts onto an even grid of interval_want, taking the nearest sample within interval_mean."""
+        if interval_want < interval_mean:
+            logger.warning('WARNING: Input resampling rate is higher than data sampling rate. This may result in unstable behavior. Consider resampling to a lower rate.')
+        ti = pd.date_range(ts['time'].iloc[0],ts['time'].iloc[-1],freq=interval_want)
+        # The tolerance leaves grid points without a nearby sample empty, preserving gaps
+        return ts.set_index('time').reindex(ti,method='nearest',tolerance=interval_mean).reset_index().rename(columns={'index':'time'})
+    def assure_no_unreadable_values(self):
+        """Rename the columns to 'time'/'val' and make 'val' float, turning unreadable entries into NaN."""
+        ts = self.ts.rename(columns={self.ts.columns[0]:'time',self.ts.columns[1]:'val'})
+        try:
+            # Keep the converted column; discarding it left numeric strings as strings
+            ts['val'] = ts['val'].astype(float)
+        except (TypeError, ValueError):
+            # Coerce every unreadable entry at once instead of parsing one bad value
+            # out of the exception text, which handled a single distinct value only
+            numeric = pd.to_numeric(ts['val'], errors='coerce')
+            suspects = ts.loc[numeric.isna() & ts['val'].notna(),'val'].unique()
+            ts['val'] = numeric.astype(float)
+            for bad_val in suspects:
+                # Textual NaNs are legitimate missing-data markers, not unreadable values
+                if not self._parses_as_float(bad_val):
+                    logger.warning('WARNING: Unreadable value found in data: ' + str(bad_val) + '. Replacing all occurrences with NaNs.')
+        self.ts = ts
+    def assure_even_temporal_spacing(self):
+        """Drop duplicate timestamps and resample when spacing is uneven or a spacing was requested."""
+        ts = self.ts
+        ts['time'] = pd.to_datetime(ts['time'])
+        ts = ts.groupby('time').first().reset_index()  # Remove duplicates #
+        time_diffs = ts['time'].diff()
+        interval_mean = time_diffs.mean().floor('min')
+        # nunique() on the Series works whether or not unique() returns an array type with dropna()
+        uneven = time_diffs.dropna().nunique() > 1
+        if not uneven and self.resample_minutes is None:
+            self.ts = ts
+            return
+        if self.resample_minutes is None:
+            interval_want = interval_mean
+        else:
+            interval_want = pd.Timedelta(minutes = self.resample_minutes)
+        # total_seconds() rather than .seconds, which wraps at one day
+        spacing = str(round(interval_want.total_seconds()/60, 2))
+        if uneven:
+            logger.warning('WARNING: Input timeseries has uneven temporal spacing. Re-interpolating to a spacing of ' + spacing + ' minutes per sample.')
+        else:
+            logger.warning('WARNING: Re-interpolating to a spacing of ' + spacing + ' minutes per sample.')
+        self.ts = self._resample(ts, interval_want, interval_mean)
+    def assure_flatlines_are_gaps(self):
+        """Replace runs of (near-)constant values with NaN so they are treated as missing data."""
+        # A sample is "flat" when it differs from its predecessor by less than 0.001
+        is_flatline = self.ts['val'].diff().abs() < 0.001
+        consecutive_groups = is_flatline.ne(is_flatline.shift()).cumsum()
+        group_sizes = consecutive_groups.groupby(consecutive_groups).transform('size')
+        # Only runs of at least two consecutive flat samples count as a flatline
+        is_long_flatline = is_flatline & (group_sizes > 1)
+        # The last sample of each run is kept
+        is_end_of_run = is_long_flatline & is_long_flatline.ne(is_long_flatline.shift(-1))
+        is_long_flatline_final = is_long_flatline & ~is_end_of_run
+        if is_long_flatline_final.sum() > 0:
+            self.ts.loc[is_long_flatline_final,'val'] = np.nan
+            logger.warning('WARNING: Flatlines detected. Treating flatlines as missing data.')
+def run(ts, resample_minutes):
+    """Apply every quality-assurance step to ts, in order, and return the repaired series."""
+    checks = Assurances(ts, resample_minutes)
+    # Order matters: values are made numeric first, then the time grid is evened, then flatlines removed
+    for step in (checks.assure_no_unreadable_values,
+                 checks.assure_even_temporal_spacing,
+                 checks.assure_flatlines_are_gaps):
+        step()
+    return checks.ts

tadc/qc.py ADDED Viewed

@@ -0,0 +1,54 @@
+import datetime
+import logging
+import numpy as np
+import pandas as pd
+import requests
+from scipy.signal import periodogram
+logger = logging.getLogger(__name__)
+class Tests:
+    """Quality-control checks on the input time series and the control station choice.
+
+    ts is the input DataFrame (expected: a time column and a water level
+    column); control_station_id is the CO-OPS control station or None;
+    subordinate_lat / subordinate_lon locate the subordinate station in
+    degrees.
+    """
+    # Distance in km beyond which the control station triggers a warning
+    MAX_CONTROL_DISTANCE_KM = 10
+    def __init__(self, ts, control_station_id, subordinate_lat, subordinate_lon):
+        self.ts = ts
+        self.control_station_id = control_station_id
+        self.subordinate_lat = subordinate_lat
+        self.subordinate_lon = subordinate_lon
+    def check_csv_format(self):
+        """Require exactly two columns and rename them to 'time' and 'val'.
+
+        Raises:
+            RuntimeError: If the input does not have exactly two columns.
+        """
+        if len(self.ts.columns) != 2:
+            raise RuntimeError("Input csv file must contain two columns: time and water level")
+        self.ts = self.ts.rename(columns={self.ts.columns[0]:'time',self.ts.columns[1]:'val'})
+    def check_date_format(self):
+        """Parse the 'time' column into datetimes.
+
+        Raises:
+            ValueError: If the timestamps cannot be parsed.
+        """
+        try:
+            self.ts['time'] = pd.to_datetime(self.ts['time'])
+        except ValueError as err:
+            # Chain the cause so the offending value is still visible in the traceback
+            raise ValueError("Timestamps could not be interpreted.") from err
+    def check_control_station_distance(self):
+        """Warn when the control station is far from the subordinate station.
+
+        Looks the control station up in the CO-OPS metadata API; does nothing
+        when no control station was given.
+        """
+        if self.control_station_id is None:
+            return
+        r = requests.get('https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/' + str(self.control_station_id) + '.json?units=english')
+        # Parse the response once
+        station = r.json()['stations'][0]
+        d = self._haversine(self.subordinate_lat,self.subordinate_lon,station['lat'],station['lng'])
+        if d > self.MAX_CONTROL_DISTANCE_KM:
+            # Logger.warn is a deprecated alias that newer Python versions no longer provide
+            logger.warning('WARNING: Control station is ' + str(round(d,2)) + ' km from subordinate station.')
+    @staticmethod
+    def _haversine(lat1, lon1, lat2, lon2):
+        """Return the great-circle distance in km between two points given in degrees."""
+        lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
+        R = 6371  # Earth radius in km
+        dlat = lat2 - lat1
+        dlon = lon2 - lon1
+        a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
+        c = 2 * np.arcsin(np.sqrt(a))
+        km = R * c
+        return km
+def run(ts, control_station_id, subordinate_lat, subordinate_lon):
+    tests = Tests(ts, control_station_id, subordinate_lat, subordinate_lon)
+    tests.check_csv_format()
+    tests.check_date_format()
+    tests.check_control_station_distance()