virgo-modules 0.0.90.tar.gz → 0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of virgo-modules might be problematic.

Files changed (18)
  1. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/PKG-INFO +1 -1
  2. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/setup.py +1 -1
  3. virgo_modules-0.1.0/virgo_app/virgo_modules/src/backtester.py +360 -0
  4. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/re_utils.py +2 -1
  5. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/ticketer_source.py +411 -409
  6. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
  7. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/SOURCES.txt +1 -0
  8. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/LICENSE +0 -0
  9. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/README.md +0 -0
  10. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/setup.cfg +0 -0
  11. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/__init__.py +0 -0
  12. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/__init__.py +0 -0
  13. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
  14. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/edge_utils.py +0 -0
  15. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
  16. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
  17. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
  18. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: virgo_modules
- Version: 0.0.90
+ Version: 0.1.0
  Summary: data processing and statistical modeling using stock market data
  Home-page: https://github.com/miguelmayhem92/virgo_module
  Author: Miguel Mayhuire
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:

  setup(
      name="virgo_modules",
-     version="0.0.90",
+     version="0.1.0",
      description="data processing and statistical modeling using stock market data",
      package_dir={"": "virgo_app"},
      packages=find_packages(where="virgo_app"),
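Only the version string changes here; the new backtester module ships inside the existing virgo_modules package tree, so picking up the release is a plain upgrade, e.g. pip install --upgrade virgo-modules==0.1.0 (assuming the usual PyPI workflow).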
@@ -0,0 +1,360 @@
+ import pandas as pd
+ import numpy as np
+ import json
+
+ import matplotlib.pyplot as plt
+ import matplotlib.gridspec as gridspec
+ import seaborn as sns; sns.set()
+
+ import warnings
+ warnings.filterwarnings('ignore')
+
+ from .aws_utils import upload_file_to_aws
+
+ def sharpe_ratio(return_series):
+     '''
+     calculate the Sharpe ratio for a given return series.
+
+     Parameters:
+         return_series (pd.Series): pandas series of the asset returns
+
+     Returns:
+         sharpe (float): Sharpe ratio
+     '''
+     N = 255  # trading days in the year (change to 365 for crypto)
+     rf = 0.005  # half a percent risk-free rate
+     mean = return_series.mean() * N - rf
+     sigma = return_series.std() * np.sqrt(N)
+     sharpe = round(mean / sigma, 3)
+     return sharpe
+
+
+ class SignalAnalyserObject:
+     """
+     Class that produces back-test analysis for a given feature
+
+     Attributes
+     ----------
+     symbol_name : str
+         stock or asset to assess
+     feature_name : str
+         name of the feature to assess
+     test_size : int
+         testing data size
+     show_plot : boolean
+         if true, show plot for every method
+     save_path : str
+         if available, save results locally
+     save_aws : str
+         if available, export results to the remote repo
+     aws_credentials : dict
+         credentials for aws
+     signal_position : int
+         if available, signal position to open a position
+     df : pd.DataFrame
+         transformed data of the selected feature to perform the back-test
+
+     Methods
+     -------
+     signal_analyser(days_list=list):
+         given a signal position for either a bottom or a roof signal, calculate the expected return and its distribution for every time horizon in days_list
+     create_backtest_signal(days_strategy=int, high_exit=float, low_exit=float, open_in_list=list):
+         create a back-test analysis on the test data using opening and closing position criteria
+     """
+
+     def __init__(self, data, symbol_name, feature_name, test_size, signal_position = False, correct_signals = False, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+         """
+         Initialize object
+
+         Parameters
+         ----------
+         data (pd.DataFrame): data
+         symbol_name (str): name of the asset
+         feature_name (str): name of the feature
+         test_size (int): size of the test data
+         signal_position (int): signal position to open the position, False by default
+         correct_signals (int): clean abnormal signals using interpolation
+         show_plot (boolean): if true, show plot for every method
+         save_path (str): if set, save results locally e.g. r'C:/path/to/the/file/'
+         save_aws (str): if set, export results to the remote repo e.g. 'path/to/file/'
+         aws_credentials (dict): credentials for aws
+         return_fig (boolean): if true, methods will return objects
+
+         Returns
+         -------
+         None
+         """
+         self.ticket_name = symbol_name
+         self.feature_name = feature_name
+         self.test_size = test_size
+         self.show_plot = show_plot
+         self.save_path = save_path
+         self.save_aws = save_aws
+         self.aws_credentials = aws_credentials
+         self.return_fig = return_fig
+         self.signal_position = signal_position
+         ## preprocessing
+         up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+         features_base = ['Date', up_signal, low_signal, 'Close', 'Open', 'High', 'Low']
+
+         df = data[features_base].sort_values('Date')
+
+         df['signal_type'] = np.where(
+             df[up_signal] == 1,
+             'up',
+             np.where(
+                 df[low_signal] == 1,
+                 'down',
+                 'no signal'
+             )
+         )
+
+         def correct_sygnals(df, correct_i = 1):
+             ### signal cleaning
+             for i in range(1+correct_i, len(df)-1):
+                 start_i, end_i = i-(correct_i+1), i+1
+                 dfw = df.iloc[start_i: end_i,]
+                 before_type = dfw.iloc[0].signal_type
+                 after_type = dfw.iloc[-1].signal_type
+                 window_types = dfw.iloc[1:-1].signal_type.unique()
+                 n_window_type = len(window_types)
+                 if n_window_type == 1:
+                     if (before_type == after_type) and (window_types[0] != after_type):
+                         df.iloc[start_i+1: end_i-1, df.columns.get_loc('signal_type')] = before_type
+             return df.copy()
+
+         if correct_signals:
+             for correct_i in range(1, correct_signals+1):
+                 df = correct_sygnals(df, correct_i = correct_i)
+             df[up_signal] = np.where(df['signal_type'] == 'up', 1, 0)
+             df[low_signal] = np.where(df['signal_type'] == 'down', 1, 0)
+
+         ## indexing chains
+         df['lag_signal_type'] = df['signal_type'].shift(1)
+         df['lag_Date'] = df['Date'].shift(1)
+         df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+         df['break'] = np.where((df['span'] > 3) & (df['lag_signal_type'] == df['signal_type']), 1, 0)
+         df['break'] = np.where((df['lag_signal_type'] != df['signal_type']), 1, df['break'])
+         df['chain_id'] = df.sort_values(['Date']).groupby(['break']).cumcount() + 1
+         df['chain_id'] = np.where(df['break'] == 1, df['chain_id'], np.nan)
+         df['chain_id'] = df['chain_id'].fillna(method = 'ffill')
+
+         df['internal_rn'] = df.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
+         df['inv_internal_rn'] = df.sort_values(['Date'], ascending = False).groupby(['chain_id']).cumcount() + 1
+
+         df['first_in_chain'] = np.where(df['internal_rn'] == 1, True, False)
+         df['last_in_chain'] = np.where(df['inv_internal_rn'] == 1, True, False)
+
+         df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+         self.df = df.drop(columns = ['span', 'break', 'lag_signal_type', 'lag_Date']).copy()
+
+     def signal_analyser(self, days_list):
+         """
+         calculate expected returns and signal-span distributions for the given time horizons
+
+         Parameters
+         ----------
+         days_list (list): list of integers to calculate expected returns
+
+         Returns
+         -------
+         if return_fig is true, returns a matplotlib fig
+         """
+         signal_position = self.signal_position
+         df = self.df.iloc[0:-self.test_size,:].copy()
+         returns_list = list()
+
+         for days in days_list:
+             feature_ = f'return_{days}d'
+             df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+             returns_list.append(feature_)
+
+         df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
+         df['open_short'] = np.where(df.first_in_chain == True, True, np.nan)
+
+         # plotting
+         fig, axs = plt.subplots(1, 4, figsize = (20,5))
+         palette = {"go down": "tomato", "go up": "lightblue"}
+
+         df2 = df[df.signal_type.isin(['up','down'])]
+         df2['lag_Date'] = df2['Date'].shift(1)
+         df2['lag_signal_type'] = df2['signal_type'].shift(1)
+         df2 = df2[df2.lag_signal_type != df2.signal_type]
+         df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
+         sns.violinplot(data=df2, y="span", ax = axs[0], color = 'lightblue', linewidth=0.7, inner="quart")
+         sns.stripplot(data=df2, y="span", ax = axs[0], jitter=True, zorder=1)
+         axs[0].set_title('span between last signals')
+
+         df.signal_type = df.signal_type.map({'up':'go down', 'down': 'go up'})
+         df_ = df[df.last_in_chain == True]
+         df_['part'] = '-'
+         sns.violinplot(data=df_, y="internal_rn", x='part', ax = axs[1], hue="signal_type", inner="quart", palette = palette, gap=0.1, split=True, linewidth=0.7)
+         axs[1].set_title('signal duration distribution')
+
+         if signal_position:
+             for feature in returns_list:
+                 df[feature] = df[feature].shift(-signal_position)
+
+         df_melt = df[df.open_long == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+         df_melt = df_melt.dropna()
+         sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type", ax = axs[2], split=True, gap=0.1, inner="quart", palette = palette, linewidth=0.8)
+         axs[2].axhline(y=0, color='grey', linestyle='--')
+         axs[2].set_title('E. returns - end of the signal')
+
+         df_melt = df[df.open_short == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+         df_melt = df_melt.dropna()
+         sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type", ax = axs[3], split=True, gap=0.1, inner="quart", palette = palette, linewidth=0.8)
+         axs[3].axhline(y=0, color='grey', linestyle='--')
+         axs[3].set_title('E. returns - start of the signal')
+
+         if self.show_plot:
+             plt.show()
+
+         if self.save_path:
+             result_plot_name = f'signals_strategy_distribution_{self.feature_name}.png'
+             fig.savefig(self.save_path+result_plot_name)
+             # pickle.dump(axs, open(self.save_path+result_plot_name, 'wb'))
+
+         if self.save_path and self.save_aws:
+             # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_plot_name, input_path = self.save_path+result_plot_name)
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+         if not self.show_plot:
+             plt.close()
+
+         del df
+
+         if self.return_fig:
+             return fig
+
+     def create_backtest_signal(self, days_strategy, high_exit = False, low_exit = False, open_in_list = ['down']):
+         """
+         create a back-test analysis on the test data
+
+         Parameters
+         ----------
+         days_strategy (int): position horizon
+         high_exit (float): max threshold to close the position
+         low_exit (float): min threshold to close the position; it is compared directly against the low return, so a stop-loss should be negative (e.g. -3)
+         open_in_list (list): list of strings ("down","up") to assess signals
+
+         Returns
+         -------
+         if return_fig is true, returns a matplotlib fig and a list of dicts containing the analysis
+         """
+         asset_1 = 'Close'
+         up_signal, low_signal = f'signal_up_{self.feature_name}', f'signal_low_{self.feature_name}'
+         signal_position = self.signal_position
+         dft = self.df.iloc[-self.test_size:,:].reset_index(drop=True).copy()
+
+         dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
+         dft['bench_prod'] = dft['lrets_bench'].cumsum()
+         dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
+
+         map_ = {'down':'END LOW TREND', 'up': 'BEGINNING HIGH TREND'}
+
+         open_in_list_items = len(open_in_list)
+         fig, axs = plt.subplots(1, open_in_list_items, figsize = (7*open_in_list_items,6))
+         messages = list()
+         for i, open_in in enumerate(open_in_list):
+             axs_ = axs if open_in_list_items == 1 else axs[i]
+             if open_in == 'down':
+                 dft['open_long'] = np.where((dft.last_in_chain == True) & (dft.signal_type == 'down'), True, np.nan) # open strat
+             elif open_in == 'up':
+                 dft['open_long'] = np.where((dft.first_in_chain == True) & (dft.signal_type == 'up'), True, np.nan) # open strat
+
+             def chain_position(dft):
+                 dft['open_long_id'] = np.where(dft['open_long'] == True, dft.chain_id, np.nan)
+                 dft['open_long_id'] = dft['open_long_id'].fillna(method = 'ffill')
+                 dft['open_long_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).cumcount() + 1
+                 return dft
+
+             if signal_position:
+                 dft['open_long'] = dft.sort_values(['Date'])['open_long'].shift(signal_position)
+
+             dft = chain_position(dft)
+             dft['flag'] = np.where(dft['open_long_rn'] < days_strategy, 1, 0)
+
+             if high_exit and low_exit:
+                 dft['open_strat'] = np.where(dft.open_long == True, dft.Open, np.nan) # open strat
+                 dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
+                 dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
+                 dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
+                 dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
+                 dft['max_step_chain'] = dft.groupby(['open_long_id'])['open_long_rn'].transform('max')
+                 dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['open_long_rn'] == days_strategy) | (dft['max_step_chain'] == dft['open_long_rn'])), 1, np.nan)
+                 dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
+
+                 dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
+                 dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1, -1, np.nan))
+                 dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
+                 dft['exit_order'] = dft.sort_values(['Date']).groupby(['open_long_id','exit']).cumcount() + 1
+                 dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
+                 dft = dft.drop(columns = ['exit_order'])
+                 ## if last signal is near
+                 max_id = dft.open_long_id.max()
+                 dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).open_long_rn.transform('max')
+                 dft['exit'] = np.where((dft.open_long_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.open_long_rn), 1, dft['exit'])
+
+                 dft['exit_step'] = np.where(dft.exit == 1, dft.open_long_rn, np.nan)
+                 dft['exit_step'] = dft.sort_values(['Date']).groupby(['open_long_id']).exit_step.transform('max')
+
+                 dft['flag'] = np.where(dft.open_long_rn <= dft.exit_step, 1, 0)
+
+             dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
+             dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(), -0.0, dft['lrets_strat'])
+             dft['lrets_prod'] = dft['lrets_strat'].cumsum()
+             dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
+
+             bench_rets = round(dft['bench_prod_exp'].values[-1]*100, 1)
+             strat_rets = round(dft['strat_prod_exp'].values[-1]*100, 1)
+
+             bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()), 1)
+             strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()), 1)
+
+             message1 = f'{bench_rets}%'
+             message2 = f'{strat_rets}%'
+
+             messages_ = {
+                 'type strategy': map_[open_in],
+                 'benchmark return:': message1,
+                 'benchmark sharpe ratio:': bench_sr,
+                 'strategy return:': message2,
+                 'strategy sharpe ratio:': strat_sr,
+             }
+             messages.append(messages_)
+             if self.show_plot:
+                 print('----------------------------')
+                 print(messages_)
+                 print('----------------------------')
+
+             axs_.plot(dft.bench_prod_exp.values, label = 'benchmark', color = 'steelblue')
+             axs_.scatter(range(len(dft)), np.where(dft[low_signal] == 1, dft.bench_prod_exp.values, np.nan), color = 'red', label = 'signal')
+             axs_.scatter(range(len(dft)), np.where(dft[up_signal] == 1, dft.bench_prod_exp.values, np.nan), color = 'green', label = 'signal')
+             axs_.plot(dft.strat_prod_exp.values, label = 'strategy', color = 'darksalmon')
+             axs_.set_xlabel("index")
+             axs_.set_ylabel("cumulative return")
+             axs_.set_title(f'{map_[open_in]} strategy and cumulative returns based on signals')
+             axs_.legend()
+
+         if self.show_plot:
+             plt.show()
+
+         if self.save_path:
+             result_json_name = f'signals_strategy_return_{self.feature_name}.json'
+             result_plot_name = f'signals_strategy_return_{self.feature_name}.png'
+
+             plt.savefig(self.save_path+result_plot_name)
+
+             with open(self.save_path+result_json_name, "w") as outfile:
+                 json.dump(messages, outfile)
+
+         if self.save_path and self.save_aws:
+
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+         if not self.show_plot:
+             plt.close()
+
+         del dft
+
+         if self.return_fig:
+             return fig, messages
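For orientation, a minimal usage sketch of the new module (hypothetical symbol, feature and parameter values; it assumes a dataframe that already carries Date/Open/High/Low/Close columns plus the signal_up_<feature> and signal_low_<feature> flags produced elsewhere in the package):

    from virgo_modules.src.backtester import SignalAnalyserObject, sharpe_ratio

    # df: pd.DataFrame with 'Date', 'Open', 'High', 'Low', 'Close',
    # 'signal_up_my_feature' and 'signal_low_my_feature' columns
    analyser = SignalAnalyserObject(
        df, 'AAPL', 'my_feature', test_size=250,
        correct_signals=2,        # interpolate isolated abnormal signals
        show_plot=False, return_fig=True,
    )
    fig = analyser.signal_analyser(days_list=[5, 10, 20])   # expected-return distributions
    fig2, messages = analyser.create_backtest_signal(
        days_strategy=10, high_exit=5, low_exit=-3, open_in_list=['down', 'up'],
    )
    print(messages)   # benchmark vs. strategy cumulative return and Sharpe ratio per strategy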
@@ -1378,7 +1378,8 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
      arguments_to_use = configs[feature_name]['config_params']
      method_to_use = configs[feature_name]['method']
      getattr(object_stock, method_to_use)(**arguments_to_use, plot = False, save_features = False)
-     object_stock.produce_order_features(feature_name)
+     if method_to_use not in ['minmax_pricefeature']:
+         object_stock.produce_order_features(feature_name)
      # getting targets
      object_stock.get_categorical_targets(**target_params_up)
      object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})
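The guard above skips produce_order_features for features built by minmax_pricefeature, presumably because that method does not emit the signal columns the ordering step expects; any future method with the same property can simply be appended to the exclusion list, e.g.:

    # hypothetical named constant for the exclusion list
    SKIP_ORDER_FEATURES = ['minmax_pricefeature']
    if method_to_use not in SKIP_ORDER_FEATURES:
        object_stock.produce_order_features(feature_name)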
@@ -250,25 +250,6 @@ class FeaturesEntropy(BaseEstimator, TransformerMixin):
          X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
          return X

- def sharpe_ratio(return_series):
-
-     '''
-     calculate sharpe ratio for given array.
-
-     Parameters:
-         return_series (pd.series): pandas series of the asset returns
-
-     Returns:
-         sharpe (float): sharpe ratio
-     '''
-
-     N = 255 # Trading days in the year (change to 365 for crypto)
-     rf = 0.005 # Half a percent risk free rare
-     mean = return_series.mean() * N -rf
-     sigma = return_series.std() * np.sqrt(N)
-     sharpe = round(mean / sigma, 3)
-     return sharpe
-
  class signal_combiner(BaseEstimator, TransformerMixin):

      """
@@ -1803,7 +1784,7 @@ class stock_eda_panel(object):
          if plot:
              self.signal_plotter(feature_name)

-     def minmax_pricefeature(self, type_func, window, distance = False, save_features = False):
+     def minmax_pricefeature(self, type_func, window, distance = False, plot = False, save_features = False):
          """
          perform relative price/distance with respect to the min/max price in a given time scope

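Adding the plot keyword brings minmax_pricefeature in line with the other feature methods, which matters because extract_data_traintest (see the re_utils.py hunk above) calls every configured method generically; without the parameter, a dispatch like the one below would raise a TypeError for this method (hypothetical arguments):

    # generic dispatch of the kind used by extract_data_traintest
    getattr(object_stock, 'minmax_pricefeature')(type_func='min', window=50, plot=False, save_features=False)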
@@ -2809,103 +2790,453 @@ class hmm_feature_selector():
          self.feature_results = feature_results
          self.best_features = pd.DataFrame(self.feature_results).T.sort_values('mean relevance').iloc[-1,:].features

- class signal_analyser_object:
+ def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest = False, exit_params = {}):
+     '''
+     code snippet that runs the signal analyser objects; the analysis is a signal back-test
+
+     Parameters:
+         test_data_size (int): test data size
+         feature_name (str): name of the feature to assess
+         days_list (list): time scope to assess the returns
+         configuration (dict): parameters of the method to run
+         object_stock (obj): object with data to assess
+         signal_analyser_object (obj): signal_analyser object
+         plot (boolean): if true, plot results
+         backtest (boolean): if true, run backtest
+         exit_params (dict): parameters of exit returns
+
+     Returns:
+         mean_median_return (float): median return of the back-tests
+     '''
+     method(**configuration)
+     signal_assess = signal_analyser_object(object_stock.df, object_stock.stock_code, show_plot = plot)
+     signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
+
+     if backtest:
+         print('-----------------------back test ---------------------------')
+         signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params)
+
+     return signal_assess.mean_median_return
+
+ def iterate_signal_analyser(test_data_size, feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
+     '''
+     code snippet that iterates the signal analyser over candidate configurations
+
+     Parameters:
+         test_data_size (int): test data size
+         feature_name (str): name of the feature to assess
+         days_list (list): time scope to assess the returns
+         arguments_to_test: parameters to test
+         method: method to run
+         object_stock (obj): object with data to assess
+         signal_analyser_object (obj): signal_analyser object
+         plot (boolean): if true, plot results
+
+     Returns:
+         best_result (int): index from arguments_to_test with the best result
+     '''
+     results = list()
+     for key in arguments_to_test.keys():
+         configuration = arguments_to_test.get(key)
+         mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
+         results.append(mean_median_return)
+
+     df_result = pd.DataFrame({'keys': arguments_to_test.keys(), 'results': results})
+     if plot:
+         plt.plot(df_result['keys'], df_result['results'])
+         plt.scatter(df_result['keys'], df_result['results'])
+         plt.title('simulation between configurations')
+         plt.ylabel('median expected return')
+         plt.show()
+
+     best_result = df_result.sort_values('results', ascending = False)['keys'].values[0]
+     return best_result
+
+ class analyse_index(stock_eda_panel):
      """
-     class that is going to analyse signals
+     class that analyses an asset against one or more market indexes (betas and correlation scores)

      Attributes
      ----------
-     data : pd.DataFrame
-         symbol of the asset
-     ticket_name : str
-         asset symbol
-     show_plot : boolean
-         if true show plot for every method
+     index_data : pd.DataFrame
+         index data (or the name of the index)
+     indexes : list
+         list of indexes
+     asset : str
+         name of the asset
+     n_obs : int
+         number of rows to extract
+     lag : int
+         lag to apply
+     data_window : str
+         5y 10y 15y
+     show_plot : bool
+         If True, show plots
      save_path : str
-         if true, save results in file
+         local path for saving e.g. r'C:/path/to/the/file/'
      save_aws : str
-         if true, export results to remote repo
+         remote key in s3 bucket path e.g. 'path/to/file/'
      aws_credentials : dict
-         credentials for aws
-     return_fig : boolean
-         if true, methods will return objects
-     create_backtest_signal(days_strategy=list, test_size=int, feature_name=str, high_exit=float, low_exit=float):
-         perform backtest signal analysis
-
+         dict with the aws credentials
+     merger_df : pd.DataFrame
+         dataframe with the index and asset data
+     states_result : dict
+         betas and correlation score results
+
      Methods
      -------
-     signal_analyser(test_size=int, feature_name=str, days_list=list, threshold=float, verbose=boolean, signal_position=boolean):
-         perform signal analysis and feature extraction
-
+     process_data():
+         using stock_eda_panel, get data and merge data
+     plot_betas(sample_size=int, offset=int, subsample_ts=int):
+         display beta analysis plot
+     get_betas(subsample_ts=int):
+         get the general beta and the last-sample beta; correlation scores are included too
      """
-
-     def __init__(self, data, symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+     def __init__(self, index_data, asset, n_obs, lag, data_window = '5y', show_plot = False, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
          """
          Initialize object

          Parameters
          ----------
-         data (pd.DataFrame): data
-         ticket_name (str): name of the asset
-         show_plot (boolean): if true show plot for every method
-         save_path (str): if true, save results in file e.g r'C:/path/to/the/file/'
-         save_aws (str): if true, export results to remote repo e.g. 'path/to/file/'
-         aws_credentials (dict): credentials for aws
-         return_fig (boolean): if true, methods will return objects
+         index_data (pd.DataFrame or str): index data dataframe or index string
+         asset (str): name of the asset
+         n_obs (int): number of rows to extract
+         lag (int): lag to apply
+         data_window (str): 5y 10y 15y
+         show_plot (bool): If True, show plots
+         save_path (str): local path for saving e.g. r'C:/path/to/the/file/'
+         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+         aws_credentials (dict): dict with the aws credentials

          Returns
          -------
          None
          """
-         self.data = data.copy()
-         self.ticket_name = symbol_name
+
+         if type(index_data) != str:
+             index_data['Date'] = pd.to_datetime(index_data['Date'])
+             self.index_data = index_data
+             self.indexes = [x for x in list(index_data.columns) if x != 'Date']
+         else:
+             self.indexes = [index_data]
+
+         self.index_data = index_data
+         self.asset = asset
+         self.n_obs = n_obs
+         self.data_window = data_window
+         self.lag = lag
+
          self.show_plot = show_plot
+         self.return_fig = return_fig
          self.save_path = save_path
          self.save_aws = save_aws
-         self.aws_credentials = aws_credentials
-         self.return_fig = return_fig

-     def signal_analyser(self, test_size, feature_name, days_list, threshold = 0.05, verbose = False, signal_position = False):
+     def process_data(self):
          """
-         perform signal analysis and feature extraction
+         using stock_eda_panel, get data and merge data
+
+         Parameters
+         ----------
+         None
+
+         Returns
+         -------
+         None
+         """
+         asset = stock_eda_panel(self.asset, self.n_obs, data_window=self.data_window)
+         asset.get_data()
+         df = asset.df[['Date','Close']]
+
+         if type(self.index_data) != str:
+             df_merge = df.merge(self.index_data, on = ['Date'], how = 'left').sort_values('Date')
+
+         else:
+             indx = stock_eda_panel(self.index_data, self.n_obs, data_window=self.data_window)
+             indx.get_data()
+             indx_df = indx.df[['Date','Close']].rename(columns = {'Close':self.index_data})
+             df_merge = df.merge(indx_df, on = ['Date'], how = 'left').sort_values('Date')
+
+         for colx in ['Close'] + self.indexes:
+             df_merge[f'{colx}_pct'] = df_merge[colx]/df_merge[colx].shift(self.lag) - 1
+
+         df_merge.dropna(inplace = True)
+         self.merger_df = df_merge.rename(columns = {'Close_pct': 'asset_return'})
+
+     def plot_betas(self, sample_size, offset, subsample_ts = False, index = False):
+         """
+         display beta analysis plot

          Parameters
          ----------
-         test_size (int): test data size
-         feature_name (str): name of the feature to assess
-         days_list (list): list of integers [3,8,10] to assess
-         threshold (float): alpha or z threshold
-         verbose (boolean): print metrics
-         signal_position (int): if true, the signal is taken at the given step after the signal end
+         sample_size (int): number of days or window size to calculate beta
+         offset (int): overlap between windows
+         subsample_ts (int): subsample size of data

          Returns
          -------
          None
          """
-         data = self.data
-         self.feature_name = feature_name
-         up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
-         features_base = ['Date', up_signal, low_signal, 'Close']
+         if (type(self.index_data) == str) & (index != False):
+             raise Exception("No need of index argument")
+         else:
+             index = self.indexes[0]
+
+         index_pct = f'{index}_pct'
+         ### plotting analysis
+         figure, ax = plt.subplot_mosaic(
+             [["scatter_total", "scatter_sample", 'ts', 'ts']],
+             layout="constrained",
+             figsize=(18, 5)
+         )

-         df = data[features_base].sort_values('Date').iloc[0:-test_size,:]
-         returns_list = list()
+         ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df[index_pct])
+
+         huber_regr = HuberRegressor(fit_intercept = True)
+         huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
+         b, a = huber_regr.coef_[0], huber_regr.intercept_
+
+         # b, a = np.polyfit(self.merger_df.asset_return, self.merger_df[index_pct], 1)
+         ax['scatter_total'].plot(self.merger_df.asset_return, b*self.merger_df.asset_return+a, color='red')

-         for days in days_list:
+         ax['ts'].plot(self.merger_df.Date, self.merger_df.Close, color = 'grey', alpha = 0.3)

-             feature_ = f'return_{days}d'
-             days = days + signal_position if signal_position else days
-             df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
-             returns_list.append(feature_)
+         if subsample_ts:
+             self.merger_df = self.merger_df.iloc[-subsample_ts:,:].dropna()

-         df['signal_type'] = np.where(
-             df[up_signal] == 1,
-             'up',
-             np.where(
-                 df[low_signal] == 1,
-                 'down',
-                 None
-             )
-         )
+         for i in range(0, len(self.merger_df)-sample_size, offset):
+
+             merger_ = self.merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
+             x = merger_[index_pct]
+             y = merger_.asset_return
+             # b, a = np.polyfit(x, y, 1)
+             huber_regr = HuberRegressor(fit_intercept = True)
+             huber_regr.fit(x.values.reshape(-1,1), y.values.reshape(-1,1))
+             b, a = huber_regr.coef_[0], huber_regr.intercept_
+
+             normalize = mcolors.Normalize(vmin=-1, vmax=1)
+             colormap = cm.jet
+
+             ax['scatter_sample'].plot(x, y, 'o', color = 'blue', alpha = 0.1)
+             ax['scatter_sample'].plot(x, b*x+a, color=colormap(normalize(b)))
+             ax['scatter_sample'].set_xlim(-0.06, 0.06)
+             ax['scatter_sample'].set_ylim(-0.06, 0.06)
+
+             plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap(normalize(b)), s = 10)
+
+         scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
+         scalarmappaple.set_array(x)
+
+         plt.title(f'{self.asset} using index: {index}')
+         plt.colorbar(scalarmappaple)
+
+         if self.show_plot:
+             plt.show()
+
+         if self.save_path:
+             result_plot_name = f'market_best_fit.png'
+             figure.savefig(self.save_path+result_plot_name)
+
+         if self.save_path and self.save_aws:
+             # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.asset}/'+result_plot_name, input_path = self.save_path+result_plot_name)
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+         if not self.show_plot:
+             plt.close()
+
+         if self.return_fig:
+             return figure
+
+     def get_betas(self, subsample_ts=False):
+         """
+         get the general beta and the last-sample beta; correlation scores are included too
+
+         Parameters
+         ----------
+         subsample_ts (int): subsample size of data
+
+         Returns
+         -------
+         None
+         """
+         result = list()
+         for index in self.indexes:
+
+             index_pct = f'{index}_pct'
+             huber_regr = HuberRegressor(fit_intercept = True)
+             huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
+             general_beta, a = huber_regr.coef_[0], huber_regr.intercept_
+             general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df[index])[0]
+
+             dict_res = {
+                 'index': index,
+                 'general_beta': general_beta,
+                 'general_r': general_r,
+             }
+
+             if subsample_ts:
+                 tmp_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
+                 huber_regr = HuberRegressor(fit_intercept = True)
+                 huber_regr.fit(tmp_df.asset_return.values.reshape(-1,1), tmp_df[index_pct].values.reshape(-1,1))
+                 sample_beta, a = huber_regr.coef_[0], huber_regr.intercept_
+                 sample_r = stats.mstats.pearsonr(tmp_df.asset_return, tmp_df[index])[0]
+                 dict_res['sample_beta'] = sample_beta
+                 dict_res['sample_r'] = sample_r
+
+             result.append(dict_res)
+
+         self.states_result = result
+
+
+ def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
+     '''
+     select the relevant beta result data of a given asset
+
+     Parameters:
+         data_market (pd.DataFrame): dataframe of the market results
+         ticket_name (str): name of the asset
+         show_plot (bool): if true, plot results
+         save_path (str): local path for saving e.g. r'C:/path/to/the/file/'
+         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+         aws_credentials (dict): dict of the aws credentials
+
+     Returns:
+         selection (pd.DataFrame): dataframe of the most relevant beta
+     '''
+     all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
+     all_betas['gen_r2'] = all_betas.general_r ** 2
+     all_betas['sampl_r2'] = all_betas.sample_r ** 2
+     selection = all_betas.sort_values('gen_r2', ascending = False).head(2).sort_values('sampl_r2', ascending = False).head(1).drop(columns = ['gen_r2','sampl_r2'])
+
+     if show_plot:
+         print(selection)
+     if save_path:
+         result_plot_name = f'market_best_fit.csv'
+         selection.to_csv(save_path+result_plot_name)
+
+     if save_path and save_aws:
+         # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name, input_path = save_path+result_plot_name)
+         upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
+     return selection
+
+ #### to deprecate
+
+ def sharpe_ratio(return_series):
+
+     '''
+     calculate sharpe ratio for given array.
+
+     Parameters:
+         return_series (pd.series): pandas series of the asset returns
+
+     Returns:
+         sharpe (float): sharpe ratio
+     '''
+
+     N = 255 # Trading days in the year (change to 365 for crypto)
+     rf = 0.005 # Half a percent risk free rate
+     mean = return_series.mean() * N - rf
+     sigma = return_series.std() * np.sqrt(N)
+     sharpe = round(mean / sigma, 3)
+     return sharpe
+
+ class signal_analyser_object:
+     """
+     class that is going to analyse signals
+
+     Attributes
+     ----------
+     data : pd.DataFrame
+         data of the asset
+     ticket_name : str
+         asset symbol
+     show_plot : boolean
+         if true show plot for every method
+     save_path : str
+         if set, save results in file
+     save_aws : str
+         if set, export results to remote repo
+     aws_credentials : dict
+         credentials for aws
+     return_fig : boolean
+         if true, methods will return objects
+
+     Methods
+     -------
+     signal_analyser(test_size=int, feature_name=str, days_list=list, threshold=float, verbose=boolean, signal_position=boolean):
+         perform signal analysis and feature extraction
+     create_backtest_signal(days_strategy=list, test_size=int, feature_name=str, high_exit=float, low_exit=float):
+         perform backtest signal analysis
+     """
+
+     def __init__(self, data, symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+         """
+         Initialize object
+
+         Parameters
+         ----------
+         data (pd.DataFrame): data
+         symbol_name (str): name of the asset
+         show_plot (boolean): if true show plot for every method
+         save_path (str): if set, save results in file e.g. r'C:/path/to/the/file/'
+         save_aws (str): if set, export results to remote repo e.g. 'path/to/file/'
+         aws_credentials (dict): credentials for aws
+         return_fig (boolean): if true, methods will return objects
+
+         Returns
+         -------
+         None
+         """
+         self.data = data.copy()
+         self.ticket_name = symbol_name
+         self.show_plot = show_plot
+         self.save_path = save_path
+         self.save_aws = save_aws
+         self.aws_credentials = aws_credentials
+         self.return_fig = return_fig
+
+     def signal_analyser(self, test_size, feature_name, days_list, threshold = 0.05, verbose = False, signal_position = False):
+         """
+         perform signal analysis and feature extraction
+
+         Parameters
+         ----------
+         test_size (int): test data size
+         feature_name (str): name of the feature to assess
+         days_list (list): list of integers [3,8,10] to assess
+         threshold (float): alpha or z threshold
+         verbose (boolean): print metrics
+         signal_position (int): if set, the signal is taken at the given step after the signal end
+
+         Returns
+         -------
+         None
+         """
+         data = self.data
+         self.feature_name = feature_name
+         up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+         features_base = ['Date', up_signal, low_signal, 'Close']
+
+         df = data[features_base].sort_values('Date').iloc[0:-test_size,:]
+         returns_list = list()
+
+         for days in days_list:
+
+             feature_ = f'return_{days}d'
+             days = days + signal_position if signal_position else days
+             df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+             returns_list.append(feature_)
+
+         df['signal_type'] = np.where(
+             df[up_signal] == 1,
+             'up',
+             np.where(
+                 df[low_signal] == 1,
+                 'down',
+                 None
+             )
+         )
          df = df[~df.signal_type.isna()]
          df['lag_Date'] = df['Date'].shift(1)
          df['lag_signal_type'] = df['signal_type'].shift(1)
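A minimal usage sketch of the relocated beta tooling (hypothetical symbols; it assumes stock_eda_panel can download both series and that HuberRegressor, scipy stats and the matplotlib colour helpers are imported at module level, as elsewhere in ticketer_source.py):

    from virgo_modules.src.ticketer_source import analyse_index

    idx = analyse_index('SPY', asset='AAPL', n_obs=1500, lag=5, data_window='10y', show_plot=False)
    idx.process_data()                 # download and merge asset and index closes
    idx.get_betas(subsample_ts=250)    # robust (Huber) betas plus Pearson correlation scores
    print(idx.states_result)           # e.g. [{'index': 'SPY', 'general_beta': ..., 'sample_beta': ..., ...}]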
@@ -3173,332 +3504,3 @@ class signal_analyser_object:

          if self.return_fig:
              return fig, messages
-
- def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False, exit_params = {}):
-     '''
-     code snippet that is going run some objects. The analysis is signal analyse which is backtesting
-
-     Parameters:
-         test_data_size (int): test data size
-         feature_name (str): name of the feature to assess
-         days_list (list): tome scope to assess the returns
-         configuration (dict): parameters of the method to run
-         object_stock (obj): object with data to assess
-         signal_analyser_object (obj): signal_analyser object
-         plot (boolean): if true, plot results
-         backtest (boolean): if true, run backtest
-         exit_params (dict): parameters of exit returns
-
-     Returns:
-         mean_median_return (float): median return of the backtests
-     '''
-     method(**configuration)
-     signal_assess = signal_analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot)
-     signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
-
-     if backtest:
-         print('-----------------------back test ---------------------------')
-         signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params )
-
-     return signal_assess.mean_median_return
-
- def iterate_signal_analyser(test_data_size,feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
-     '''
-     code snippet is going to iterate signal analyser
-
-     Parameters:
-         test_data_size (int): test data size
-         feature_name (str): name of the feature to assess
-         days_list (list): tome scope to assess the returns
-         arguments_to_test: parameters to test
-         method: methods to run
-         object_stock (obj): object with data to assess
-         signal_analyser_object (obj): signal_analyser object
-         plot (boolean): if true, plot results
-
-     Returns:
-         best_result (int): index from the arguments_to_test with the best result
-     '''
-     results = list()
-     for key in arguments_to_test.keys():
-         configuration = arguments_to_test.get(key)
-         mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
-         results.append(mean_median_return)
-
-     df_result = pd.DataFrame({'keys':arguments_to_test.keys(),'results':results})
-     if plot:
-         plt.plot(df_result['keys'], df_result['results'])
-         plt.scatter(df_result['keys'], df_result['results'])
-         plt.title('simulation between configurations')
-         plt.ylabel('median expected return')
-         plt.show()
-
-     best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
-     return best_result
-
- class analyse_index(stock_eda_panel):
-     """
-     class that is going to train hmm models to perform feature selection
-
-     Attributes
-     ----------
-     data_index : pd.DataFrame
-         name of the index
-     indexes: list
-         list of indexes
-     asset : str
-         name of the asset
-     n_obs : int
-         number of rows to extract
-     lag : int
-         lag to apply
-     data_window : str
-         5y 10y 15y
-     show_plot : bool
-         If True, show plots
-     save_path : str
-         local path for saving e.g r'C:/path/to/the/file/'
-     save_aws : str
-         remote key in s3 bucket path e.g. 'path/to/file/'
-     aws_credentials : dict
-         dict with the aws credentials
-     merger_df : pd.DataFrame
-         dataframe with the index and asset data
-     states_result = dict
-         betas and correlation score results
-
-     Methods
-     -------
-     process_data():
-         using stock_eda_panel, get data and merge data
-     plot_betas(sample_size=int, offset=int, subsample_ts=int):
-         display beta analysis plot
-     get_betas(subsample_ts=int)
-         get general beta and last sample beta, correlation score is included too
-     """
-     def __init__(self, index_data, asset, n_obs, lag, data_window = '5y', show_plot = False, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
-         """
-         Initialize object
-
-         Parameters
-         ----------
-         index_data (pd.DataFrame or str): index data dataframe or index string
-         asset (str): name of the asset
-         n_obs (int): number of rows to extract
-         lag (int): lag to apply
-         data_window (str): 5y 10y 15y
-         show_plot (bool): If True, show plots
-         save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-         aws_credentials (dict): dict with the aws credentials
-
-         Returns
-         -------
-         None
-         """
-
-
-         if type(index_data) != str:
-             index_data['Date'] = pd.to_datetime(index_data['Date'])
-             self.index_data = index_data
-             self.indexes = [ x for x in list(index_data.columns) if x != 'Date']
-         else:
-             self.indexes = [index_data]
-
-         self.index_data = index_data
-         self.asset = asset
-         self.n_obs = n_obs
-         self.data_window = data_window
-         self.lag = lag
-
-         self.show_plot = show_plot
-         self.return_fig = return_fig
-         self.save_path = save_path
-         self.save_aws = save_aws
-
-     def process_data(self):
-         """
-         using stock_eda_panel, get data and merge data
-
-         Parameters
-         ----------
-         None
-
-         Returns
-         -------
-         None
-         """
-         asset = stock_eda_panel(self.asset, self.n_obs, data_window=self.data_window)
-         asset.get_data()
-         df = asset.df[['Date','Close']]
-
-         if type(self.index_data) != str:
-             df_merge = df.merge(self.index_data, on = ['Date'], how = 'left').sort_values('Date')
-
-         else:
-             indx = stock_eda_panel(self.index_data, self.n_obs, data_window=self.data_window)
-             indx.get_data()
-             indx_df = indx.df[['Date','Close']].rename(columns = {'Close':self.index_data})
-             df_merge = df.merge(indx_df, on = ['Date'], how = 'left').sort_values('Date')
-
-         for colx in ['Close'] + self.indexes:
-             df_merge[f'{colx}_pct'] = df_merge[colx]/df_merge[colx].shift(self.lag) - 1
-
-         df_merge.dropna(inplace = True)
-         self.merger_df = df_merge.rename(columns = {'Close_pct': 'asset_return'})
-
-     def plot_betas(self,sample_size, offset, subsample_ts =False, index = False):
-         """
-         display beta analysis plot
-
-         Parameters
-         ----------
-         sample_size (int): number of days or window size to calculate beta
-         offset (int): overlap between windows
-         subsample_ts (int): subsample size of data
-
-         Returns
-         -------
-         None
-         """
-         if (type(self.index_data) == str) & (index != False):
-             raise Exception("No need of index argument")
-         else:
-             index = self.indexes[0]
-
-         index_pct = f'{index}_pct'
-         ### ploting analysis
-         figure, ax = plt.subplot_mosaic(
-             [["scatter_total", "scatter_sample",'ts','ts']],
-             layout="constrained",
-             figsize=(18, 5)
-         )
-
-         ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df[index_pct])
-
-         huber_regr = HuberRegressor(fit_intercept = True)
-         huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
-         b, a = huber_regr.coef_[0], huber_regr.intercept_
-
-         # b, a = np.polyfit(self.merger_df.asset_return, self.merger_df[index_pct], 1)
-         ax['scatter_total'].plot(self.merger_df.asset_return, b*self.merger_df.asset_return+a, color='red')
-
-         ax['ts'].plot(self.merger_df.Date, self.merger_df.Close, color = 'grey', alpha = 0.3)
-
-         if subsample_ts:
-             self.merger_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
-
-         for i in range(0,len(self.merger_df)-sample_size,offset):
-
-             merger_ = self.merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
-             x = merger_[index_pct]
-             y = merger_.asset_return
-             # b, a = np.polyfit(x,y, 1)
-             huber_regr = HuberRegressor(fit_intercept = True)
-             huber_regr.fit(x.values.reshape(-1,1), y.values.reshape(-1,1))
-             b, a = huber_regr.coef_[0], huber_regr.intercept_
-
-             normalize = mcolors.Normalize(vmin=-1, vmax=1)
-             colormap = cm.jet
-
-             ax['scatter_sample'].plot(x, y,'o', color = 'blue', alpha = 0.1)
-             ax['scatter_sample'].plot(x, b*x+a, color=colormap(normalize(b)))
-             ax['scatter_sample'].set_xlim(-0.06, 0.06)
-             ax['scatter_sample'].set_ylim(-0.06, 0.06)
-
-             plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap(normalize(b)), s = 10)
-
-         scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
-         scalarmappaple.set_array(x)
-
-         plt.title(f'{self.asset} using index: {index}')
-         plt.colorbar(scalarmappaple)
-
-         if self.show_plot:
-             plt.show()
-
-         if self.save_path:
-             result_plot_name = f'market_best_fit.png'
-             figure.savefig(self.save_path+result_plot_name)
-
-         if self.save_path and self.save_aws:
-             # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.asset}/'+result_plot_name,input_path = self.save_path+result_plot_name)
-             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
-
-         if not self.show_plot:
-             plt.close()
-
-         if self.return_fig:
-             return figure
-
-     def get_betas(self,subsample_ts=False):
-         """
-         get general beta and last sample beta, correlation score is included too
-
-         Parameters
-         ----------
-         subsample_ts (int): subsample size of data
-
-         Returns
-         -------
-         None
-         """
-         result = list()
-         for index in self.indexes:
-
-             index_pct = f'{index}_pct'
-             huber_regr = HuberRegressor(fit_intercept = True)
-             huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
-             general_beta, a = huber_regr.coef_[0], huber_regr.intercept_
-             general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df[index])[0]
-
-             dict_res = {
-                 'index':index,
-                 'general_beta':general_beta,
-                 'general_r':general_r,
-             }
-
-             if subsample_ts:
-                 tmp_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
-                 huber_regr = HuberRegressor(fit_intercept = True)
-                 huber_regr.fit(tmp_df.asset_return.values.reshape(-1,1), tmp_df[index_pct].values.reshape(-1,1))
-                 sample_beta, a = huber_regr.coef_[0], huber_regr.intercept_
-                 sample_r = stats.mstats.pearsonr(tmp_df.asset_return, tmp_df[index])[0]
-                 dict_res['sample_beta'] = sample_beta
-                 dict_res['sample_r'] = sample_r
-
-             result.append(dict_res)
-
-         self.states_result = result
-
-
- def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
-     '''
-     select relevant beta result data of a given asset
-
-     Parameters:
-         data_market (pd.DataFrame): dataframe of the market results
-         ticket_name (str): name of the asset
-         show_plot (bool): If tru, plot results
-         save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-         aws_credentials (dict): dict of the aws credentials
-
-     Returns:
-         selection (pd.DataFrame): dataframe of the most relevant beta
-     '''
-     all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
-     all_betas['gen_r2'] = all_betas.general_r ** 2
-     all_betas['sampl_r2'] = all_betas.sample_r ** 2
-     selection = all_betas.sort_values('gen_r2',ascending =False).head(2).sort_values('sampl_r2',ascending =False).head(1).drop(columns = ['gen_r2','sampl_r2'])
-
-     if show_plot:
-         print(selection)
-     if save_path:
-         result_plot_name = f'market_best_fit.csv'
-         selection.to_csv(save_path+result_plot_name)
-
-     if save_path and save_aws:
-         # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
-         upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
-     return selection
-
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: virgo-modules
- Version: 0.0.90
+ Version: 0.1.0
  Summary: data processing and statistical modeling using stock market data
  Home-page: https://github.com/miguelmayhem92/virgo_module
  Author: Miguel Mayhuire
@@ -9,6 +9,7 @@ virgo_app/virgo_modules.egg-info/requires.txt
  virgo_app/virgo_modules.egg-info/top_level.txt
  virgo_app/virgo_modules/src/__init__.py
  virgo_app/virgo_modules/src/aws_utils.py
+ virgo_app/virgo_modules/src/backtester.py
  virgo_app/virgo_modules/src/edge_utils.py
  virgo_app/virgo_modules/src/pull_artifacts.py
  virgo_app/virgo_modules/src/re_utils.py