PyPI - virgo-modules - Versions diffs - 0.0.87__tar.gz → 0.0.89__tar.gz - Mend

virgo-modules 0.0.87.tar.gz → 0.0.89.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of virgo-modules might be problematic. Click here for more details.

Files changed (17) hide show

{virgo_modules-0.0.87 → virgo_modules-0.0.89}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: virgo_modules
-Version: 0.0.87
+Version: 0.0.89
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire

{virgo_modules-0.0.87 → virgo_modules-0.0.89}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
 setup(
     name="virgo_modules",
-    version="0.0.87",
+    version="0.0.89",
     description="data processing and statistical modeling using stock market data",
     package_dir={"": "virgo_app"},
     packages=find_packages(where="virgo_app"),

{virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/re_utils.py RENAMED Viewed

@@ -700,7 +700,11 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
         'stochastic_feature':'stochastic_feature',
         'william_feature':'william_feature',
         'vortex_feature':'vortex_feature',
-        'pair_index_feature':'pair_index_feature' # this has a diff structure!
+        'pair_index_feature':'pair_index_feature', # this has a diff structure!
+        'min_distance_pricefeature':'minmax_pricefeature',
+        'min_relprice_pricefeature':'minmax_pricefeature',
+        'max_distance_pricefeature':'minmax_pricefeature',
+        'max_relprice_pricefeature':'minmax_pricefeature',
     }
     exceptions = ['pair_feature','pair_index_feature']
     ### standar feature

{virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/ticketer_source.py RENAMED Viewed

@@ -147,6 +147,109 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
     def transform(self, X, y=None):
         return X[self.columns]
+class features_entropy(BaseEstimator, TransformerMixin):
+    """
+    Class that creates a feature that calculate entropy for a given feature classes, but it might get some leackeage in the training set.
+    this class is compatible with scikitlearn pipeline
+    Attributes
+    ----------
+    columns : list
+        list of features to select
+    entropy_map: pd.DataFrame
+        dataframe of the map with the entropies per class
+    perc: float
+        percentage of the dates using for calculate the entropy map
+    Methods
+    -------
+    fit(additional="", X=DataFrame, y=None):
+        fit transformation.
+    transform(X=DataFrame, y=None):
+        apply feature transformation
+    """
+    def __init__(self, features, target, feature_name = None, feature_type = 'discrete', perc = 0.5, default_null = 0.99):
+        self.features = features
+        self.feature_type = feature_type
+        self.target = target
+        self.perc = perc
+        self.default_null = default_null
+        if not feature_name:
+            self.feature_name = '_'.join(features)
+            self.feature_name = self.feature_name + '_' + target + '_' + feature_type
+        else:
+            self.feature_name = feature_name
+    def fit(self, X, y=None):
+        unique_dates = list(X['Date'].unique())
+        unique_dates.sort()
+        total_length = len(unique_dates)
+        cut = int(round(total_length*self.perc,0))
+        train_dates = unique_dates[:cut]
+        max_train_date = max(train_dates)
+        X_ = X[X['Date'] <= max_train_date]
+        df = pd.merge(X_, y, left_index=True, right_index=True, how = 'left').copy()
+        column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]
+        df_aggr = (
+            df
+            .groupby(column_list, as_index = False)
+            .apply(
+                lambda x: pd.Series(
+                    dict(
+                        counts = x[self.target].count(),
+                        trues=(x[self.target] == 1).sum(),
+                        falses=(x[self.target] == 0).sum(),
+                    )
+                )
+            )
+            .assign(
+                trues_rate=lambda x: x['trues'] / x['counts']
+            )
+            .assign(
+                falses_rate=lambda x: x['falses'] / x['counts']
+            )
+            .assign(
+                log2_trues = lambda x: np.log2(1/x['trues_rate'])
+            )
+            .assign(
+                log2_falses = lambda x: np.log2(1/x['falses_rate'])
+            )
+            .assign(
+                comp1 = lambda x: x['trues_rate']*x['log2_trues']
+            )
+            .assign(
+                comp2 = lambda x: x['falses_rate']*x['log2_falses']
+            )
+            .assign(
+                class_entropy = lambda x: np.round(x['comp1']+x['comp2'],3)
+            )
+        )
+        self.column_list = column_list
+        self.entropy_map = (
+            df_aggr
+            [column_list+['class_entropy']]
+            .rename(columns = {'class_entropy': self.feature_name})
+            .copy()
+        )
+        del df, df_aggr
+        return self
+    def transform(self, X, y=None):
+        X = X.merge(self.entropy_map, on=self.column_list, how = 'left')
+        X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
+        return X
 def sharpe_ratio(return_series):
     '''
@@ -383,6 +486,8 @@ class stock_eda_panel(object):
         perfom fast stochastic oscilator or william indicator
     vortex_feature(window=int, threshold=float, plot=boolean, save_features=boolean):
         perform vortex oscilator
+    minmax_pricefeature(type_func=str, window=int, distance=bolean, save_features=boolean)
+        get relative price/ distance feature with respect to the min/max price in a given window
     pair_index_feature(pair_symbol=str, feature_label=str, window=int, threshold=float, plot=boolean, save_features=boolean):
         perform additional asset ROC feature, then a new feature is created in the main dataframe
     produce_order_features(feature_name=str, save_features=boolean):
@@ -1698,6 +1803,70 @@ class stock_eda_panel(object):
         if plot:
             self.signal_plotter(feature_name)
+    def minmax_pricefeature(self, type_func, window, distance = False, save_features = False):
+        """
+        perform relative price/distance with respect to the min/max price in a given time scope
+        Parameters
+        ----------
+        type_func (str): either min or max
+        window (int): window scope
+        distance (boolean): if true, get distance feature else relative feature
+        save_features (boolean): True to save feature configuration and feature names
+        Returns
+        -------
+        None
+        """
+        if type_func == 'min':
+            self.df['Price_ref'] = self.df[['Open','High', 'Low','Close']].min(axis = 1)
+        elif type_func == 'max':
+            self.df['Price_ref'] = self.df[['Open','High', 'Low','Close']].max(axis = 1)
+        init_shape = self.df.shape[0]
+        df_date = self.df[['Date','Price_ref']].rename(columns = {'Date':'Date_ref'}).copy()
+        self.df = self.df.rename(columns = {'Price_ref':'Price_to_use'})
+        if type_func == 'min':
+            self.df[f'window_price'] = (self.df.sort_values("Date")["Price_to_use"].transform(lambda x: x.rolling(window, min_periods=1).min()))
+        elif type_func == 'max':
+            self.df[f'window_price'] = (self.df.sort_values("Date")["Price_to_use"].transform(lambda x: x.rolling(window, min_periods=1).max()))
+        self.df = self.df.merge(df_date, left_on = 'window_price', right_on = 'Price_ref', how = 'left')
+        self.df['date_span'] = self.df['Date'] - self.df['Date_ref']
+        self.df['RN'] = self.df.sort_values(['date_span'], ascending=False).groupby(['Date']).cumcount() + 1
+        self.df = self.df[self.df['RN'] == 1]
+        if distance:
+            self.df[f'{type_func}_distance_to_price'] = pd.to_numeric(self.df['date_span'].dt.days, downcast='integer')
+        if not distance:
+            if type_func == 'min':
+                self.df[f'{type_func}_relprice'] = self.df['Price_to_use']/self.df['window_price']-1
+            if type_func == 'max':
+                self.df[f'{type_func}_relprice'] = self.df['window_price']/self.df['Price_to_use']-1
+        self.df = self.df.drop(columns = ['RN', 'date_span', 'Price_to_use', 'window_price', 'Date_ref','Price_ref'])
+        end_shape = self.df.shape[0]
+        if init_shape != end_shape:
+            raise Exception("shapes are not the same")
+        if save_features:
+            if distance:
+                self.features.append(f'{type_func}_distance_to_price')
+                name_attr = f'{type_func}_distance'
+            if not distance:
+                self.features.append(f'{type_func}_relprice')
+                name_attr = f'{type_func}_relprice'
+            setattr(self,f'settings_{name_attr}_pricefeature' , {'type_func': type_func, 'window': window, 'distance': distance})
     def pair_index_feature(self, pair_symbol, feature_label, window, threshold, plot = False, save_features = False):
         """
         perform additional asset ROC feature, then a new feature is created in the main dataframe
@@ -2297,7 +2466,9 @@ class stock_eda_panel(object):
         ## for now this is hard coded
         feature_list = ['spread_ma','relative_spread_ma','pair_feature','count_features','bidirect_count_features','price_range','relative_price_range','rsi_feature',
                         'rsi_feature_v2', 'days_features','days_features_v2', 'volume_feature','smooth_volume', 'roc_feature', 'stoch_feature', 'stochastic_feature',
-                        'william_feature', 'vortex_feature', 'pair_index_feature','hmm']
+                        'william_feature', 'vortex_feature', 'pair_index_feature','hmm',
+                        'min_distance_pricefeature', 'min_relprice_pricefeature', 'max_distance_pricefeature','max_relprice_pricefeature'
+                        ]
         for feature in feature_list:
             try:

{virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: virgo-modules
-Version: 0.0.87
+Version: 0.0.89
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire