PyPI - virgo-modules - Versions diffs - 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl - Mend

virgo-modules 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of virgo-modules might be problematic. Click here for more details.

Files changed (7) hide show

virgo_modules/src/edge_utils.py CHANGED Viewed

@@ -10,7 +10,7 @@ from virgo_modules.src.ticketer_source import FeatureSelector
 from feature_engine.discretisation import EqualWidthDiscretiser
 from feature_engine.datetime import DatetimeFeatures
-from .ticketer_source import VirgoWinsorizerFeature, InverseHyperbolicSine
+from .ticketer_source import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy
 class produce_model_wrapper:
     """
@@ -90,8 +90,8 @@ class produce_model_wrapper:
         self.model = model
         self.pipe_transform = pipe
         self.pipeline = Pipeline([('pipe_transform',self.pipe_transform), ('model',self.model)])
-        self.features_to_model = self.pipe_transform.fit_transform(self.X_train).columns
         self.pipeline.fit(self.X_train, self.y_train)
+        self.features_to_model = self.pipeline[:-1].transform(self.X_train).columns
 class register_results():
     """
@@ -217,6 +217,7 @@ def data_processing_pipeline_classifier(
         bins_discretize = 10, correlation = 0.85, fillna = True,
         invhypervolsin_features = False,
         date_features_list = False,
+        entropy_set_list = False,
         pipeline_order = 'selector//winzorizer//discretizer//median_inputer//drop//correlation'
         ):
@@ -233,6 +234,7 @@ def data_processing_pipeline_classifier(
                     fillna (boolean): if true to fill na features
                     invhypervolsin_features (list): list of features to apply inverse hyperbolic sine
                     date_features_list (list): list of features to compute from Date field. (list of features from feature_engine)
+                    entropy_set_list (list): list of dictionaries that contains features and targets to compute entropy
                     pipeline_order (str): custom pipeline order eg. selector//winzorizer//discretizer//median_inputer//drop//correlation
             Returns:
                     pipe (obj): pipeline object
@@ -245,7 +247,15 @@ def data_processing_pipeline_classifier(
     median_imputer_pipe = [('median_imputer', MeanMedianImputer())] if fillna else []
     invhypersin_pipe = [('invhypervolsin scaler', InverseHyperbolicSine(features = invhypervolsin_features))] if invhypervolsin_features else []
     datetimeFeatures_pipe = [('date features', DatetimeFeatures(features_to_extract = date_features_list, variables = 'Date', drop_original = False))] if date_features_list else []
+    entropy_pipe = list()
+    if entropy_set_list:
+        for setx_ in entropy_set_list:
+            setx = setx_['set'].split('//')
+            target_ = setx_['target']
+            subpipe_name = '_'.join(setx) + 'entropy'
+            entropy_pipe.append((subpipe_name, FeaturesEntropy(features = setx, target = target_)))
     pipe_dictionary = {
         'selector': select_pipe,
         'winzorizer':winzorizer_pipe,
@@ -255,6 +265,7 @@ def data_processing_pipeline_classifier(
         'median_inputer':median_imputer_pipe,
         'arcsinh_scaler': invhypersin_pipe,
         'date_features': datetimeFeatures_pipe,
+        'entropy_features' : entropy_pipe,
     }
     pipeline_steps = pipeline_order.split('//')

virgo_modules/src/ticketer_source.py CHANGED Viewed

@@ -147,6 +147,109 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
     def transform(self, X, y=None):
         return X[self.columns]
+class FeaturesEntropy(BaseEstimator, TransformerMixin):
+    """
+    Class that creates a feature that calculate entropy for a given feature classes, but it might get some leackeage in the training set.
+    this class is compatible with scikitlearn pipeline
+    Attributes
+    ----------
+    columns : list
+        list of features to select
+    entropy_map: pd.DataFrame
+        dataframe of the map with the entropies per class
+    perc: float
+        percentage of the dates using for calculate the entropy map
+    Methods
+    -------
+    fit(additional="", X=DataFrame, y=None):
+        fit transformation.
+    transform(X=DataFrame, y=None):
+        apply feature transformation
+    """
+    def __init__(self, features, target, feature_name = None, feature_type = 'discrete', perc = 0.5, default_null = 0.99):
+        self.features = features
+        self.feature_type = feature_type
+        self.target = target
+        self.perc = perc
+        self.default_null = default_null
+        if not feature_name:
+            self.feature_name = '_'.join(features)
+            self.feature_name = self.feature_name + '_' + target + '_' + feature_type
+        else:
+            self.feature_name = feature_name
+    def fit(self, X, y=None):
+        unique_dates = list(X['Date'].unique())
+        unique_dates.sort()
+        total_length = len(unique_dates)
+        cut = int(round(total_length*self.perc,0))
+        train_dates = unique_dates[:cut]
+        max_train_date = max(train_dates)
+        X_ = X[X['Date'] <= max_train_date].copy()
+        df = X_.join(y, how = 'left')
+        column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]
+        df_aggr = (
+            df
+            .groupby(column_list, as_index = False)
+            .apply(
+                lambda x: pd.Series(
+                    dict(
+                        counts = x[self.target].count(),
+                        trues=(x[self.target] == 1).sum(),
+                        falses=(x[self.target] == 0).sum(),
+                    )
+                )
+            )
+            .assign(
+                trues_rate=lambda x: x['trues'] / x['counts']
+            )
+            .assign(
+                falses_rate=lambda x: x['falses'] / x['counts']
+            )
+            .assign(
+                log2_trues = lambda x: np.log2(1/x['trues_rate'])
+            )
+            .assign(
+                log2_falses = lambda x: np.log2(1/x['falses_rate'])
+            )
+            .assign(
+                comp1 = lambda x: x['trues_rate']*x['log2_trues']
+            )
+            .assign(
+                comp2 = lambda x: x['falses_rate']*x['log2_falses']
+            )
+            .assign(
+                class_entropy = lambda x: np.round(x['comp1']+x['comp2'],3)
+            )
+        )
+        self.column_list = column_list
+        self.entropy_map = (
+            df_aggr
+            [column_list+['class_entropy']]
+            .rename(columns = {'class_entropy': self.feature_name})
+            .copy()
+        )
+        del df, df_aggr, X_
+        return self
+    def transform(self, X, y=None):
+        X = X.join(self.entropy_map.set_index(self.column_list), on=self.column_list, how = 'left')
+        X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
+        return X
 def sharpe_ratio(return_series):
     '''
@@ -2495,9 +2598,8 @@ class produce_model:
         self.model = model
         self.pipe_transform = pipe
         self.pipeline = Pipeline([('pipe_transform',self.pipe_transform), ('model',self.model)])
-        self.features_to_model = self.pipe_transform.fit_transform(self.X_train).columns
         self.pipeline.fit(self.X_train, self.y_train)
+        self.features_to_model = self.pipeline[:-1].transform(self.X_train).columns
 class hmm_feature_selector():
     """

{virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: virgo-modules
-Version: 0.0.88
+Version: 0.0.90
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire

{virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
 virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
-virgo_modules/src/edge_utils.py,sha256=tMpt0bfnoOyD_qqh4wD6TQeOhaMcGE59DbvIj3qnp-0,13732
+virgo_modules/src/edge_utils.py,sha256=i3Hm3fO-QA-u17jDpnRodLLILMWZ2VTMEkMKijdGKLg,14287
 virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
 virgo_modules/src/re_utils.py,sha256=ndPUW3F0QkljtKLR1dqtBm2I2LtceduSgLRIk3HszWk,72244
-virgo_modules/src/ticketer_source.py,sha256=mhNPWbluKYVqpX0E8Uh6fTXi1Bn7zsG6rHIp_TklZr0,146629
-virgo_modules-0.0.88.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
-virgo_modules-0.0.88.dist-info/METADATA,sha256=C1I5H8ceh1-j9gZW7nykhZvzs952oy0Aqx9dWXkufBY,1429
-virgo_modules-0.0.88.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-virgo_modules-0.0.88.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
-virgo_modules-0.0.88.dist-info/RECORD,,
+virgo_modules/src/ticketer_source.py,sha256=30xCmfL16SHMPQOs4qKsKSfvfdfv-9IkYY4X9gJgx70,150116
+virgo_modules-0.0.90.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
+virgo_modules-0.0.90.dist-info/METADATA,sha256=6KCZW4HK_io_AsQjBV733cVNeNlyRKqJ6MdFCFdmTWY,1429
+virgo_modules-0.0.90.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+virgo_modules-0.0.90.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
+virgo_modules-0.0.90.dist-info/RECORD,,

{virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/LICENSE RENAMED Viewed

File without changes

{virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/WHEEL RENAMED Viewed

File without changes

{virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/top_level.txt RENAMED Viewed

File without changes

virgo-modules 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl

Potentially problematic release.

virgo-modules 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl