PyPI - virgo-modules - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

virgo-modules 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.

This version of virgo-modules might be problematic. Click here for more details.

Files changed (10) hide show

virgo_modules/src/edge_utils/edge_utils.py CHANGED Viewed

@@ -11,7 +11,14 @@ from feature_engine.imputation import  MeanMedianImputer
 from feature_engine.discretisation import EqualWidthDiscretiser
 from feature_engine.datetime import DatetimeFeatures
-from ..transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector, InteractionFeatures
+from ..transformer_utils import (
+    VirgoWinsorizerFeature,
+    InverseHyperbolicSine,
+    FeaturesEntropy,
+    FeatureSelector,
+    InteractionFeatures,
+    SplineMarketReturnJumpWaves
+)
 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
@@ -223,6 +230,7 @@ def data_processing_pipeline_classifier(
         date_features_list = False,
         entropy_set_list = False,
         interaction_features_cont = False,
+        spline_regression_config = False,
         pipeline_order = 'selector//winzorizer//discretizer//median_inputer//drop//correlation'
         ):
@@ -254,7 +262,12 @@ def data_processing_pipeline_classifier(
     invhypersin_pipe = [('invhypervolsin scaler', InverseHyperbolicSine(features = invhypervolsin_features))] if invhypervolsin_features else []
     datetimeFeatures_pipe = [('date features', DatetimeFeatures(features_to_extract = date_features_list, variables = 'Date', drop_original = False))] if date_features_list else []
     interaction_features = [("interaction features", InteractionFeatures(interaction_features_cont[0], interaction_features_cont[1]))] if interaction_features_cont else []
+    spline_features = [("spline features", SplineMarketReturnJumpWaves(
+        return_feature_names=spline_regression_config.get("return_feature_names"),
+        target_variables=spline_regression_config.get("target_variables"),
+        feature_label=spline_regression_config.get("feature_label"),
+    ))] if spline_regression_config else []
     entropy_pipe = list()
     if entropy_set_list:
         for setx_ in entropy_set_list:
@@ -274,6 +287,7 @@ def data_processing_pipeline_classifier(
         'date_features': datetimeFeatures_pipe,
         'interaction_features': interaction_features,
         'entropy_features' : entropy_pipe,
+        "spline_features": spline_features,
     }
     pipeline_steps = pipeline_order.split('//')

virgo_modules/src/edge_utils/feature_selection.py ADDED Viewed

@@ -0,0 +1,54 @@
+import random
+from numpy.random import choice
+import numpy as np
+from scipy import stats
+from sklearn.feature_selection import RFE
+class StackRFE:
+    def __init__(self, model, n_features, batch_elim, step_elim, cv, max_iterations):
+        self.model = model
+        self.n_features = n_features
+        self.batch_elim = batch_elim
+        self.step_elim = step_elim
+        self.cv = cv
+        self.max_iterations = max_iterations
+    def _suggest_elimination(self, uniform=False):
+        """
+        suggest based on mean ranking, lower the mean rank higher the prob to be selected
+        """
+        ds = self.feature_rankings
+        ds_mean = {k:np.mean(ds.get(k)) for k in ds}
+        max_ = np.max([x for x in ds_mean.values()])
+        ds_weight = {k: (max_-v+1) for k,v in ds_mean.items()}
+        sum_ = np.sum([x for x in ds_weight.values()])
+        ds_prob = {k: v/sum_ for k,v in ds_weight.items()}
+        result = list(choice(list(ds_prob.keys()), self.batch_elim,p=list(ds_prob.values()), replace=False))
+        if uniform:
+            features = list(ds_prob.keys())
+            random.shuffle(features)
+            result = features[0:self.batch_elim]
+        return result
+    def fit(self, X, y):
+        features = list(X.columns).copy()
+        self.feature_rankings = {f:[1] for f in features}
+        for iteration in range(self.max_iterations):
+            # shuffling
+            if random.random() > 0.5:
+                batch_features = self._suggest_elimination()
+            else:
+                batch_features = self._suggest_elimination()
+            # selector and elimination
+            tmp_feature_ranking = {k: list() for k in batch_features}
+            selector = RFE(self.model, n_features_to_select=self.n_features, step=self.step_elim)
+            for train_index, test_index in self.cv.split(X, y):
+                X_ = X[X.index.get_level_values('i').isin(train_index)][batch_features]
+                y_ = y[y.index.get_level_values('i').isin(train_index)]
+                selector = selector.fit(X_, y_)
+                for k,r in zip(tmp_feature_ranking.keys(), selector.ranking_):
+                    tmp_feature_ranking[k].append(r)
+            rankings = [stats.mode(v).mode for v in tmp_feature_ranking.values()]
+            for f,r in zip(batch_features, rankings):
+                self.feature_rankings[f].append(r)

virgo_modules/src/re_utils.py CHANGED Viewed

@@ -1421,6 +1421,12 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
                 last_signal_featlist = last_signal_featlist.split('//')
                 if feature_name in last_signal_featlist:
                     object_stock.compute_last_signal(feature_name, False)
+    market_interaction_features = configs.get('custom_transformations',{}).get('market_interaction_features', False)
+    if market_interaction_features:
+        for stage in market_interaction_features.keys():
+            method_to_use = market_interaction_features.get(stage).get("method")
+            arguments_to_use = market_interaction_features.get(stage).get("parameters")
+            getattr(object_stock, method_to_use)(**arguments_to_use)
     # geting targets
     object_stock.get_categorical_targets(**target_params_up)
     object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})

virgo_modules/src/ticketer_source.py CHANGED Viewed

@@ -809,13 +809,11 @@ class stock_eda_panel(object):
             .transform(lambda x: x.rolling(ma2, min_periods=1).mean())
         )
-        print('--------------------------------------------------------------------')
         if save_features:
             self.log_features_standard(feature_name)
             self.settings_relative_spread_ma = {'ma1':ma1, 'ma2':ma2, 'threshold':threshold}
         if plot:
             self.signal_plotter(feature_name)
     def pair_feature(self, pair_symbol, plot = False):
@@ -871,6 +869,24 @@ class stock_eda_panel(object):
             plt.legend()
             plt.show()
+    def smooth_logrets_interaction_term(self, feature_interact_with, resulting_feature_name="persisted_clip_diff_smooths", rollmean_window = 5, ext_threhold=0.015, persist_days = 3, save_features=False):
+        """
+        create an interaction term that is going to compare the distance of asset wolling window mean and market rolling window mean.
+        then get the outliers or high values using abs and this value persist for some days
+        goal persist big differences of market and asset returns
+        feature_interact_with: name of the market return
+        rollmean_window: rolling window or smoothing number of days
+        ext_threhold: threshold
+        persist_days: number of days to persis the signal
+        """
+        self.df["smooth_log_return"] = self.df['log_return'].rolling(rollmean_window).mean().values
+        self.df["smooth_market_log_return"] = self.df[feature_interact_with].rolling(rollmean_window).mean().values
+        self.df["diff_smooths"] = self.df["smooth_market_log_return"]-self.df["smooth_log_return"]
+        self.df["clip_diff_smooths"] = np.where(np.abs(self.df["diff_smooths"]) > ext_threhold, self.df["diff_smooths"] , 0)
+        self.df[resulting_feature_name] = self.df['clip_diff_smooths'].rolling(persist_days).mean().values
+        self.df = self.df.drop(columns=["smooth_log_return","smooth_market_log_return","diff_smooths","clip_diff_smooths"])
     def calculate_cointegration(self,series_1, series_2):
         """
         calculate cointegration score for two time series

virgo_modules/src/transformer_utils.py CHANGED Viewed

@@ -1,6 +1,11 @@
+import gc
 from sklearn.base import BaseEstimator, TransformerMixin
 import pandas as pd
 import numpy as np
+import statsmodels.api as sm
+from patsy import dmatrix
+import matplotlib.pyplot as plt
 class InverseHyperbolicSine(BaseEstimator, TransformerMixin):
@@ -289,3 +294,108 @@ class InteractionFeatures(BaseEstimator, TransformerMixin):
                 fn = 'iterm_'+f1.replace("norm_","")+"_"+f2.replace("norm_","")
                 X = self.simple_div_interaction(X, f1, f2, fn)
         return X
+class SplineMarketReturnJumpWaves(BaseEstimator, TransformerMixin):
+    """
+    Class that gets a feature returns and performs countings so that a spline regression model can be fitted
+    Attributes
+    ----------
+    return_feature_names : list
+        list of the name of the features to apply spline regresion
+    target_variables : list
+        list of target features
+    feature_label : str
+        prefix for the new features.
+    sample_perc : float
+        sample size of the traninig data taking into consideration time
+    Methods
+    -------
+    fit(additional="", X=DataFrame, y=DataFrame):
+        fit transformation.
+    transform(X=DataFrame, y=None):
+        apply feature transformation
+    """
+    def __init__(self, return_feature_names, target_variables, feature_label,
+                  sample_perc=0.5,parts = 6, e_floor=-0.001,e_top=0.0001, d=3):
+        self.sample_perc = sample_perc
+        self.return_feature_names=return_feature_names
+        self.target_variables = target_variables
+        self.glms = dict()
+        self.feature_label = feature_label
+        self.parts = parts
+        self.e_floor = e_floor
+        self.e_top = e_top
+        self.d = d
+    def fit(self, X, y, plot = False):
+        #complete dataset with y
+        X_set=X.copy()
+        X_set[self.target_variables] = y
+        #sampling
+        if plot:
+            fig, ax = plt.subplots(len(self.return_feature_names),1)
+        for i,return_feature_name in enumerate(self.return_feature_names):
+            X_aggregated = (
+                X_set
+                .groupby("Date",as_index=False)
+                .agg(
+                    count_target_up = ("target_up","sum"),
+                    count_target_down = ("target_down","sum"),
+                    return_feature = (return_feature_name,"max"),
+                )
+                .sort_values("Date",ascending=True)
+                .dropna()
+                .copy()
+            )
+            del X
+            gc.collect()
+            nlines = X_aggregated.shape[0]
+            threshold = int(round((1-nlines*self.sample_perc),0))
+            train_ = X_aggregated.iloc[:threshold,:]
+            self.glms[return_feature_name] = dict()
+            for target in self.target_variables:
+                X = train_[["return_feature"]].round(4).values.reshape(-1, 1)
+                y = np.log(train_.dropna()[f"count_{target}"].values + 1)
+                knot_str = self._get_knot(X)
+                transformed_x = dmatrix(f"bs(train, knots=({knot_str}), degree=3, include_intercept=False)", {"train": X}, return_type='dataframe')
+                model = sm.GLM(y, transformed_x).fit()
+                self.glms[return_feature_name][target] = {
+                    "model":model,
+                }
+                if plot:
+                    x_transfomed = dmatrix(f"bs(valid, knots=({knot_str}), degree={self.d}, include_intercept=False)", {"valid":X}, return_type='dataframe')
+                    pred = model.predict(x_transfomed)
+                    ax[i].scatter(X, np.exp(y),s=2,alpha=0.2)
+                    ax[i].scatter(X, np.exp(pred), alpha=0.2, s=1)
+            #self.X_aggregated = X_aggregated
+        return self
+    def transform(self, X, y=None, plot =False):
+        if plot:
+            fig, ax = plt.subplots(len(self.return_feature_names),1)
+        for i, return_feature_name in enumerate(self.return_feature_names):
+            for target in self.target_variables:
+                model = self.glms[return_feature_name][target].get("model")
+                vect = X[return_feature_name]
+                knot_str = self._get_knot(vect)
+                X_transformed = dmatrix(f"bs(valid, knots=({knot_str}), degree={self.d}, include_intercept=False)",
+                    {"valid":vect.fillna(0)},
+                    return_type='dataframe')
+                X[f"{self.feature_label}_{return_feature_name}_{target}"] = model.predict(
+                    X_transformed
+                )
+                if plot:
+                    pred = model.predict(X_transformed)
+                    ax[i].scatter(X, np.exp(pred), alpha=0.2, s=1)
+        return X
+    def _get_knot(self, input):
+        min_, max_ = np.min(input)-self.e_floor, np.max(input)+self.e_top
+        r = (max_ - min_)/self.parts
+        knot_tuple = [str(i*r+min_) for i,_ in enumerate(range(self.parts),start=0)]
+        knot_str = ",".join(knot_tuple)
+        knot_str = f"({knot_str})"
+        return knot_str

{virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
-Name: virgo-modules
-Version: 0.7.0
+Metadata-Version: 2.4
+Name: virgo_modules
+Version: 0.8.0
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire
@@ -13,7 +13,18 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Provides-Extra: dev
-Requires-Dist: pytest >=7.0 ; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == "dev"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-python
+Dynamic: summary
 # Virgo Package

{virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/RECORD RENAMED Viewed

@@ -4,18 +4,19 @@ virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zv
 virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
 virgo_modules/src/hmm_utils.py,sha256=D7axAnCdSe1_1EgRyli2PAnM2f6699hTY9GcxjPXG-o,21221
 virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
-virgo_modules/src/re_utils.py,sha256=DBY_VBB1wKm5D7znutpF_66CTLZhJfx54h8Ws0YzdN4,74641
-virgo_modules/src/ticketer_source.py,sha256=aJZNB_YK0JrSrUBUBkAfolIMxcTozNFrZeoNxkhpqK8,102547
-virgo_modules/src/transformer_utils.py,sha256=ysCUp3cB3_7Jr9OHDqhg2_6Vu0k1YVjfqbvQNbxpbhI,8990
+virgo_modules/src/re_utils.py,sha256=GZCkAfgw2tVJRJ_Gw5Yewc14ebiE9wSImPiYQN8FsW0,75095
+virgo_modules/src/ticketer_source.py,sha256=528WhGoANOm4IKnxGSWsbQxxUh3-qlZfvGRNAafMMcE,103883
+virgo_modules/src/transformer_utils.py,sha256=SnYdtsFPnSF6u4UFIat0-X3-qVuUWvv_T46kiB-H0Sk,13682
 virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 virgo_modules/src/edge_utils/conformal_utils.py,sha256=cKm4KSM261Eu1FJn4oowKYiKIesW81VbqITIvopGSVk,5410
-virgo_modules/src/edge_utils/edge_utils.py,sha256=7nYPLDNyKqeKIuOOwQi4wsBibzs9gP1HgYMISXJX1Y8,19522
+virgo_modules/src/edge_utils/edge_utils.py,sha256=4uXVWthzJDzkJ4Uq19ZYL9aPcA6CDUS3xYD4FY-a2AM,20018
+virgo_modules/src/edge_utils/feature_selection.py,sha256=HYbQ0JLPDiRYhn-5-C438YEKbuNduDmuvboFC_VkHww,2453
 virgo_modules/src/edge_utils/shap_utils.py,sha256=FgcHkfddvdFSeUqEubYa2ExRGVAWSthqK4b-eKagEmo,2333
 virgo_modules/src/edge_utils/stack_model.py,sha256=QqE91uLo2KauGEj91AVNANB1xE7J4Fa49YOX7k5mFng,4257
 virgo_modules/src/market/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 virgo_modules/src/market/market_tools.py,sha256=vBt66_7E3ANz7avzfeNw_RHMGvG9lh5PRhxmcf_Oyjc,6880
-virgo_modules-0.7.0.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
-virgo_modules-0.7.0.dist-info/METADATA,sha256=tart49AI1D8oLYtFI7mxY43ReNUxWpsX34PuByszh3Q,876
-virgo_modules-0.7.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-virgo_modules-0.7.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
-virgo_modules-0.7.0.dist-info/RECORD,,
+virgo_modules-0.8.0.dist-info/licenses/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
+virgo_modules-0.8.0.dist-info/METADATA,sha256=sCkdOmbxrEEXvGUIwh6vIl_vIcue5C0BbvRtvP9yows,1122
+virgo_modules-0.8.0.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
+virgo_modules-0.8.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
+virgo_modules-0.8.0.dist-info/RECORD,,

{virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.2)
+Generator: setuptools (78.1.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

{virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

virgo-modules 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

Potentially problematic release.

virgo-modules 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl