virgo-modules 0.5.1__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of virgo-modules might be problematic.

Files changed (28)
  1. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/PKG-INFO +1 -1
  2. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/setup.py +1 -1
  3. virgo_modules-0.7.0/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py +106 -0
  4. virgo_modules-0.7.0/virgo_app/virgo_modules/src/edge_utils/shap_utils.py +54 -0
  5. virgo_modules-0.7.0/virgo_app/virgo_modules/src/market/__init__.py +0 -0
  6. virgo_modules-0.7.0/virgo_app/virgo_modules/src/market/market_tools.py +189 -0
  7. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/ticketer_source.py +62 -31
  8. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
  9. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules.egg-info/SOURCES.txt +3 -1
  10. virgo_modules-0.5.1/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py +0 -71
  11. virgo_modules-0.5.1/virgo_app/virgo_modules/src/edge_utils/shap_utils.py +0 -83
  12. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/LICENSE +0 -0
  13. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/README.md +0 -0
  14. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/setup.cfg +0 -0
  15. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/__init__.py +0 -0
  16. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/__init__.py +0 -0
  17. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
  18. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/backtester.py +0 -0
  19. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/edge_utils/__init__.py +0 -0
  20. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/edge_utils/edge_utils.py +0 -0
  21. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/edge_utils/stack_model.py +0 -0
  22. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/hmm_utils.py +0 -0
  23. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
  24. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/re_utils.py +0 -0
  25. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/transformer_utils.py +0 -0
  26. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
  27. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
  28. {virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
{virgo_modules-0.5.1 → virgo_modules-0.7.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: virgo_modules
-Version: 0.5.1
+Version: 0.7.0
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire
{virgo_modules-0.5.1 → virgo_modules-0.7.0}/setup.py
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
 
 setup(
     name="virgo_modules",
-    version="0.5.1",
+    version="0.7.0",
     description="data processing and statistical modeling using stock market data",
     package_dir={"": "virgo_app"},
    packages=find_packages(where="virgo_app"),
virgo_modules-0.7.0/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py (new file)
@@ -0,0 +1,106 @@
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+from sklearn.pipeline import Pipeline
+import mlflow
+import pandas as pd
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from mapie.classification import SplitConformalClassifier
+
+class ConformalStack(mlflow.pyfunc.PythonModel):
+    def __init__(self, model, targets, alphas):
+        self.model = model
+        self.targets = targets
+        self.alphas = alphas
+    def fit(self, data):
+        self.classifiers = dict()
+        for i, target in enumerate(self.targets):
+            st = SingleStack(self.model["model"], i)
+            st.fit()
+            seg_model = Pipeline([
+                ('pipe', self.model['pipe_transform']),
+                ('modelbase', st)
+            ])
+            mapie_class = SplitConformalClassifier(seg_model, prefit=True, random_state=123, conformity_score="lac", confidence_level=1 - np.array(self.alphas))
+            mapie_class.conformalize(data, data[self.targets[i]].values)
+            self.classifiers[target] = mapie_class
+    def predict_conformal(self, data):
+        for target in self.targets:
+            prefix = target + "_conf"
+            _, y_pis = self.classifiers[target].predict_set(data)
+            for i, alpha in enumerate(self.alphas):
+                data[f'{prefix}-{alpha}'] = y_pis[:, 1, i]
+                data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True, alpha, 0)
+        return data
+
+
+class SingleStack(ClassifierMixin, BaseEstimator):
+    def __init__(self, model, estimator_index):
+        self.model = model
+        self.estimator_index = estimator_index
+
+    def fit(self):
+        self._is_fitted = True
+        self.classes_ = [0, 1]
+
+    def predict_proba(self, X):
+        metas_pred = dict()
+        for i, cont in enumerate(self.model.estimators, start=1):
+            _, estimator = cont
+            meta_pred = estimator.predict_proba(X)
+            metas_pred[f"meta{i}0"] = meta_pred[0][:, 1]
+            metas_pred[f"meta{i}1"] = meta_pred[1][:, 1]
+        self.meta_preds_df__ = pd.DataFrame(metas_pred)
+
+        prediction_vector = list()
+        for i, cont in enumerate(self.model.meta_estimators, start=0):
+            _, estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1, len(self.model.estimators) + 1)]
+            preds = estimator.predict_proba(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+        return prediction_vector[self.estimator_index]
+
+    def predict(self, X):
+        prediction_vector = list()
+        _ = self.predict_proba(X)
+        for i, cont in enumerate(self.model.meta_estimators, start=0):
+            _, estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1, len(self.model.estimators) + 1)]
+            preds = estimator.predict(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+
+        p = np.array(tuple(prediction_vector))
+        return p.reshape((p.shape[1], p.shape[0]))[:, self.estimator_index]
+
+    def __sklearn_is_fitted__(self):
+        return hasattr(self, "_is_fitted") and self._is_fitted
+
+def edge_conformal_lines(data, alphas, threshold=0.6, plot=False, look_back=750, offset=0.08):
+    ### correct labels ###
+    df = data.sort_values('Date').iloc[-look_back:]
+    fig = make_subplots(specs=[[{"secondary_y": True}]])
+    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
+    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up, mode='lines', marker=dict(color='blue'), showlegend=True, legendgroup='go up', name='go up'), secondary_y=True)
+    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down, mode='lines', marker=dict(color='coral'), showlegend=True, legendgroup='go down', name='go down'), secondary_y=True)
+    for i, alpha in enumerate(alphas, start=1):
+        try:
+            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
+            df_ = df[df[col_alpha] != 0]
+            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset * i), mode='markers', marker=dict(opacity=0.7, size=10, color='blue'),
+                                     showlegend=False, legendgroup='go up', name='go up', text=df_[col_alpha], textposition="bottom center"),
+                          secondary_y=True)
+        except:
+            pass
+        try:
+            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
+            df_ = df[df[col_alpha] != 0]
+            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset * i), mode='markers', marker=dict(opacity=0.7, size=10, color='coral'),
+                                     showlegend=False, legendgroup='go down', name='go down', text=df_[col_alpha].astype(str), textposition="bottom center"),
+                          secondary_y=True)
        except:
            pass
+    fig.add_shape(type="line", xref="paper", yref="y2", x0=0.02, y0=threshold, x1=0.9, y1=threshold, line=dict(color="red", dash="dash"))
+    fig.update_layout(title_text="sirius - edge probabilities conformal", width=1200, height=500)
+    if plot:
+        fig.show()
+    return fig
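In 0.7.0 the conformal workflow is object-based: ConformalStack conformalizes one SplitConformalClassifier per target, and predict_conformal writes `<target>_conf-<alpha>` columns back onto the frame. Below is a minimal usage sketch; the `stack_dict`, `calib_df`, and `live_df` names and the concrete target/alpha values are illustrative assumptions, not part of the package.

```python
from virgo_modules.src.edge_utils.conformal_utils import ConformalStack, edge_conformal_lines

# Assumed layout: a prefit stack plus its feature pipeline, keyed exactly as
# ConformalStack indexes them: {'model': <stack>, 'pipe_transform': <pipeline>}.
alphas = [0.05, 0.1]
conf = ConformalStack(stack_dict, targets=["target_up", "target_down"], alphas=alphas)

# The calibration frame must contain the target columns; fit() conformalizes
# one classifier per target on held-out data.
conf.fit(calib_df)

# Adds columns such as 'target_up_conf-0.05': alpha where the positive label
# is in the prediction set, 0 otherwise.
scored = conf.predict_conformal(live_df)
fig = edge_conformal_lines(scored, alphas, plot=False)
```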
virgo_modules-0.7.0/virgo_app/virgo_modules/src/edge_utils/shap_utils.py (new file)
@@ -0,0 +1,54 @@
+import shap
+import mlflow
+import pandas as pd
+import numpy as np
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+
+class StackInterpretor(mlflow.pyfunc.PythonModel):
+    def __init__(self, model, targets):
+        self.base_estimators = model.estimators_
+        self.targets = targets
+    def fit_interpretor(self, data):
+        interpretors = {}
+        for label, predictor in zip(self.targets, self.base_estimators):
+            explainer = shap.Explainer(predictor, data)
+            interpretors[label] = explainer
+        self.interpretors = interpretors
+    def get_shap_values(self, data):
+        shap_values = dict()
+        for label, interpretor in self.interpretors.items():
+            shap_value = interpretor(data)
+            shap_values[label] = shap_value
+        return shap_values
+    def register_map(self, mapping):
+        self.mapping = mapping
+
+def mean_shap(data, explainers, pipe_transform):
+    t_data = pipe_transform.transform(data)
+    input_features = t_data.columns
+    shap_results = explainers.get_shap_values(t_data)
+    dict_shap_values = explainers.mapping
+    arrays_ = list()
+    for k, _ in shap_results.items():
+        arrays_.append(shap_results.get(k).values)
+    shap_results_mean = np.mean(np.array(arrays_), axis=0)
+    df_shap = pd.DataFrame(shap_results_mean, columns=input_features, index=data.index)
+    df_shap['Close'] = data['Close']
+    df_shap['Date'] = data['Date']
+    df_shap = df_shap[['Date', 'Close'] + list(dict_shap_values.keys())]
+    df_shap = df_shap.rename(columns=dict_shap_values)
+    return df_shap
+
+def edge_shap_lines(data, plot=False, look_back=750):
+    ### correct labels ###
+    shap_cols = [col for col in data.columns if col not in ['Date', 'Close']]
+    df = data.sort_values('Date').iloc[-look_back:]
+    fig = make_subplots(specs=[[{"secondary_y": True}]])
+    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
+    for col in shap_cols:
+        fig.add_trace(go.Scatter(x=df.Date, y=df[col], mode='lines+markers', name=col), secondary_y=True)
+    fig.update_layout(title_text="sirius - feature power", width=1200, height=500)
+    if plot:
+        fig.show()
+    return fig
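shap_utils.py likewise moves from loose helper functions to a StackInterpretor object that owns one shap.Explainer per base estimator; mean_shap now reads both the explainers and the column mapping from that object. A hedged sketch follows; `stack_model`, `pipe_transform`, `train_df`, `recent_df`, and the mapped feature names are illustrative assumptions.

```python
from virgo_modules.src.edge_utils.shap_utils import StackInterpretor, mean_shap, edge_shap_lines

# stack_model is assumed to expose .estimators_ with one base estimator per target.
interp = StackInterpretor(stack_model, targets=["target_up", "target_down"])

# Explainers are fit on the transformed feature space, so transform first.
interp.fit_interpretor(pipe_transform.transform(train_df))

# mean_shap keeps only the mapped columns, so register raw -> display names first.
interp.register_map({"rsi_feature": "RSI", "volume_feature": "Volume"})

# Mean SHAP values across the base estimators, joined back to Date/Close.
df_shap = mean_shap(recent_df, interp, pipe_transform)
fig = edge_shap_lines(df_shap, plot=False, look_back=250)
```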
virgo_modules-0.7.0/virgo_app/virgo_modules/src/market/market_tools.py (new file)
@@ -0,0 +1,189 @@
+import gc
+
+import pandas as pd
+import numpy as np
+
+from sklearn.linear_model import HuberRegressor
+from scipy import stats
+
+import matplotlib.pyplot as plt
+import seaborn as sns; sns.set()
+
+from matplotlib import cm
+import matplotlib.colors as mcolors
+
+class MarketAnalysis:
+    """
+    Class that performs market analysis using robust linear regression
+
+    Attributes
+    ----------
+    data : pd.DataFrame
+        input data
+    market_features : list
+        list of market features (log returns) to apply the analysis to
+    return_cols : str
+        main log return feature
+    col_map : dict
+        dictionary for renaming market features
+
+    Methods
+    -------
+    compute_beta(data=pd.DataFrame, feature_x=str, feature_y=str):
+        compute betas given x and y using robust linear regression
+    get_correlation(data=pd.DataFrame, feature_x=str, feature_y=str):
+        compute correlation given x and y
+    produce_beta_report(data=pd.DataFrame):
+        produce beta report
+    compute_general_report(sample_size=int, offset=int, index=str, subsample_ts=int, show_plot=bool):
+        compute full report, global and latest window
+    """
+
+    def __init__(self, data, market_features, return_col, col_map=None):
+        self.data = data.dropna()
+        self.market_features = market_features
+        self.return_cols = return_col
+        self.col_map = col_map
+
+    def compute_beta(self, data, feature_x, feature_y):
+        """
+        compute betas given x and y using robust linear regression
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+        feature_x (str): name of the feature x
+        feature_y (str): name of the feature y
+
+        Returns
+        -------
+        (beta (float), alpha (float))
+        """
+        x = data[feature_x].values.reshape(-1, 1)
+        y = data[feature_y].values.reshape(-1, 1)
+        huber_regr = HuberRegressor(fit_intercept=True)
+        huber_regr.fit(x, y)
+        beta, alpha = huber_regr.coef_[0], huber_regr.intercept_
+        return beta, alpha
+
+    def get_correlation(self, data, feature_x, feature_y):
+        """
+        compute correlation given x and y
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+        feature_x (str): name of the feature x
+        feature_y (str): name of the feature y
+
+        Returns
+        -------
+        r (float)
+        """
+        x = data[feature_x]
+        y = data[feature_y]
+        r = stats.mstats.pearsonr(x, y)[0]
+        return r
+
+    def produce_beta_report(self, data):
+        """
+        produce beta report
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+
+        Returns
+        -------
+        report (pd.DataFrame)
+        """
+        result = {
+            "market_index": list(),
+            "beta": list(),
+            "alpha": list(),
+            "r": list()
+        }
+        for index in self.market_features:
+            beta, alpha = self.compute_beta(data, self.return_cols, index)
+            r = self.get_correlation(data, self.return_cols, index)
+            result["market_index"].append(index)
+            result["beta"].append(beta)
+            result["alpha"].append(alpha)
+            result["r"].append(r)
+        pd_result = pd.DataFrame(result)
+        pd_result = pd_result.sort_values("r", ascending=False)
+        if self.col_map:
+            pd_result["map_market_index"] = pd_result.market_index.map(self.col_map)
+        return pd_result
+
+    def compute_general_report(self, sample_size, offset, index=False, subsample_ts=False, show_plot=True):
+        """
+        compute full report, global and latest window
+
+        Parameters
+        ----------
+        sample_size (int): sample size for every beta computation
+        offset (int): offset or overlap between samples
+        index (str): market index to use; if not provided, the best-fit index is taken
+        subsample_ts (int): subsample for the iterative beta calculation
+        show_plot (bool): whether to show the plot
+
+        Returns
+        -------
+        (report (pd.DataFrame), latest_report (pd.DataFrame), figure (matplotlib.figure.Figure))
+        """
+        general_report = self.produce_beta_report(self.data)
+        current_report = self.produce_beta_report(self.data.iloc[sample_size:, :])
+        if not index:
+            index = general_report.head(1).market_index.values[0]
+        b = general_report[general_report.market_index == index].beta.values
+        a = general_report[general_report.market_index == index].alpha.values
+
+        figure, ax = plt.subplot_mosaic(
+            [["scatter_total", "scatter_sample", 'ts', 'ts']],
+            layout="constrained",
+            figsize=(18, 5)
+        )
+        x = self.data[self.return_cols]
+        y = self.data[index]
+        ax['scatter_total'].scatter(x, y)
+        ax['scatter_total'].plot(x, b * x + a, color='red')
+
+        if subsample_ts:
+            merger_df = self.data.iloc[-subsample_ts:, :].copy()
+        else:
+            merger_df = self.data.copy()
+        ax['ts'].plot(merger_df.Date, merger_df.Close, color='grey', alpha=0.3)
+        b_array = list()
+        for i in range(0, len(merger_df) - sample_size, offset):
+            merger_ = merger_df.sort_values('Date', ascending=False).iloc[i:i + sample_size, :]
+            b, a = self.compute_beta(merger_, self.return_cols, index)
+            x = merger_[self.return_cols]
+            y = merger_[index]
+            normalize_ = mcolors.Normalize(vmin=-2.0, vmax=2.0)
+            colormap_ = cm.jet
+            ax['scatter_sample'].plot(x, y, 'o', color='blue', alpha=0.1)
+            ax['scatter_sample'].plot(x, b * x + a, color=colormap_(normalize_(b)))
+            ax['scatter_sample'].set_xlim(-0.08, 0.08)
+            ax['scatter_sample'].set_ylim(-0.08, 0.08)
+            plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap_(normalize_(b)), s=10)
+            b_array.append(b)
+        normalize_ = mcolors.Normalize(vmin=np.min(b_array), vmax=np.max(b_array))
+        colormap_ = cm.jet
+        x_global = self.data[self.return_cols]
+        scalarmappaple = cm.ScalarMappable(norm=normalize_, cmap=colormap_)
+        scalarmappaple.set_array(x_global)
+        if self.col_map:
+            map_index = self.col_map.get(index)
+            title = f'market analysis of {map_index}'
+        else:
+            title = 'market analysis'
+        plt.title(title)
+        plt.colorbar(scalarmappaple)
+        del merger_df
+        gc.collect()
+        if show_plot:
+            plt.show()
+        else:
+            plt.close()
+        return general_report, current_report, figure
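The new market module wraps beta estimation behind one class; the HuberRegressor keeps the fit robust to outlier return days. A minimal sketch, assuming a frame that carries 'Date', 'Close', and precomputed log-return columns (the column names below are illustrative, not from the package):

```python
from virgo_modules.src.market.market_tools import MarketAnalysis

# df is assumed to hold the asset's log returns plus one log-return column
# per market index, alongside 'Date' and 'Close'.
analysis = MarketAnalysis(
    data=df,
    market_features=["sp500_log_return", "nasdaq_log_return"],
    return_col="stock_log_return",
    col_map={"sp500_log_return": "S&P 500", "nasdaq_log_return": "Nasdaq"},
)

# Beta / alpha / correlation per market index, sorted by correlation.
report = analysis.produce_beta_report(analysis.data)

# Global report, latest-window report, and the mosaic figure of rolling betas.
general, current, fig = analysis.compute_general_report(
    sample_size=120, offset=10, subsample_ts=750, show_plot=False
)
```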
{virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules/src/ticketer_source.py
@@ -1,7 +1,7 @@
 import yfinance as yf
 import pandas as pd
 import numpy as np
-import json
+import gc
 
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
@@ -138,6 +138,10 @@ class stock_eda_panel(object):
         perform analysis of lags of the mean rolling log return
     compute_clip_bands(feature_name=str, threshold=float):
         compute outlier detection for a given signal. Note that this follows a mean reversion procedure and the feature has to be stationary. The resulting bottom and roof signals are also attached to the dataframe
+    extract_sec_data(symbol=str, base_columns=list(str), rename_columns=dict):
+        extract new asset data and merge it to the main asset data
+    lag_log_return(lags=int, feature=str, feature_name=str):
+        compute log return given some lags
     signal_plotter(feature_name=str):
         display analysis plot of a feature with high and low signals
     log_features_standard(feature_name=str):
@@ -667,6 +671,63 @@ class stock_eda_panel(object):
         self.df[f'signal_low_{feature_name}'] = np.where( (self.df[f'norm_{feature_name}'] < self.df[f'lower_{feature_name}'] ), 1, 0)
         self.df[f'signal_up_{feature_name}'] = np.where( (self.df[f'norm_{feature_name}'] > self.df[f'upper_{feature_name}'] ), 1, 0)
 
+    def extract_sec_data(self, symbol, base_columns, rename_columns=None):
+        """
+        extract new asset data and merge it to the main asset data
+
+        Parameters
+        ----------
+        symbol (str): symbol to extract data for
+        base_columns (list): list of columns to persist
+        rename_columns (dict): map of the new column names using pd.DataFrame.rename()
+
+        Returns
+        -------
+        None
+        """
+        begin_date = self.today - relativedelta(days=self.n_days)
+        begin_date_str = begin_date.strftime('%Y-%m-%d')
+
+        stock = yf.Ticker(symbol)
+        df = stock.history(period=self.data_window)
+        df = df.sort_values('Date')
+        df.reset_index(inplace=True)
+        df['Date'] = pd.to_datetime(df['Date'], format='mixed', utc=True).dt.date
+        df['Date'] = pd.to_datetime(df['Date'])
+        df = df[df.Date >= begin_date_str]
+        df = df[base_columns]
+        if rename_columns:
+            df = df.rename(columns=rename_columns)
+        right_df = df.copy()
+
+        dates_vector = self.df.Date.to_frame()
+        right_df = dates_vector.merge(right_df, on='Date', how='left')
+        right_df = right_df.fillna(method='bfill')
+        right_df = right_df.fillna(method='ffill')
+
+        self.df = self.df.merge(right_df, on='Date', how='left')
+        self.df = self.df.sort_values("Date")
+        del right_df
+        gc.collect()
+
+    def lag_log_return(self, lags, feature, feature_name=False):
+        """
+        compute log return given some lags
+
+        Parameters
+        ----------
+        lags (int): lag to apply to the log return
+        feature (str): feature to apply the log return to
+        feature_name (str): name of the resulting feature
+
+        Returns
+        -------
+        None
+        """
+
+        feature_name = feature_name if feature_name else f"{feature}_log_return"
+        self.df[feature_name] = np.log(self.df[feature] / self.df[feature].shift(lags))
+
     def signal_plotter(self, feature_name):
 
         """
@@ -2304,33 +2365,3 @@ class analyse_index(stock_eda_panel):
 
         self.states_result = result
 
-def get_relevant_beta(data_market, ticket_name, show_plot=True, save_path=False, save_aws=False, aws_credentials=False):
-    '''
-    select relevant beta result data of a given asset
-
-    Parameters:
-    data_market (pd.DataFrame): dataframe of the market results
-    ticket_name (str): name of the asset
-    show_plot (bool): if true, print results
-    save_path (str): local path for saving, e.g. r'C:/path/to/the/file/'
-    save_aws (str): remote key in s3 bucket path, e.g. 'path/to/file/'
-    aws_credentials (dict): dict of the aws credentials
-
-    Returns:
-    selection (pd.DataFrame): dataframe of the most relevant beta
-    '''
-    all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending=False)
-    all_betas['gen_r2'] = all_betas.general_r ** 2
-    all_betas['sampl_r2'] = all_betas.sample_r ** 2
-    selection = all_betas.sort_values('gen_r2', ascending=False).head(2).sort_values('sampl_r2', ascending=False).head(1).drop(columns=['gen_r2', 'sampl_r2'])
-
-    if show_plot:
-        print(selection)
-    if save_path:
-        result_plot_name = 'market_best_fit.csv'
-        selection.to_csv(save_path + result_plot_name)
-
-    if save_path and save_aws:
-        # upload_file_to_aws(bucket='VIRGO_BUCKET', key=f'market_plots/{ticket_name}/' + result_plot_name, input_path=save_path + result_plot_name)
-        upload_file_to_aws(bucket='VIRGO_BUCKET', key=save_aws + result_plot_name, input_path=save_path + result_plot_name, aws_credentials=aws_credentials)
-    return selection
{virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: virgo-modules
-Version: 0.5.1
+Version: 0.7.0
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire
{virgo_modules-0.5.1 → virgo_modules-0.7.0}/virgo_app/virgo_modules.egg-info/SOURCES.txt
@@ -19,4 +19,6 @@ virgo_app/virgo_modules/src/edge_utils/__init__.py
 virgo_app/virgo_modules/src/edge_utils/conformal_utils.py
 virgo_app/virgo_modules/src/edge_utils/edge_utils.py
 virgo_app/virgo_modules/src/edge_utils/shap_utils.py
-virgo_app/virgo_modules/src/edge_utils/stack_model.py
+virgo_app/virgo_modules/src/edge_utils/stack_model.py
+virgo_app/virgo_modules/src/market/__init__.py
+virgo_app/virgo_modules/src/market/market_tools.py
virgo_modules-0.5.1/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py (deleted)
@@ -1,71 +0,0 @@
-from plotly.subplots import make_subplots
-import plotly.graph_objects as go
-from mapie.classification import MapieClassifier
-from sklearn.pipeline import Pipeline
-import mlflow
-import numpy as np
-
-
-def get_conformal_classifiers(model, data, targets):
-    classfiers = list()
-    for i, _ in enumerate(model['model'].estimators_):
-        seg_model = Pipeline([
-            ('pipe', model['pipe_transform']),
-            ('model', model['model'].estimators_[i])
-        ])
-        mapie_class = MapieClassifier(seg_model, cv='prefit', random_state=123, method="lac")
-        mapie_class.fit(data, data[targets[i]].values)
-        classfiers.append(mapie_class)
-    return classfiers
-
-def log_confmodels(runid, classifiers):
-    with mlflow.start_run(run_id=runid) as run:
-        for i, classifier in enumerate(classifiers):
-            mlflow.sklearn.log_model(classifier, name=f"conformal_model-{i}")
-        print('models were logged')
-
-def load_confmodel(runid, target_variables):
-    classifiers = list()
-    for i in range(len(target_variables)):
-        folder = f"conformal_model-{i}"
-        model = mlflow.sklearn.load_model(f"runs:/{runid}/{folder}")
-        classifiers.append(model)
-    return classifiers
-
-
-def get_conformal_prediction(classifier, alphas, data, prefix='conf'):
-    _, y_pis = classifier.predict(data, alpha=alphas)
-    for i, alpha in enumerate(alphas):
-        data[f'{prefix}-{alpha}'] = y_pis[:, 1, i]
-        data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True, alpha, 0)
-    return data
-
-def edge_conformal_lines(data, alphas, threshold=0.6, plot=False, look_back=750, offset=0.08):
-    ### correct labels ###
-    df = data.sort_values('Date').iloc[-look_back:]
-    fig = make_subplots(specs=[[{"secondary_y": True}]])
-    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
-    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up, mode='lines', marker=dict(color='blue'), showlegend=True, legendgroup='go up', name='go up'), secondary_y=True)
-    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down, mode='lines', marker=dict(color='coral'), showlegend=True, legendgroup='go down', name='go down'), secondary_y=True)
-    for i, alpha in enumerate(alphas, start=1):
-        try:
-            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
-            df_ = df[df[col_alpha] != 0]
-            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset * i), mode='markers', marker=dict(opacity=0.7, size=10, color='blue'),
-                                     showlegend=False, legendgroup='go up', name='go up', text=df_[col_alpha], textposition="bottom center"),
-                          secondary_y=True)
-        except:
-            pass
-        try:
-            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
-            df_ = df[df[col_alpha] != 0]
-            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset * i), mode='markers', marker=dict(opacity=0.7, size=10, color='coral'),
-                                     showlegend=False, legendgroup='go down', name='go down', text=df_[col_alpha].astype(str), textposition="bottom center"),
-                          secondary_y=True)
-        except:
-            pass
-    fig.add_shape(type="line", xref="paper", yref="y2", x0=0.02, y0=threshold, x1=0.9, y1=threshold, line=dict(color="red", dash="dash"))
-    fig.update_layout(title_text="sirius - edge probabilities conformal", width=1200, height=500)
-    if plot:
-        fig.show()
-    return fig
virgo_modules-0.5.1/virgo_app/virgo_modules/src/edge_utils/shap_utils.py (deleted)
@@ -1,83 +0,0 @@
-import shap
-import mlflow
-import pandas as pd
-import numpy as np
-from plotly.subplots import make_subplots
-import plotly.graph_objects as go
-
-def get_explainers(model, data):
-    explainers = list()
-    for i, _ in enumerate(model['model'].estimators_):
-        transf_data = model['pipe_transform'].transform(data)
-        predictor = model['model'].estimators_[i]
-        explainer = shap.Explainer(predictor, transf_data)
-        explainers.append(explainer)
-    return explainers
-
-def log_explainer(runid, classifiers):
-    with mlflow.start_run(run_id=runid) as run:
-        for i, classifier in enumerate(classifiers):
-            mlflow.sklearn.log_model(classifier, f"explainer/explainer-{i}")
-        print('models were logged')
-
-def load_explainer(runid, target_variables):
-    explainers = list()
-    for i in range(len(target_variables)):
-        folder = f"explainer/explainer-{i}"
-        model = mlflow.sklearn.load_model(f"runs:/{runid}/{folder}")
-        explainers.append(model)
-    return explainers
-
-def get_shapvalues(explainers, data):
-    shap_values = {}
-    for i, explainer in enumerate(explainers):
-        shap_value_i = explainer(data)
-        shap_values[i] = shap_value_i
-    return shap_values
-
-def get_explainerclusters(model, data, targets):
-    clustermodels = list()
-    for i, _ in enumerate(model['model'].estimators_):
-        transf_data = model['pipe_transform'].transform(data)
-        Y = data[targets[i]]
-        cluster_model = shap.utils.hclust(transf_data, Y)
-        clustermodels.append(cluster_model)
-    return clustermodels
-
-def mean_shap(data, explainers, pipe_transform, dict_shap_values):
-    t_data = pipe_transform.transform(data)
-    input_features = t_data.columns
-    shap_results = get_shapvalues(explainers, t_data)
-    arrays_ = list()
-    for k, _ in shap_results.items():
-        arrays_.append(shap_results.get(k).values)
-    shap_results_mean = np.mean(np.array(arrays_), axis=0)
-    df_shap = pd.DataFrame(shap_results_mean, columns=input_features, index=data.index)
-    df_shap['Close'] = data['Close']
-    df_shap['Date'] = data['Date']
-    df_shap = df_shap[['Date', 'Close'] + list(dict_shap_values.keys())]
-    df_shap = df_shap.rename(columns=dict_shap_values)
-    return df_shap
-
-def edge_shap_lines(data, plot=False, look_back=750):
-    ### correct labels ###
-    shap_cols = [col for col in data.columns if col not in ['Date', 'Close']]
-    df = data.sort_values('Date').iloc[-look_back:]
-    fig = make_subplots(specs=[[{"secondary_y": True}]])
-    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
-    for col in shap_cols:
-        fig.add_trace(go.Scatter(x=df.Date, y=df[col], mode='lines+markers', name=col), secondary_y=True)
-    fig.update_layout(title_text="sirius - feature power", width=1200, height=500)
-    if plot:
-        fig.show()
-    return fig
-
-def log_top_shap(runid, top_shap):
-    with mlflow.start_run(run_id=runid) as run:
-        mlflow.log_dict(top_shap, "explainer/top_shap.json")
-        print('artifact was logged')
-
-def load_top_shap(runid):
-    folder = "explainer/top_shap.json"
-    top_shap = mlflow.artifacts.load_dict(f"runs:/{runid}/{folder}")
-    return top_shap