virgo-modules 0.5.0__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (26)
  1. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/PKG-INFO +3 -4
  2. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/setup.py +1 -1
  3. virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py +106 -0
  4. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/edge_utils.py +20 -4
  5. virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/shap_utils.py +54 -0
  6. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/stack_model.py +4 -1
  7. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/PKG-INFO +3 -4
  8. virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py +0 -71
  9. virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/shap_utils.py +0 -83
  10. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/LICENSE +0 -0
  11. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/README.md +0 -0
  12. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/setup.cfg +0 -0
  13. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/__init__.py +0 -0
  14. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/__init__.py +0 -0
  15. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
  16. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/backtester.py +0 -0
  17. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/__init__.py +0 -0
  18. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/hmm_utils.py +0 -0
  19. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
  20. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/re_utils.py +0 -0
  21. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/ticketer_source.py +0 -0
  22. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/transformer_utils.py +0 -0
  23. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/SOURCES.txt +0 -0
  24. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
  25. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
  26. {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
--- virgo_modules-0.5.0/PKG-INFO
+++ virgo_modules-0.6.1/PKG-INFO
@@ -1,19 +1,19 @@
 Metadata-Version: 2.1
 Name: virgo_modules
-Version: 0.5.0
+Version: 0.6.1
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire
 Author-email: miguelmayhem92@gmail.com
 License: MIT
-Platform: UNKNOWN
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
-Provides-Extra: dev
 License-File: LICENSE
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
 
 # Virgo Package
 
@@ -34,4 +34,3 @@ obj = stock_eda_panel(stock_code = 'PEP', n_days = 20)
 obj.get_data()
 print(obj.df.shape)
 ```
-
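Two metadata fixes ride along with the version bump: the legacy `Platform: UNKNOWN` line is gone, and the `dev` extra, previously declared with nothing attached, now resolves to `pytest>=7.0`. A small, hedged way to confirm what an installed copy declares (only the distribution name comes from this diff; the rest is standard library):

```python
# After `pip install "virgo-modules[dev]"`, list the declared requirements.
# Expect 'pytest>=7.0; extra == "dev"' among the entries.
from importlib.metadata import requires

print(requires("virgo-modules"))
```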
--- virgo_modules-0.5.0/setup.py
+++ virgo_modules-0.6.1/setup.py
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
 
 setup(
     name="virgo_modules",
-    version="0.5.0",
+    version="0.6.1",
     description="data processing and statistical modeling using stock market data",
     package_dir={"": "virgo_app"},
     packages=find_packages(where="virgo_app"),
--- /dev/null
+++ virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py
@@ -0,0 +1,106 @@
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+from sklearn.pipeline import Pipeline
+import mlflow
+import pandas as pd
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from mapie.classification import SplitConformalClassifier
+
+class ConformalStack(mlflow.pyfunc.PythonModel):
+    def __init__(self, model, targets, alphas):
+        self.model = model
+        self.targets = targets
+        self.alphas = alphas
+
+    def fit(self, data):
+        self.classifiers = dict()
+        for i, target in enumerate(self.targets):
+            st = SingleStack(self.model["model"], i)
+            st.fit()
+            seg_model = Pipeline([
+                ('pipe', self.model['pipe_transform']),
+                ('modelbase', st)
+            ])
+            mapie_class = SplitConformalClassifier(
+                seg_model, prefit=True, random_state=123,
+                conformity_score="lac", confidence_level=1 - np.array(self.alphas))
+            mapie_class.conformalize(data, data[self.targets[i]].values)
+            self.classifiers[target] = mapie_class
+
+    def predict_conformal(self, data):
+        for target in self.targets:
+            prefix = target + "_conf"
+            _, y_pis = self.classifiers[target].predict_set(data)
+            for i, alpha in enumerate(self.alphas):
+                data[f'{prefix}-{alpha}'] = y_pis[:, 1, i]
+                data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True, alpha, 0)
+        return data
+
+
+class SingleStack(ClassifierMixin, BaseEstimator):
+    def __init__(self, model, estimator_index):
+        self.model = model
+        self.estimator_index = estimator_index
+
+    def fit(self):
+        self._is_fitted = True
+        self.classes_ = [0, 1]
+
+    def predict_proba(self, X):
+        metas_pred = dict()
+        for i, cont in enumerate(self.model.estimators, start=1):
+            _, estimator = cont
+            meta_pred = estimator.predict_proba(X)
+            metas_pred[f"meta{i}0"] = meta_pred[0][:, 1]
+            metas_pred[f"meta{i}1"] = meta_pred[1][:, 1]
+        self.meta_preds_df__ = pd.DataFrame(metas_pred)
+
+        prediction_vector = list()
+        for i, cont in enumerate(self.model.meta_estimators, start=0):
+            _, estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1, len(self.model.estimators) + 1)]
+            preds = estimator.predict_proba(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+        return prediction_vector[self.estimator_index]
+
+    def predict(self, X):
+        prediction_vector = list()
+        _ = self.predict_proba(X)
+        for i, cont in enumerate(self.model.meta_estimators, start=0):
+            _, estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1, len(self.model.estimators) + 1)]
+            preds = estimator.predict(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+
+        p = np.array(tuple(prediction_vector))
+        return p.reshape((p.shape[1], p.shape[0]))[:, self.estimator_index]
+
+    def __sklearn_is_fitted__(self):
+        return hasattr(self, "_is_fitted") and self._is_fitted
+
+
+def edge_conformal_lines(data, alphas, threshold=0.6, plot=False, look_back=750, offset=0.08):
+    ### correct labels ###
+    df = data.sort_values('Date').iloc[-look_back:]
+    fig = make_subplots(specs=[[{"secondary_y": True}]])
+    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
+    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up, mode='lines', marker=dict(color='blue'), showlegend=True, legendgroup='go up', name='go up'), secondary_y=True)
+    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down, mode='lines', marker=dict(color='coral'), showlegend=True, legendgroup='go down', name='go down'), secondary_y=True)
+    for i, alpha in enumerate(alphas, start=1):
+        try:
+            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
+            df_ = df[df[col_alpha] != 0]
+            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset * i), mode='markers',
+                                     marker=dict(opacity=0.7, size=10, color='blue'), showlegend=False,
+                                     legendgroup='go up', name='go up', text=df_[col_alpha], textposition="bottom center"),
+                          secondary_y=True)
+        except:
+            pass
+        try:
+            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
+            df_ = df[df[col_alpha] != 0]
+            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset * i), mode='markers',
+                                     marker=dict(opacity=0.7, size=10, color='coral'), showlegend=False,
+                                     legendgroup='go down', name='go down', text=df_[col_alpha].astype(str), textposition="bottom center"),
+                          secondary_y=True)
        except:
+            pass
+    fig.add_shape(type="line", xref="paper", yref="y2", x0=0.02, y0=threshold, x1=0.9, y1=threshold, line=dict(color="red", dash="dash"))
+    fig.update_layout(title_text="sirius - edge probabilities conformal", width=1200, height=500)
+    if plot:
+        fig.show()
+    return fig
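The rewrite tracks MAPIE's v1 API: the removed module's `MapieClassifier(cv='prefit', method='lac')` plus `predict(alpha=...)` flow becomes `SplitConformalClassifier(prefit=True, conformity_score='lac')` with `conformalize`/`predict_set`, and the loose helper functions are folded into one MLflow-loggable object. A hedged usage sketch; `stack`, `calib_df`, and `live_df` are hypothetical stand-ins for the fitted model dict and the calibration/scoring frames:

```python
# Minimal sketch, assuming `stack` is a fitted dict-like artifact exposing
# 'model' (a MyStackingClassifierMultiClass) and 'pipe_transform' steps, and
# that calib_df carries the target columns alongside the features.
targets = ['target_up', 'target_down']   # assumed names, consistent with the plotting code
alphas = [0.05, 0.1]

conf = ConformalStack(stack, targets=targets, alphas=alphas)
conf.fit(calib_df)                         # conformalizes one classifier per target
scored = conf.predict_conformal(live_df)   # adds e.g. 'target_up_conf-0.05' columns
fig = edge_conformal_lines(scored, alphas=alphas)  # offset markers where the set includes class 1
```

Keying the conformalized classifiers by target name, rather than the old positional list, is what lets `predict_conformal` write per-target column prefixes in one pass.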
--- virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/edge_utils.py
+++ virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/edge_utils.py
@@ -1,5 +1,7 @@
 import numpy as np
 import itertools
+import random
+import math
 
 from sklearn.metrics import roc_auc_score, precision_score, recall_score
 from sklearn.pipeline import Pipeline
@@ -310,7 +312,7 @@ class ExpandingMultipleTimeSeriesKFold:
     get number of splits
     """
 
-    def __init__(self, df, window_size=100, number_window=3, overlap_size=0):
+    def __init__(self, df, window_size=100, number_window=3, overlap_size=0, sample_parts=None):
        """
        Initialize object
 
@@ -320,6 +322,7 @@ class ExpandingMultipleTimeSeriesKFold:
        number_window (int): number of train splits
        window_size (int): window size data
        overlap_size (int): overlap size
+        sample_parts (tuple(float, str)): fraction of partition units to randomly drop from each train fold, and the name of the index level that identifies those units
 
        Returns
        -------
@@ -329,6 +332,7 @@ class ExpandingMultipleTimeSeriesKFold:
        self.number_window = number_window
        self.window_size = window_size
        self.overlap_size = overlap_size
+        self.sample_parts = sample_parts
 
    def split(self, X, y, groups=None):
        """
@@ -372,9 +376,21 @@ class ExpandingMultipleTimeSeriesKFold:
            max_train_date = max(train_dates)
            min_test_date, max_test_date = min(test_dates), max(test_dates)
 
-            cut = cut - (self.window_size - self.overlap_size)
-
-            train_index = self.df[self.df.index.get_level_values('Date_i') <= max_train_date].index.get_level_values('i')
+            cut = cut - (self.window_size - self.overlap_size)
+
+            if self.sample_parts:
+                sample_part = self.sample_parts[0]
+                part_col = self.sample_parts[1]
+                unique_parts = list(self.df.index.get_level_values(part_col).unique())
+                random.shuffle(unique_parts)
+                n_select = math.ceil(len(unique_parts) * sample_part)
+                to_drop = unique_parts[0:n_select]
+                train_index = self.df[
+                    (self.df.index.get_level_values('Date_i') <= max_train_date)
+                    & (~self.df.index.get_level_values(part_col).isin(to_drop))].index.get_level_values('i')
+            else:
+                train_index = self.df[self.df.index.get_level_values('Date_i') <= max_train_date].index.get_level_values('i')
            test_index = self.df[(self.df.index.get_level_values('Date_i') >= min_test_date) & (self.df.index.get_level_values('Date_i') <= max_test_date)].index.get_level_values('i')
 
            yield train_index, test_index
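The splitter change is backward compatible: with `sample_parts=None` the training index is built exactly as before, while a `(fraction, level_name)` tuple drops a random fraction of partition units from every training fold (test folds are untouched). A hedged usage sketch; the `'symbol'` level name and the frames are illustrative assumptions, the splitter itself only requires the `'Date_i'` and `'i'` index levels it already uses:

```python
# Sketch under assumptions: df is multi-indexed with 'Date_i', 'i', and a
# hypothetical 'symbol' level identifying each series. Each training fold
# now excludes a random 30% of the symbols.
cv = ExpandingMultipleTimeSeriesKFold(df, window_size=100, number_window=3,
                                      sample_parts=(0.3, 'symbol'))
for train_idx, test_idx in cv.split(df, y=None):
    # Both are values of the 'i' index level, usable to subset the frame.
    train_df = df[df.index.get_level_values('i').isin(train_idx)]
    test_df = df[df.index.get_level_values('i').isin(test_idx)]
    print(len(train_df), len(test_df))
```

Note that `random.shuffle` is not seeded inside `split`, so the dropped units differ between runs unless the caller seeds the `random` module beforehand.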
--- /dev/null
+++ virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/shap_utils.py
@@ -0,0 +1,54 @@
+import shap
+import mlflow
+import pandas as pd
+import numpy as np
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+
+class StackInterpretor(mlflow.pyfunc.PythonModel):
+    def __init__(self, model, targets):
+        self.base_estimators = model.estimators_
+        self.targets = targets
+
+    def fit_interpretor(self, data):
+        interpretors = {}
+        for label, predictor in zip(self.targets, self.base_estimators):
+            explainer = shap.Explainer(predictor, data)
+            interpretors[label] = explainer
+        self.interpretors = interpretors
+
+    def get_shap_values(self, data):
+        shap_values = dict()
+        for label, interpretor in self.interpretors.items():
+            shap_value = interpretor(data)
+            shap_values[label] = shap_value
+        return shap_values
+
+    def register_map(self, mapping):
+        self.mapping = mapping
+
+def mean_shap(data, explainers, pipe_transform):
+    t_data = pipe_transform.transform(data)
+    input_features = t_data.columns
+    shap_results = explainers.get_shap_values(t_data)
+    dict_shap_values = explainers.mapping
+    arrays_ = list()
+    for k, _ in shap_results.items():
+        arrays_.append(shap_results.get(k).values)
+    shap_results_mean = np.mean(np.array(arrays_), axis=0)
+    df_shap = pd.DataFrame(shap_results_mean, columns=input_features, index=data.index)
+    df_shap['Close'] = data['Close']
+    df_shap['Date'] = data['Date']
+    df_shap = df_shap[['Date', 'Close'] + list(dict_shap_values.keys())]
+    df_shap = df_shap.rename(columns=dict_shap_values)
+    return df_shap
+
+def edge_shap_lines(data, plot=False, look_back=750):
+    ### correct labels ###
+    shap_cols = [col for col in data.columns if col not in ['Date', 'Close']]
+    df = data.sort_values('Date').iloc[-look_back:]
+    fig = make_subplots(specs=[[{"secondary_y": True}]])
+    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
+    for col in shap_cols:
+        fig.add_trace(go.Scatter(x=df.Date, y=df[col], mode='lines+markers', name=col), secondary_y=True)
+    fig.update_layout(title_text="sirius - feature power", width=1200, height=500)
+    if plot:
+        fig.show()
+    return fig
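Same consolidation pattern as the conformal module: explainer construction, SHAP-value extraction, and the feature-name mapping all live on one `StackInterpretor`, while the old MLflow log/load wrappers and the hclust helper are dropped rather than ported. A hedged sketch; `stack`, `pipe`, the background/live frames, and the mapping entry are illustrative assumptions (`estimators_` on the stacked model is assumed to hold fitted base estimators, one per target):

```python
# Minimal sketch, assuming pipe is the fitted feature pipeline used at
# training time and stack['model'].estimators_ matches the targets one-to-one.
interp = StackInterpretor(stack['model'], targets=['target_up', 'target_down'])
interp.fit_interpretor(pipe.transform(background_df))  # one shap.Explainer per target
interp.register_map({'rsi_14': 'RSI (14d)'})           # raw column -> display name (assumed)

df_shap = mean_shap(live_df, interp, pipe)  # SHAP values averaged across targets, mapped columns only
fig = edge_shap_lines(df_shap)              # close price vs. per-feature SHAP lines
```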
--- virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/stack_model.py
+++ virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/stack_model.py
@@ -3,7 +3,7 @@ import pandas as pd
 
 from sklearn.base import BaseEstimator, ClassifierMixin
 
-class MyStackingClassifierMultiClass(BaseEstimator, ClassifierMixin):
+class MyStackingClassifierMultiClass(ClassifierMixin, BaseEstimator):
    def __init__(self, estimators, meta_estimators, targets, perc=None, stack_size=None, **kwargs):
        self.estimators = estimators
        self.meta_estimators = meta_estimators
@@ -14,6 +14,7 @@ class MyStackingClassifierMultiClass(BaseEstimator, ClassifierMixin):
            raise Exception('set one option')
        self.stack_size = stack_size
        self.perc = perc
+
    def get_index_training(self, X):
        if self.stack_size:
            unique_dates = list(X.index.get_level_values('Date_i').unique())
@@ -53,6 +54,8 @@ class MyStackingClassifierMultiClass(BaseEstimator, ClassifierMixin):
                meta_preds_df[metacols],
                y[X.index.get_level_values('i').isin(meta_indexes)][self.targets[i]]
            )
+        self.is_fitted_ = True
+        self.classes_ = np.array([[0,1],[0,1]])
 
    def predict_proba(self, X):
        metas_pred = dict()
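The `stack_model` tweaks are small but they are what lets the new conformal module treat the stack as a prefit sklearn classifier: `ClassifierMixin` now precedes `BaseEstimator` (the MRO order sklearn's conventions expect), and `fit` leaves behind the trailing-underscore markers that `sklearn.utils.validation.check_is_fitted` probes for, which the previous version likely failed. A hedged sketch of the effect; the constructor arguments are placeholders:

```python
from sklearn.utils.validation import check_is_fitted

# Placeholder inputs: estimators/meta_estimators are (name, estimator) tuples
# per the stacking code, X_train/y_train the multi-indexed training frames.
model = MyStackingClassifierMultiClass(estimators, meta_estimators,
                                       targets=['target_up', 'target_down'],
                                       stack_size=200)
model.fit(X_train, y_train)
check_is_fitted(model)   # passes now that fit() sets is_fitted_
print(model.classes_)    # [[0 1] [0 1]], one binary label set per target
```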
--- virgo_modules-0.5.0/virgo_app/virgo_modules.egg-info/PKG-INFO
+++ virgo_modules-0.6.1/virgo_app/virgo_modules.egg-info/PKG-INFO
@@ -1,19 +1,19 @@
 Metadata-Version: 2.1
 Name: virgo-modules
-Version: 0.5.0
+Version: 0.6.1
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire
 Author-email: miguelmayhem92@gmail.com
 License: MIT
-Platform: UNKNOWN
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
-Provides-Extra: dev
 License-File: LICENSE
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
 
 # Virgo Package
 
@@ -34,4 +34,3 @@ obj = stock_eda_panel(stock_code = 'PEP', n_days = 20)
 obj.get_data()
 print(obj.df.shape)
 ```
-
--- virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from plotly.subplots import make_subplots
-import plotly.graph_objects as go
-from mapie.classification import MapieClassifier
-from sklearn.pipeline import Pipeline
-import mlflow
-import numpy as np
-
-
-def get_conformal_classifiers(model, data, targets):
-    classfiers = list()
-    for i, _ in enumerate(model['model'].estimators_):
-        seg_model = Pipeline([
-            ('pipe', model['pipe_transform']),
-            ('model', model['model'].estimators_[i])
-        ])
-        mapie_class = MapieClassifier(seg_model, cv='prefit', random_state=123, method="lac")
-        mapie_class.fit(data, data[targets[i]].values)
-        classfiers.append(mapie_class)
-    return classfiers
-
-def log_confmodels(runid, classifiers):
-    with mlflow.start_run(run_id=runid) as run:
-        for i, classifier in enumerate(classifiers):
-            mlflow.sklearn.log_model(classifier, name=f"conformal_model-{i}")
-        print('models were logged')
-
-def load_confmodel(runid, target_variables):
-    classifiers = list()
-    for i in range(len(target_variables)):
-        folder = f"conformal_model-{i}"
-        model = mlflow.sklearn.load_model(f"runs:/{runid}/{folder}")
-        classifiers.append(model)
-    return classifiers
-
-
-def get_conformal_prediction(classifier, alphas, data, prefix='conf'):
-    _, y_pis = classifier.predict(data, alpha=alphas)
-    for i, alpha in enumerate(alphas):
-        data[f'{prefix}-{alpha}'] = y_pis[:, 1, i]
-        data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True, alpha, 0)
-    return data
-
-def edge_conformal_lines(data, alphas, threshold=0.6, plot=False, look_back=750, offset=0.08):
-    ### corect labels ####
-    df = data.sort_values('Date').iloc[-look_back:]
-    fig = make_subplots(specs=[[{"secondary_y": True}]])
-    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
-    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up, mode='lines', marker=dict(color='blue'), showlegend=True, legendgroup='go up', name='go up'), secondary_y=True)
-    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down, mode='lines', marker=dict(color='coral'), showlegend=True, legendgroup='go down', name='go down'), secondary_y=True)
-    for i, alpha in enumerate(alphas, start=1):
-        try:
-            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
-            df_ = df[df[col_alpha] != 0]
-            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset * i), mode='markers',
-                                     marker=dict(opacity=0.7, size=10, color='blue'), showlegend=False,
-                                     legendgroup='go up', name='go up', text=df_[col_alpha], textposition="bottom center"),
-                          secondary_y=True)
-        except:
-            pass
-        try:
-            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
-            df_ = df[df[col_alpha] != 0]
-            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset * i), mode='markers',
-                                     marker=dict(opacity=0.7, size=10, color='coral'), showlegend=False,
-                                     legendgroup='go down', name='go down', text=df_[col_alpha].astype(str), textposition="bottom center"),
-                          secondary_y=True)
-        except:
-            pass
-    fig.add_shape(type="line", xref="paper", yref="y2", x0=0.02, y0=threshold, x1=0.9, y1=threshold, line=dict(color="red", dash="dash"))
-    fig.update_layout(title_text="sirius - edge probabilities conformal", width=1200, height=500)
-    if plot:
-        fig.show()
-    return fig
--- virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/shap_utils.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import shap
-import mlflow
-import pandas as pd
-import numpy as np
-from plotly.subplots import make_subplots
-import plotly.graph_objects as go
-
-def get_explainers(model, data):
-    explainers = list()
-    for i, _ in enumerate(model['model'].estimators_):
-        transf_data = model['pipe_transform'].transform(data)
-        predictor = model['model'].estimators_[i]
-        explainer = shap.Explainer(predictor, transf_data)
-        explainers.append(explainer)
-    return explainers
-
-def log_explainer(runid, classifiers):
-    with mlflow.start_run(run_id=runid) as run:
-        for i, classifier in enumerate(classifiers):
-            mlflow.sklearn.log_model(classifier, f"explainer/explainer-{i}")
-        print('models were logged')
-
-def load_explainer(runid, target_variables):
-    explainers = list()
-    for i in range(len(target_variables)):
-        folder = f"explainer/explainer-{i}"
-        model = mlflow.sklearn.load_model(f"runs:/{runid}/{folder}")
-        explainers.append(model)
-    return explainers
-
-def get_shapvalues(explainers, data):
-    shap_values = {}
-    for i, explainer in enumerate(explainers):
-        shap_value_i = explainer(data)
-        shap_values[i] = shap_value_i
-    return shap_values
-
-def get_explainerclusters(model, data, targets):
-    clustermodels = list()
-    for i, _ in enumerate(model['model'].estimators_):
-        transf_data = model['pipe_transform'].transform(data)
-        Y = data[targets[i]]
-        cluster_model = shap.utils.hclust(transf_data, Y)
-        clustermodels.append(cluster_model)
-    return clustermodels
-
-def mean_shap(data, explainers, pipe_transform, dict_shap_values):
-    t_data = pipe_transform.transform(data)
-    input_features = t_data.columns
-    shap_results = get_shapvalues(explainers, t_data)
-    arrays_ = list()
-    for k, _ in shap_results.items():
-        arrays_.append(shap_results.get(k).values)
-    shap_results_mean = np.mean(np.array(arrays_), axis=0)
-    df_shap = pd.DataFrame(shap_results_mean, columns=input_features, index=data.index)
-    df_shap['Close'] = data['Close']
-    df_shap['Date'] = data['Date']
-    df_shap = df_shap[['Date', 'Close'] + list(dict_shap_values.keys())]
-    df_shap = df_shap.rename(columns=dict_shap_values)
-    return df_shap
-
-def edge_shap_lines(data, plot=False, look_back=750):
-    ### corect labels ####
-    shap_cols = [col for col in data.columns if col not in ['Date', 'Close']]
-    df = data.sort_values('Date').iloc[-look_back:]
-    fig = make_subplots(specs=[[{"secondary_y": True}]])
-    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
-    for col in shap_cols:
-        fig.add_trace(go.Scatter(x=df.Date, y=df[col], mode='lines+markers', name=col), secondary_y=True)
-    fig.update_layout(title_text="sirius - feature power", width=1200, height=500)
-    if plot:
-        fig.show()
-    return fig
-
-def log_top_shap(runid, top_shap):
-    with mlflow.start_run(run_id=runid) as run:
-        mlflow.log_dict(top_shap, f"explainer/top_shap.json")
-        print('artifact was logged')
-
-def load_top_shap(runid):
-    folder = f"explainer/top_shap.json"
-    top_shap = mlflow.artifacts.load_dict(f"runs:/{runid}/{folder}")
-    return top_shap