virgo-modules 0.5.0__tar.gz → 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/PKG-INFO +3 -4
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/setup.py +1 -1
- virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py +106 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/edge_utils.py +20 -4
- virgo_modules-0.6.1/virgo_app/virgo_modules/src/edge_utils/shap_utils.py +54 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/stack_model.py +4 -1
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/PKG-INFO +3 -4
- virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py +0 -71
- virgo_modules-0.5.0/virgo_app/virgo_modules/src/edge_utils/shap_utils.py +0 -83
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/LICENSE +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/README.md +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/setup.cfg +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/__init__.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/__init__.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/backtester.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/__init__.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/hmm_utils.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/re_utils.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/ticketer_source.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/transformer_utils.py +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/SOURCES.txt +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
- {virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: virgo_modules
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: data processing and statistical modeling using stock market data
|
|
5
5
|
Home-page: https://github.com/miguelmayhem92/virgo_module
|
|
6
6
|
Author: Miguel Mayhuire
|
|
7
7
|
Author-email: miguelmayhem92@gmail.com
|
|
8
8
|
License: MIT
|
|
9
|
-
Platform: UNKNOWN
|
|
10
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.9
|
|
12
11
|
Classifier: Operating System :: OS Independent
|
|
13
12
|
Requires-Python: >=3.9
|
|
14
13
|
Description-Content-Type: text/markdown
|
|
15
|
-
Provides-Extra: dev
|
|
16
14
|
License-File: LICENSE
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
17
17
|
|
|
18
18
|
# Virgo Package
|
|
19
19
|
|
|
@@ -34,4 +34,3 @@ obj = stock_eda_panel(stock_code = 'PEP', n_days = 20)
|
|
|
34
34
|
obj.get_data()
|
|
35
35
|
print(obj.df.shape)
|
|
36
36
|
```
|
|
37
|
-
|
|
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
|
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name="virgo_modules",
|
|
8
|
-
version="0.5.0",
|
|
8
|
+
version="0.6.1",
|
|
9
9
|
description="data processing and statistical modeling using stock market data",
|
|
10
10
|
package_dir={"": "virgo_app"},
|
|
11
11
|
packages=find_packages(where="virgo_app"),
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from plotly.subplots import make_subplots
|
|
2
|
+
import plotly.graph_objects as go
|
|
3
|
+
from sklearn.pipeline import Pipeline
|
|
4
|
+
import mlflow
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import numpy as np
|
|
7
|
+
from sklearn.base import BaseEstimator, ClassifierMixin
|
|
8
|
+
from mapie.classification import SplitConformalClassifier
|
|
9
|
+
|
|
10
|
+
class ConformalStack(mlflow.pyfunc.PythonModel):
|
|
11
|
+
def __init__(self, model,targets, alphas):
|
|
12
|
+
self.model = model
|
|
13
|
+
self.targets = targets
|
|
14
|
+
self.alphas = alphas
|
|
15
|
+
def fit(self, data):
|
|
16
|
+
self.classifiers = dict()
|
|
17
|
+
for i,target in enumerate(self.targets):
|
|
18
|
+
st = SingleStack(self.model["model"],i)
|
|
19
|
+
st.fit()
|
|
20
|
+
seg_model = Pipeline([
|
|
21
|
+
('pipe',self.model['pipe_transform']),
|
|
22
|
+
('modelbase',st)
|
|
23
|
+
])
|
|
24
|
+
mapie_class = SplitConformalClassifier(seg_model, prefit=True, random_state=123, conformity_score="lac", confidence_level=1-np.array(self.alphas))
|
|
25
|
+
mapie_class.conformalize(data, data[self.targets[i]].values)
|
|
26
|
+
self.classifiers[target] = mapie_class
|
|
27
|
+
def predict_conformal(self, data, ):
|
|
28
|
+
for target in self.targets:
|
|
29
|
+
prefix = target+"_conf"
|
|
30
|
+
_, y_pis = self.classifiers[target].predict_set(data)
|
|
31
|
+
for i,alpha in enumerate(self.alphas):
|
|
32
|
+
data[f'{prefix}-{alpha}'] = y_pis[:,1,i]
|
|
33
|
+
data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True,alpha,0)
|
|
34
|
+
return data
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SingleStack(ClassifierMixin, BaseEstimator):
|
|
38
|
+
def __init__(self, model, estimator_index):
|
|
39
|
+
self.model = model
|
|
40
|
+
self.estimator_index = estimator_index
|
|
41
|
+
|
|
42
|
+
def fit(self):
|
|
43
|
+
self._is_fitted = True
|
|
44
|
+
self.classes_ = [0,1]
|
|
45
|
+
|
|
46
|
+
def predict_proba(self, X):
|
|
47
|
+
metas_pred = dict()
|
|
48
|
+
for i,cont in enumerate(self.model.estimators, start=1):
|
|
49
|
+
_,estimator = cont
|
|
50
|
+
meta_pred = estimator.predict_proba(X)
|
|
51
|
+
metas_pred[f"meta{i}0"] = meta_pred[0][:,1]
|
|
52
|
+
metas_pred[f"meta{i}1"] = meta_pred[1][:,1]
|
|
53
|
+
self.meta_preds_df__ = pd.DataFrame(metas_pred)
|
|
54
|
+
|
|
55
|
+
prediction_vector = list()
|
|
56
|
+
for i,cont in enumerate(self.model.meta_estimators, start=0):
|
|
57
|
+
_,estimator = cont
|
|
58
|
+
metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
|
|
59
|
+
preds = estimator.predict_proba(self.meta_preds_df__[metacols].values)
|
|
60
|
+
prediction_vector.append(preds)
|
|
61
|
+
return prediction_vector[self.estimator_index]
|
|
62
|
+
|
|
63
|
+
def predict(self, X):
|
|
64
|
+
prediction_vector = list()
|
|
65
|
+
_ = self.predict_proba(X)
|
|
66
|
+
for i,cont in enumerate(self.model.meta_estimators, start=0):
|
|
67
|
+
_,estimator = cont
|
|
68
|
+
metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
|
|
69
|
+
preds = estimator.predict(self.meta_preds_df__[metacols].values)
|
|
70
|
+
prediction_vector.append(preds)
|
|
71
|
+
|
|
72
|
+
p = np.array(tuple(prediction_vector))
|
|
73
|
+
return p.reshape((p.shape[1],p.shape[0]))[:,self.estimator_index]
|
|
74
|
+
|
|
75
|
+
def __sklearn_is_fitted__(self):
|
|
76
|
+
return hasattr(self, "_is_fitted") and self._is_fitted
|
|
77
|
+
|
|
78
|
+
def edge_conformal_lines(data, alphas,threshold = 0.6, plot = False, look_back = 750, offset = 0.08):
|
|
79
|
+
### correct labels ####
|
|
80
|
+
df = data.sort_values('Date').iloc[-look_back:]
|
|
81
|
+
fig = make_subplots(specs=[[{"secondary_y": True}]])
|
|
82
|
+
fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',marker = dict(color = 'grey'),line = dict(color = 'grey'),name='Close price'))
|
|
83
|
+
fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up,mode='lines',marker = dict(color = 'blue'),showlegend=True,legendgroup='go up', name='go up'),secondary_y=True)
|
|
84
|
+
fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down,mode='lines',marker = dict(color = 'coral'),showlegend=True,legendgroup='go down',name='go down'),secondary_y=True)
|
|
85
|
+
for i,alpha in enumerate(alphas, start=1):
|
|
86
|
+
try:
|
|
87
|
+
col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
|
|
88
|
+
df_ = df[df[col_alpha] != 0]
|
|
89
|
+
fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'blue')
|
|
90
|
+
,showlegend=False,legendgroup='go up',name='go up', text=df_[col_alpha],textposition="bottom center")
|
|
91
|
+
, secondary_y=True)
|
|
92
|
+
except:
|
|
93
|
+
pass
|
|
94
|
+
try:
|
|
95
|
+
col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
|
|
96
|
+
df_ = df[df[col_alpha] != 0]
|
|
97
|
+
fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'coral')
|
|
98
|
+
,showlegend=False,legendgroup='go down', name='go down',text=df_[col_alpha].astype(str),textposition="bottom center")
|
|
99
|
+
, secondary_y=True)
|
|
100
|
+
except:
|
|
101
|
+
pass
|
|
102
|
+
fig.add_shape(type="line", xref="paper", yref="y2",x0=0.02, y0=threshold, x1=0.9, y1=threshold,line=dict(color="red",dash="dash"))
|
|
103
|
+
fig.update_layout(title_text="sirius - edge probabilities conformal",width=1200,height = 500)
|
|
104
|
+
if plot:
|
|
105
|
+
fig.show()
|
|
106
|
+
return fig
|
{virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/edge_utils.py
RENAMED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
import itertools
|
|
3
|
+
import random
|
|
4
|
+
import math
|
|
3
5
|
|
|
4
6
|
from sklearn.metrics import roc_auc_score, precision_score, recall_score
|
|
5
7
|
from sklearn.pipeline import Pipeline
|
|
@@ -310,7 +312,7 @@ class ExpandingMultipleTimeSeriesKFold:
|
|
|
310
312
|
get number of splits
|
|
311
313
|
"""
|
|
312
314
|
|
|
313
|
-
def __init__(self, df, window_size = 100, number_window=3, overlap_size = 0):
|
|
315
|
+
def __init__(self, df, window_size = 100, number_window=3, overlap_size = 0, sample_parts = None):
|
|
314
316
|
"""
|
|
315
317
|
Initialize object
|
|
316
318
|
|
|
@@ -320,6 +322,7 @@ class ExpandingMultipleTimeSeriesKFold:
|
|
|
320
322
|
number_window (int): number of train splits
|
|
321
323
|
window_size (int): window size data
|
|
322
324
|
overlap_size (int): overlap size
|
|
325
|
+
sample_parts (tuple(float, str)): (fraction, index level name) - fraction of the unique values of that index level to drop at random from the train set; None keeps every partition unit
|
|
323
326
|
|
|
324
327
|
Returns
|
|
325
328
|
-------
|
|
@@ -329,6 +332,7 @@ class ExpandingMultipleTimeSeriesKFold:
|
|
|
329
332
|
self.number_window = number_window
|
|
330
333
|
self.window_size = window_size
|
|
331
334
|
self.overlap_size = overlap_size
|
|
335
|
+
self.sample_parts = sample_parts
|
|
332
336
|
|
|
333
337
|
def split(self, X, y, groups=None):
|
|
334
338
|
"""
|
|
@@ -372,9 +376,21 @@ class ExpandingMultipleTimeSeriesKFold:
|
|
|
372
376
|
max_train_date = max(train_dates)
|
|
373
377
|
min_test_date, max_test_date = min(test_dates), max(test_dates)
|
|
374
378
|
|
|
375
|
-
cut = cut - (self.window_size - self.overlap_size)
|
|
376
|
-
|
|
377
|
-
|
|
379
|
+
cut = cut - (self.window_size - self.overlap_size)
|
|
380
|
+
|
|
381
|
+
if self.sample_parts:
|
|
382
|
+
sample_part = self.sample_parts[0]
|
|
383
|
+
part_col = self.sample_parts[1]
|
|
384
|
+
unique_parts = list(self.df.index.get_level_values(part_col).unique())
|
|
385
|
+
random.shuffle(unique_parts)
|
|
386
|
+
n_select = math.ceil(len(unique_parts)*sample_part)
|
|
387
|
+
to_drop = unique_parts[0:n_select]
|
|
388
|
+
train_index = self.df[
|
|
389
|
+
(self.df.index.get_level_values('Date_i') <= max_train_date)
|
|
390
|
+
&
|
|
391
|
+
(~self.df.index.get_level_values(part_col).isin(to_drop))].index.get_level_values('i')
|
|
392
|
+
else:
|
|
393
|
+
train_index = self.df[self.df.index.get_level_values('Date_i') <= max_train_date].index.get_level_values('i')
|
|
378
394
|
test_index = self.df[(self.df.index.get_level_values('Date_i') >= min_test_date) & (self.df.index.get_level_values('Date_i') <= max_test_date)].index.get_level_values('i')
|
|
379
395
|
|
|
380
396
|
yield train_index, test_index
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import shap
|
|
2
|
+
import mlflow
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import numpy as np
|
|
5
|
+
from plotly.subplots import make_subplots
|
|
6
|
+
import plotly.graph_objects as go
|
|
7
|
+
|
|
8
|
+
class StackInterpretor(mlflow.pyfunc.PythonModel):
|
|
9
|
+
def __init__(self, model, targets):
|
|
10
|
+
self.base_estimators = model.estimators_
|
|
11
|
+
self.targets = targets
|
|
12
|
+
def fit_interpretor(self, data):
|
|
13
|
+
interpretors = {}
|
|
14
|
+
for label, predictor in zip(self.targets,self.base_estimators):
|
|
15
|
+
explainer = shap.Explainer(predictor, data)
|
|
16
|
+
interpretors[label] = explainer
|
|
17
|
+
self.interpretors = interpretors
|
|
18
|
+
def get_shap_values(self, data):
|
|
19
|
+
shap_values = dict()
|
|
20
|
+
for label, interpretor in self.interpretors.items():
|
|
21
|
+
shap_value = interpretor(data)
|
|
22
|
+
shap_values[label] = shap_value
|
|
23
|
+
return shap_values
|
|
24
|
+
def register_map(self, mapping):
|
|
25
|
+
self.mapping = mapping
|
|
26
|
+
|
|
27
|
+
def mean_shap(data, explainers, pipe_transform):
|
|
28
|
+
t_data = pipe_transform.transform(data)
|
|
29
|
+
input_features = t_data.columns
|
|
30
|
+
shap_results = explainers.get_shap_values(t_data)
|
|
31
|
+
dict_shap_values = explainers.mapping
|
|
32
|
+
arrays_ = list()
|
|
33
|
+
for k,_ in shap_results.items():
|
|
34
|
+
arrays_.append(shap_results.get(k).values)
|
|
35
|
+
shap_results_mean = np.mean(np.array(arrays_), axis = 0)
|
|
36
|
+
df_shap = pd.DataFrame(shap_results_mean, columns=input_features, index=data.index)
|
|
37
|
+
df_shap['Close'] = data['Close']
|
|
38
|
+
df_shap['Date'] = data['Date']
|
|
39
|
+
df_shap = df_shap[['Date','Close']+list(dict_shap_values.keys())]
|
|
40
|
+
df_shap = df_shap.rename(columns =dict_shap_values)
|
|
41
|
+
return df_shap
|
|
42
|
+
|
|
43
|
+
def edge_shap_lines(data, plot = False, look_back = 750):
|
|
44
|
+
### correct labels ####
|
|
45
|
+
shap_cols = [col for col in data.columns if col not in ['Date','Close']]
|
|
46
|
+
df = data.sort_values('Date').iloc[-look_back:]
|
|
47
|
+
fig = make_subplots(specs=[[{"secondary_y": True}]])
|
|
48
|
+
fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',marker = dict(color = 'grey'),line = dict(color = 'grey'),name='Close price'))
|
|
49
|
+
for col in shap_cols:
|
|
50
|
+
fig.add_trace(go.Scatter(x=df.Date, y=df[col],mode='lines+markers',name=col),secondary_y=True)
|
|
51
|
+
fig.update_layout(title_text="sirius - feature power",width=1200,height = 500)
|
|
52
|
+
if plot:
|
|
53
|
+
fig.show()
|
|
54
|
+
return fig
|
{virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/stack_model.py
RENAMED
|
@@ -3,7 +3,7 @@ import pandas as pd
|
|
|
3
3
|
|
|
4
4
|
from sklearn.base import BaseEstimator, ClassifierMixin
|
|
5
5
|
|
|
6
|
-
class MyStackingClassifierMultiClass(BaseEstimator, ClassifierMixin):
|
|
6
|
+
class MyStackingClassifierMultiClass(ClassifierMixin, BaseEstimator):
|
|
7
7
|
def __init__(self, estimators, meta_estimators,targets,perc=None,stack_size=None, **kwargs):
|
|
8
8
|
self.estimators = estimators
|
|
9
9
|
self.meta_estimators = meta_estimators
|
|
@@ -14,6 +14,7 @@ class MyStackingClassifierMultiClass(BaseEstimator, ClassifierMixin):
|
|
|
14
14
|
raise Exception('set one option')
|
|
15
15
|
self.stack_size = stack_size
|
|
16
16
|
self.perc = perc
|
|
17
|
+
|
|
17
18
|
def get_index_training(self, X):
|
|
18
19
|
if self.stack_size:
|
|
19
20
|
unique_dates = list(X.index.get_level_values('Date_i').unique())
|
|
@@ -53,6 +54,8 @@ class MyStackingClassifierMultiClass(BaseEstimator, ClassifierMixin):
|
|
|
53
54
|
meta_preds_df[metacols],
|
|
54
55
|
y[X.index.get_level_values('i').isin(meta_indexes)][self.targets[i]]
|
|
55
56
|
)
|
|
57
|
+
self.is_fitted_ = True
|
|
58
|
+
self.classes_ = np.array([[0,1],[0,1]])
|
|
56
59
|
|
|
57
60
|
def predict_proba(self, X):
|
|
58
61
|
metas_pred = dict()
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: virgo-modules
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: data processing and statistical modeling using stock market data
|
|
5
5
|
Home-page: https://github.com/miguelmayhem92/virgo_module
|
|
6
6
|
Author: Miguel Mayhuire
|
|
7
7
|
Author-email: miguelmayhem92@gmail.com
|
|
8
8
|
License: MIT
|
|
9
|
-
Platform: UNKNOWN
|
|
10
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
10
|
Classifier: Programming Language :: Python :: 3.9
|
|
12
11
|
Classifier: Operating System :: OS Independent
|
|
13
12
|
Requires-Python: >=3.9
|
|
14
13
|
Description-Content-Type: text/markdown
|
|
15
|
-
Provides-Extra: dev
|
|
16
14
|
License-File: LICENSE
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
17
17
|
|
|
18
18
|
# Virgo Package
|
|
19
19
|
|
|
@@ -34,4 +34,3 @@ obj = stock_eda_panel(stock_code = 'PEP', n_days = 20)
|
|
|
34
34
|
obj.get_data()
|
|
35
35
|
print(obj.df.shape)
|
|
36
36
|
```
|
|
37
|
-
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
from plotly.subplots import make_subplots
|
|
2
|
-
import plotly.graph_objects as go
|
|
3
|
-
from mapie.classification import MapieClassifier
|
|
4
|
-
from sklearn.pipeline import Pipeline
|
|
5
|
-
import mlflow
|
|
6
|
-
import numpy as np
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def get_conformal_classifiers(model, data, targets):
|
|
10
|
-
classfiers = list()
|
|
11
|
-
for i, _ in enumerate(model['model'].estimators_):
|
|
12
|
-
seg_model = Pipeline([
|
|
13
|
-
('pipe',model['pipe_transform']),
|
|
14
|
-
('model',model['model'].estimators_[i])
|
|
15
|
-
])
|
|
16
|
-
mapie_class = MapieClassifier(seg_model, cv='prefit', random_state=123, method="lac")
|
|
17
|
-
mapie_class.fit(data, data[targets[i]].values)
|
|
18
|
-
classfiers.append(mapie_class)
|
|
19
|
-
return classfiers
|
|
20
|
-
|
|
21
|
-
def log_confmodels(runid, classifiers):
|
|
22
|
-
with mlflow.start_run(run_id=runid) as run:
|
|
23
|
-
for i,classifier in enumerate(classifiers):
|
|
24
|
-
mlflow.sklearn.log_model(classifier,name = f"conformal_model-{i}")
|
|
25
|
-
print('models were logged')
|
|
26
|
-
|
|
27
|
-
def load_confmodel(runid, target_variables):
|
|
28
|
-
classifiers = list()
|
|
29
|
-
for i in range(len(target_variables)):
|
|
30
|
-
folder = f"conformal_model-{i}"
|
|
31
|
-
model = mlflow.sklearn.load_model(f"runs:/{runid}/{folder}",)
|
|
32
|
-
classifiers.append(model)
|
|
33
|
-
return classifiers
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def get_conformal_prediction(classifier, alphas, data, prefix='conf'):
|
|
37
|
-
_, y_pis = classifier.predict(data, alpha=alphas)
|
|
38
|
-
for i,alpha in enumerate(alphas):
|
|
39
|
-
data[f'{prefix}-{alpha}'] = y_pis[:,1,i]
|
|
40
|
-
data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True,alpha,0)
|
|
41
|
-
return data
|
|
42
|
-
|
|
43
|
-
def edge_conformal_lines(data, alphas,threshold = 0.6, plot = False, look_back = 750, offset = 0.08):
|
|
44
|
-
### correct labels ####
|
|
45
|
-
df = data.sort_values('Date').iloc[-look_back:]
|
|
46
|
-
fig = make_subplots(specs=[[{"secondary_y": True}]])
|
|
47
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',marker = dict(color = 'grey'),line = dict(color = 'grey'),name='Close price'))
|
|
48
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up,mode='lines',marker = dict(color = 'blue'),showlegend=True,legendgroup='go up', name='go up'),secondary_y=True)
|
|
49
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down,mode='lines',marker = dict(color = 'coral'),showlegend=True,legendgroup='go down',name='go down'),secondary_y=True)
|
|
50
|
-
for i,alpha in enumerate(alphas, start=1):
|
|
51
|
-
try:
|
|
52
|
-
col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
|
|
53
|
-
df_ = df[df[col_alpha] != 0]
|
|
54
|
-
fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'blue')
|
|
55
|
-
,showlegend=False,legendgroup='go up',name='go up', text=df_[col_alpha],textposition="bottom center")
|
|
56
|
-
, secondary_y=True)
|
|
57
|
-
except:
|
|
58
|
-
pass
|
|
59
|
-
try:
|
|
60
|
-
col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
|
|
61
|
-
df_ = df[df[col_alpha] != 0]
|
|
62
|
-
fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'coral')
|
|
63
|
-
,showlegend=False,legendgroup='go down', name='go down',text=df_[col_alpha].astype(str),textposition="bottom center")
|
|
64
|
-
, secondary_y=True)
|
|
65
|
-
except:
|
|
66
|
-
pass
|
|
67
|
-
fig.add_shape(type="line", xref="paper", yref="y2",x0=0.02, y0=threshold, x1=0.9, y1=threshold,line=dict(color="red",dash="dash"))
|
|
68
|
-
fig.update_layout(title_text="sirius - edge probabilities conformal",width=1200,height = 500)
|
|
69
|
-
if plot:
|
|
70
|
-
fig.show()
|
|
71
|
-
return fig
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
import shap
|
|
2
|
-
import mlflow
|
|
3
|
-
import pandas as pd
|
|
4
|
-
import numpy as np
|
|
5
|
-
from plotly.subplots import make_subplots
|
|
6
|
-
import plotly.graph_objects as go
|
|
7
|
-
|
|
8
|
-
def get_explainers(model, data):
|
|
9
|
-
explainers = list()
|
|
10
|
-
for i, _ in enumerate(model['model'].estimators_):
|
|
11
|
-
transf_data = model['pipe_transform'].transform(data)
|
|
12
|
-
predictor = model['model'].estimators_[i]
|
|
13
|
-
explainer= shap.Explainer(predictor, transf_data)
|
|
14
|
-
explainers.append(explainer)
|
|
15
|
-
return explainers
|
|
16
|
-
|
|
17
|
-
def log_explainer(runid, classifiers):
|
|
18
|
-
with mlflow.start_run(run_id=runid) as run:
|
|
19
|
-
for i,classifier in enumerate(classifiers):
|
|
20
|
-
mlflow.sklearn.log_model(classifier,f"explainer/explainer-{i}")
|
|
21
|
-
print('models were logged')
|
|
22
|
-
|
|
23
|
-
def load_explainer(runid, target_variables):
|
|
24
|
-
explainers = list()
|
|
25
|
-
for i in range(len(target_variables)):
|
|
26
|
-
folder = f"explainer/explainer-{i}"
|
|
27
|
-
model = mlflow.sklearn.load_model(f"runs:/{runid}/{folder}")
|
|
28
|
-
explainers.append(model)
|
|
29
|
-
return explainers
|
|
30
|
-
|
|
31
|
-
def get_shapvalues(explainers, data):
|
|
32
|
-
shap_values = {}
|
|
33
|
-
for i,explainer in enumerate(explainers):
|
|
34
|
-
shap_value_i = explainer(data)
|
|
35
|
-
shap_values[i] = shap_value_i
|
|
36
|
-
return shap_values
|
|
37
|
-
|
|
38
|
-
def get_explainerclusters(model, data, targets):
|
|
39
|
-
clustermodels = list()
|
|
40
|
-
for i, _ in enumerate(model['model'].estimators_):
|
|
41
|
-
transf_data = model['pipe_transform'].transform(data)
|
|
42
|
-
Y = data[targets[i]]
|
|
43
|
-
cluster_model = shap.utils.hclust(transf_data, Y)
|
|
44
|
-
clustermodels.append(cluster_model)
|
|
45
|
-
return clustermodels
|
|
46
|
-
|
|
47
|
-
def mean_shap(data, explainers, pipe_transform, dict_shap_values):
|
|
48
|
-
t_data = pipe_transform.transform(data)
|
|
49
|
-
input_features = t_data.columns
|
|
50
|
-
shap_results = get_shapvalues(explainers,t_data)
|
|
51
|
-
arrays_ = list()
|
|
52
|
-
for k,_ in shap_results.items():
|
|
53
|
-
arrays_.append(shap_results.get(k).values)
|
|
54
|
-
shap_results_mean = np.mean(np.array(arrays_), axis = 0)
|
|
55
|
-
df_shap = pd.DataFrame(shap_results_mean, columns=input_features, index=data.index)
|
|
56
|
-
df_shap['Close'] = data['Close']
|
|
57
|
-
df_shap['Date'] = data['Date']
|
|
58
|
-
df_shap = df_shap[['Date','Close']+list(dict_shap_values.keys())]
|
|
59
|
-
df_shap = df_shap.rename(columns =dict_shap_values)
|
|
60
|
-
return df_shap
|
|
61
|
-
|
|
62
|
-
def edge_shap_lines(data, plot = False, look_back = 750):
|
|
63
|
-
### correct labels ####
|
|
64
|
-
shap_cols = [col for col in data.columns if col not in ['Date','Close']]
|
|
65
|
-
df = data.sort_values('Date').iloc[-look_back:]
|
|
66
|
-
fig = make_subplots(specs=[[{"secondary_y": True}]])
|
|
67
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',marker = dict(color = 'grey'),line = dict(color = 'grey'),name='Close price'))
|
|
68
|
-
for col in shap_cols:
|
|
69
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df[col],mode='lines+markers',name=col),secondary_y=True)
|
|
70
|
-
fig.update_layout(title_text="sirius - feature power",width=1200,height = 500)
|
|
71
|
-
if plot:
|
|
72
|
-
fig.show()
|
|
73
|
-
return fig
|
|
74
|
-
|
|
75
|
-
def log_top_shap(runid, top_shap):
|
|
76
|
-
with mlflow.start_run(run_id=runid) as run:
|
|
77
|
-
mlflow.log_dict(top_shap,f"explainer/top_shap.json")
|
|
78
|
-
print('artifact was logged')
|
|
79
|
-
|
|
80
|
-
def load_top_shap(runid):
|
|
81
|
-
folder = f"explainer/top_shap.json"
|
|
82
|
-
top_shap = mlflow.artifacts.load_dict(f"runs:/{runid}/{folder}")
|
|
83
|
-
return top_shap
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/edge_utils/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules/src/transformer_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{virgo_modules-0.5.0 → virgo_modules-0.6.1}/virgo_app/virgo_modules.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|