virgo-modules 0.2.10__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/PKG-INFO +1 -1
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/setup.py +1 -1
- virgo_modules-0.3.1/virgo_app/virgo_modules/src/edge_utils/__init__.py +0 -0
- virgo_modules-0.3.1/virgo_app/virgo_modules/src/edge_utils/conformal_utils.py +71 -0
- {virgo_modules-0.2.10/virgo_app/virgo_modules/src → virgo_modules-0.3.1/virgo_app/virgo_modules/src/edge_utils}/edge_utils.py +30 -2
- virgo_modules-0.3.1/virgo_app/virgo_modules/src/edge_utils/shap_utils.py +83 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/re_utils.py +8 -25
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules.egg-info/SOURCES.txt +5 -2
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/LICENSE +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/README.md +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/setup.cfg +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/__init__.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/__init__.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/backtester.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/hmm_utils.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/ticketer_source.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/transformer_utils.py +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
- {virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
|
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name="virgo_modules",
|
|
8
|
-
version="0.
|
|
8
|
+
version="0.3.1",
|
|
9
9
|
description="data processing and statistical modeling using stock market data",
|
|
10
10
|
package_dir={"": "virgo_app"},
|
|
11
11
|
packages=find_packages(where="virgo_app"),
|
|
File without changes
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from plotly.subplots import make_subplots
|
|
2
|
+
import plotly.graph_objects as go
|
|
3
|
+
from mapie.classification import MapieClassifier
|
|
4
|
+
from sklearn.pipeline import Pipeline
|
|
5
|
+
import mlflow
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_conformal_classifiers(model, data, targets):
    """
    build one prefit MAPIE conformal classifier per fitted estimator

    Every entry of model['model'].estimators_ is chained after
    model['pipe_transform'] in a new sklearn Pipeline, wrapped in a
    MapieClassifier (cv='prefit', method="lac", random_state=123) and
    fitted on data against the target column found at the same position
    in targets.

    Parameters:
        model: object indexable by 'pipe_transform' and 'model'; model['model'] must expose estimators_
        data (pd.DataFrame): data passed to MapieClassifier.fit; must contain the columns named in targets
        targets (list): target column names, indexed in the same order as estimators_

    Returns:
        conformal_models (list): fitted MapieClassifier objects, one per estimator
    """
    conformal_models = []
    for position, estimator in enumerate(model['model'].estimators_):
        segment_pipeline = Pipeline([
            ('pipe', model['pipe_transform']),
            ('model', estimator),
        ])
        # cv='prefit': the wrapped pipeline is handed over as already trained
        calibrated = MapieClassifier(
            segment_pipeline,
            cv='prefit',
            random_state=123,
            method="lac",
        )
        calibrated.fit(data, data[targets[position]].values)
        conformal_models.append(calibrated)
    return conformal_models
|
|
20
|
+
|
|
21
|
+
def log_confmodels(runid, classifiers):
    """
    log conformal classifiers as sklearn models under an existing mlflow run

    The model at position i is stored at the artifact path
    "conformal/conformal_model-{i}".

    Parameters:
        runid (str): id of the mlflow run to reopen
        classifiers (list): models to log, in the order they should be numbered

    Returns:
        None
    """
    with mlflow.start_run(run_id=runid):
        for position, conformal_model in enumerate(classifiers):
            artifact_path = f"conformal/conformal_model-{position}"
            mlflow.sklearn.log_model(conformal_model, artifact_path)
        print('models were logged')
|
|
26
|
+
|
|
27
|
+
def load_confmodel(runid, target_variables):
    """
    load the conformal classifiers stored under an mlflow run

    One model is read per entry of target_variables, from
    "runs:/{runid}/conformal/conformal_model-{i}". Only the length of
    target_variables is used, not its values.

    Parameters:
        runid (str): id of the mlflow run holding the artifacts
        target_variables (list): target names; its length sets how many models are loaded

    Returns:
        classifiers (list): loaded models, ordered by their index
    """
    return [
        mlflow.sklearn.load_model(f"runs:/{runid}/conformal/conformal_model-{position}")
        for position in range(len(target_variables))
    ]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_conformal_prediction(classifier, alphas, data, prefix='conf'):
    """
    add one conformal-set indicator column per alpha to data

    classifier.predict(data, alpha=alphas) is expected to return a pair
    whose second element is indexable as y_pis[:, 1, i] (rows, class 1,
    i-th alpha). For each alpha a column named "{prefix}-{alpha}" is
    written: it holds alpha where class 1 is in the prediction set and 0
    elsewhere.

    Parameters:
        classifier: fitted object exposing predict(data, alpha=...), e.g. a MapieClassifier
        alphas (float or list of float): error level(s); a single float is treated as a one-element list
        data (pd.DataFrame): input rows; NOTE it is modified in place, the new columns are added to it
        prefix (str): prefix of the generated column names

    Returns:
        data (pd.DataFrame): the same object that was passed in, with the new columns
    """
    # a bare float cannot be enumerated below, so wrap it into a list
    if np.isscalar(alphas):
        alphas = [alphas]
    _, y_pis = classifier.predict(data, alpha=alphas)
    for i, alpha in enumerate(alphas):
        # slice 1 of the second axis is the membership flag of the positive class
        in_set = np.asarray(y_pis[:, 1, i], dtype=bool)
        data[f'{prefix}-{alpha}'] = np.where(in_set, alpha, 0)
    return data
|
|
42
|
+
|
|
43
|
+
def _find_conformal_column(columns, alpha, target):
    """return the first column name containing both str(alpha) and target, or None"""
    token = str(alpha)
    return next(
        (col for col in columns if isinstance(col, str) and token in col and target in col),
        None,
    )


def edge_conformal_lines(data, alphas,threshold = 0.6, plot = False, look_back = 750, offset = 0.08):
    """
    produce a plotly plot of closing prices, edge probabilities and conformal hits

    Close is drawn on the primary y axis; proba_target_up and
    proba_target_down are drawn on the secondary one. For every alpha,
    the first column whose name contains str(alpha) and 'target_up'
    (resp. 'target_down') is looked up; rows where that column is not 0
    get a marker placed offset*i above the matching probability line,
    i being the 1-based position of alpha. An alpha with no matching
    column is skipped silently.

    Parameters:
        data (pd.DataFrame): must contain Date, Close, proba_target_up and proba_target_down
        alphas (list): alpha levels whose conformal columns should be drawn
        threshold (float): level of the dashed red line on the secondary axis
        plot (boolean): if true, display the figure before returning it
        look_back (int): number of most recent rows (by Date) to display
        offset (float): vertical gap added per alpha level to the markers

    Returns:
        fig (obj): plotly figure
    """
    # TODO(review): the original carried a "correct labels" note here - confirm legend labels
    df = data.sort_values('Date').iloc[-look_back:]
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',marker = dict(color = 'grey'),line = dict(color = 'grey'),name='Close price'))
    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up,mode='lines',marker = dict(color = 'blue'),showlegend=True,legendgroup='go up', name='go up'),secondary_y=True)
    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down,mode='lines',marker = dict(color = 'coral'),showlegend=True,legendgroup='go down',name='go down'),secondary_y=True)

    # (name fragment of the conformal column, probability column, marker colour, legend label)
    sides = (
        ('target_up', 'proba_target_up', 'blue', 'go up'),
        ('target_down', 'proba_target_down', 'coral', 'go down'),
    )
    for i, alpha in enumerate(alphas, start=1):
        for target, proba_col, color, label in sides:
            col_alpha = _find_conformal_column(df.columns, alpha, target)
            if col_alpha is None:
                # no conformal column for this alpha/side: nothing to draw
                continue
            df_ = df[df[col_alpha] != 0]
            fig.add_trace(
                go.Scatter(
                    x=df_.Date,
                    y=df_[proba_col] + (offset*i),
                    mode='markers',
                    marker=dict(opacity=0.7, size=10, color=color),
                    showlegend=False,
                    legendgroup=label,
                    name=label,
                    text=df_[col_alpha].astype(str),
                    textposition="bottom center",
                ),
                secondary_y=True,
            )

    fig.add_shape(type="line", xref="paper", yref="y2",x0=0.02, y0=threshold, x1=0.9, y1=threshold,line=dict(color="red",dash="dash"))
    fig.update_layout(title_text="sirius - edge probabilities conformal",width=1200,height = 500)
    if plot:
        fig.show()
    return fig
|
|
@@ -9,7 +9,10 @@ from feature_engine.imputation import MeanMedianImputer
|
|
|
9
9
|
from feature_engine.discretisation import EqualWidthDiscretiser
|
|
10
10
|
from feature_engine.datetime import DatetimeFeatures
|
|
11
11
|
|
|
12
|
-
from
|
|
12
|
+
from ..transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector
|
|
13
|
+
|
|
14
|
+
from plotly.subplots import make_subplots
|
|
15
|
+
import plotly.graph_objects as go
|
|
13
16
|
|
|
14
17
|
class produce_model_wrapper:
|
|
15
18
|
"""
|
|
@@ -386,4 +389,29 @@ class ExpandingMultipleTimeSeriesKFold:
|
|
|
386
389
|
-------
|
|
387
390
|
number_window (int): number of splits
|
|
388
391
|
"""
|
|
389
|
-
return self.number_window
|
|
392
|
+
return self.number_window
|
|
393
|
+
|
|
394
|
+
def edge_probas_lines(data, threshold, plot = False, look_back = 750):
    """
    draw closing prices against the up/down edge probabilities

    Close goes on the primary y axis, both probability series on the
    secondary one, and a dashed red horizontal line marks threshold.

    Parameters:
        data (pd.DataFrame): must contain Date, Close, proba_target_down and proba_target_up
        threshold (float): level of the dashed reference line on the secondary axis
        plot (boolean): if true, display the figure before returning it
        look_back (int): number of trailing rows to display

    Returns:
        fig (obj): plotly figure
    """
    needed_cols = ['Date','Close','proba_target_down','proba_target_up']
    df = data[needed_cols].iloc[-look_back:]

    close_trace = go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', name='Close price')
    down_trace = go.Scatter(
        x=df.Date,
        y=df.proba_target_down,
        mode='lines',
        marker={'color': 'coral'},
        name='go down',
    )
    up_trace = go.Scatter(
        x=df.Date,
        y=df.proba_target_up,
        mode='lines',
        marker={'opacity': 0.1, 'size': 80},
        name='go up',
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(close_trace)
    fig.add_trace(down_trace, secondary_y=True)
    fig.add_trace(up_trace, secondary_y=True)

    # yref="y2" pins the threshold line to the secondary (probability) axis
    fig.add_shape(
        type="line",
        xref="paper",
        yref="y2",
        x0=0.02,
        y0=threshold,
        x1=0.9,
        y1=threshold,
        line={'color': "red", 'dash': "dash"},
    )
    fig.update_layout(title_text="sirius - edge probabilities", width=1200, height=500)
    if plot:
        fig.show()
    return fig
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import shap
|
|
2
|
+
import mlflow
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import numpy as np
|
|
5
|
+
from plotly.subplots import make_subplots
|
|
6
|
+
import plotly.graph_objects as go
|
|
7
|
+
|
|
8
|
+
def get_explainers(model, data):
    """
    build one shap explainer per fitted estimator

    data is passed through model['pipe_transform'] once and the result
    is handed to shap.Explainer together with each entry of
    model['model'].estimators_.

    Parameters:
        model: object indexable by 'pipe_transform' and 'model'; model['model'] must expose estimators_
        data (pd.DataFrame): raw data, transformed before being given to shap.Explainer

    Returns:
        explainers (list): shap explainers, in the order of estimators_
    """
    # the transformed data does not depend on the estimator, so compute it a single time
    transf_data = model['pipe_transform'].transform(data)
    return [
        shap.Explainer(predictor, transf_data)
        for predictor in model['model'].estimators_
    ]
|
|
16
|
+
|
|
17
|
+
def log_explainer(runid, classifiers):
    """
    log explainers as sklearn-flavoured models under an existing mlflow run

    The object at position i is stored at the artifact path
    "explainer/explainer-{i}".

    Parameters:
        runid (str): id of the mlflow run to reopen
        classifiers (list): objects to log (presumably shap explainers, given the artifact path)

    Returns:
        None
    """
    # NOTE(review): explainers are stored through mlflow.sklearn; confirm this flavour is intended
    with mlflow.start_run(run_id=runid):
        for position, explainer in enumerate(classifiers):
            artifact_path = f"explainer/explainer-{position}"
            mlflow.sklearn.log_model(explainer, artifact_path)
        print('models were logged')
|
|
22
|
+
|
|
23
|
+
def load_explainer(runid, target_variables):
    """
    load the explainers stored under an mlflow run

    One object is read per entry of target_variables, from
    "runs:/{runid}/explainer/explainer-{i}". Only the length of
    target_variables is used, not its values.

    Parameters:
        runid (str): id of the mlflow run holding the artifacts
        target_variables (list): target names; its length sets how many explainers are loaded

    Returns:
        explainers (list): loaded objects, ordered by their index
    """
    return [
        mlflow.sklearn.load_model(f"runs:/{runid}/explainer/explainer-{position}")
        for position in range(len(target_variables))
    ]
|
|
30
|
+
|
|
31
|
+
def get_shapvalues(explainers, data):
    """
    call every explainer on data and collect the results by position

    Parameters:
        explainers (list): callables (e.g. shap explainers) taking data as their single argument
        data: input handed unchanged to each explainer

    Returns:
        shap_values (dict): position of the explainer -> value it returned
    """
    return {position: explainer(data) for position, explainer in enumerate(explainers)}
|
|
37
|
+
|
|
38
|
+
def get_explainerclusters(model, data, targets):
    """
    compute one shap hierarchical clustering per fitted estimator

    data is passed through model['pipe_transform'] once; for the i-th
    entry of model['model'].estimators_, shap.utils.hclust is called with
    the transformed data and the column data[targets[i]]. The estimators
    themselves are not used, only their count.

    Parameters:
        model: object indexable by 'pipe_transform' and 'model'; model['model'] must expose estimators_
        data (pd.DataFrame): raw data; must contain the columns named in targets
        targets (list): target column names, indexed in the same order as estimators_

    Returns:
        clustermodels (list): results of shap.utils.hclust, one per estimator
    """
    # the transformed data is the same for every target, so compute it a single time
    transf_data = model['pipe_transform'].transform(data)
    return [
        shap.utils.hclust(transf_data, data[targets[position]])
        for position, _ in enumerate(model['model'].estimators_)
    ]
|
|
46
|
+
|
|
47
|
+
def mean_shap(data, explainers, pipe_transform, dict_shap_values):
    """
    average the shap values of several explainers and keep selected features

    data is transformed with pipe_transform, every explainer is evaluated
    on the result through get_shapvalues, and the .values arrays are
    averaged across explainers. The averages are put in a frame indexed
    like data, Close and Date are copied over from data, and only Date,
    Close and the keys of dict_shap_values are kept, the latter renamed
    to the dict's values.

    Parameters:
        data (pd.DataFrame): raw data; must contain Date and Close
        explainers (list): explainers whose results expose a .values array
        pipe_transform: fitted transformer; its output must expose .columns (presumably a DataFrame)
        dict_shap_values (dict): transformed feature name -> display name of the features to keep

    Returns:
        df_shap (pd.DataFrame): Date, Close and the renamed mean shap columns
    """
    t_data = pipe_transform.transform(data)
    per_explainer = get_shapvalues(explainers, t_data)
    stacked = np.array([explanation.values for explanation in per_explainer.values()])
    # axis 0 runs over the explainers, so this averages them element-wise
    averaged = np.mean(stacked, axis=0)

    df_shap = pd.DataFrame(averaged, columns=t_data.columns, index=data.index)
    df_shap['Close'] = data['Close']
    df_shap['Date'] = data['Date']

    kept_cols = ['Date', 'Close'] + list(dict_shap_values.keys())
    return df_shap[kept_cols].rename(columns=dict_shap_values)
|
|
61
|
+
|
|
62
|
+
def edge_shap_lines(data, plot = False, look_back = 750):
    """
    produce a plotly plot of closing prices and per-feature series

    Close is drawn in grey on the primary y axis; every column of data
    other than Date and Close gets its own trace on the secondary axis.

    Parameters:
        data (pd.DataFrame): must contain Date and Close; all other columns are plotted
        plot (boolean): if true, display the figure before returning it
        look_back (int): number of most recent rows (by Date) to display

    Returns:
        fig (obj): plotly figure
    """
    # TODO(review): the original carried a "correct labels" note here - confirm trace labels
    reserved = ['Date','Close']
    feature_cols = [name for name in data.columns if name not in reserved]
    df = data.sort_values('Date').iloc[-look_back:]

    close_trace = go.Scatter(
        x=df.Date,
        y=df.Close,
        mode='lines+markers',
        marker={'color': 'grey'},
        line={'color': 'grey'},
        name='Close price',
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(close_trace)
    for name in feature_cols:
        feature_trace = go.Scatter(x=df.Date, y=df[name], mode='lines+markers', name=name)
        fig.add_trace(feature_trace, secondary_y=True)
    fig.update_layout(title_text="sirius - feature power", width=1200, height=500)
    if plot:
        fig.show()
    return fig
|
|
74
|
+
|
|
75
|
+
def log_top_shap(runid, top_shap):
    """
    store a dictionary as "explainer/top_shap.json" under an existing mlflow run

    Parameters:
        runid (str): id of the mlflow run to reopen
        top_shap (dict): content handed to mlflow.log_dict

    Returns:
        None
    """
    artifact_file = "explainer/top_shap.json"
    with mlflow.start_run(run_id=runid):
        mlflow.log_dict(top_shap, artifact_file)
        print('artifact was logged')
|
|
79
|
+
|
|
80
|
+
def load_top_shap(runid):
    """
    read back the "explainer/top_shap.json" artifact of an mlflow run

    Parameters:
        runid (str): id of the mlflow run holding the artifact

    Returns:
        top_shap (dict): content returned by mlflow.artifacts.load_dict
    """
    artifact_uri = f"runs:/{runid}/explainer/top_shap.json"
    return mlflow.artifacts.load_dict(artifact_uri)
|
|
@@ -1574,8 +1574,9 @@ def produce_probas(model,data, target_variables):
|
|
|
1574
1574
|
"""
|
|
1575
1575
|
label_prediction = ['proba_'+x for x in target_variables]
|
|
1576
1576
|
predictions = model.predict_proba(data)
|
|
1577
|
+
if isinstance(predictions, list):
|
|
1578
|
+
predictions = np.array([ x[:,1].T for x in predictions]).T
|
|
1577
1579
|
predictions = pd.DataFrame(predictions, columns = label_prediction, index = data.index)
|
|
1578
|
-
|
|
1579
1580
|
result_df = pd.concat([data, predictions], axis=1)
|
|
1580
1581
|
result_df = result_df[['Date'] + target_variables + label_prediction]
|
|
1581
1582
|
|
|
@@ -1604,27 +1605,9 @@ def produce_signals(result_df, feature_name, threshold, label_prediction):
|
|
|
1604
1605
|
|
|
1605
1606
|
return result_df
|
|
1606
1607
|
|
|
1607
|
-
def
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
plot (boolean): if true, display plot
|
|
1614
|
-
threshold (float): edge threshold
|
|
1615
|
-
look_back (int): number of rows back to display
|
|
1616
|
-
|
|
1617
|
-
Returns:
|
|
1618
|
-
fig (obj): plotly go object
|
|
1619
|
-
"""
|
|
1620
|
-
df = data[['Date','Close','proba_target_down','proba_target_up']].iloc[-look_back:]
|
|
1621
|
-
|
|
1622
|
-
fig = make_subplots(specs=[[{"secondary_y": True}]])
|
|
1623
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',name='Close price'))
|
|
1624
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down,mode='lines',marker = dict(color = 'coral'),name='go down'),secondary_y=True)
|
|
1625
|
-
fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up,mode='lines',marker = dict(opacity=0.1,size=80), name='go up'),secondary_y=True)
|
|
1626
|
-
fig.add_shape(type="line", xref="paper", yref="y2",x0=0.02, y0=threshold, x1=0.9, y1=threshold,line=dict(color="red",dash="dash"),)
|
|
1627
|
-
fig.update_layout(title_text="sirius - edge probabilities",width=1200,height = 500)
|
|
1628
|
-
if plot:
|
|
1629
|
-
fig.show()
|
|
1630
|
-
return fig
|
|
1608
|
+
def clean_cols(data, patterns):
    """
    drop every column whose name contains at least one of the patterns

    Matching is a plain substring test. Column labels that are not
    strings never match and are therefore kept.

    Parameters:
        data (pd.DataFrame): input frame; it is not modified
        patterns (list): substrings; a column is dropped as soon as one of them occurs in its name

    Returns:
        data (pd.DataFrame): new frame without the matching columns
    """
    # materialise once so a one-shot iterable can be re-scanned for every column
    patterns = list(patterns)
    drop_cols = [
        col for col in data.columns
        if isinstance(col, str) and any(pattern in col for pattern in patterns)
    ]
    return data.drop(columns=drop_cols)
|
|
@@ -10,9 +10,12 @@ virgo_app/virgo_modules.egg-info/top_level.txt
|
|
|
10
10
|
virgo_app/virgo_modules/src/__init__.py
|
|
11
11
|
virgo_app/virgo_modules/src/aws_utils.py
|
|
12
12
|
virgo_app/virgo_modules/src/backtester.py
|
|
13
|
-
virgo_app/virgo_modules/src/edge_utils.py
|
|
14
13
|
virgo_app/virgo_modules/src/hmm_utils.py
|
|
15
14
|
virgo_app/virgo_modules/src/pull_artifacts.py
|
|
16
15
|
virgo_app/virgo_modules/src/re_utils.py
|
|
17
16
|
virgo_app/virgo_modules/src/ticketer_source.py
|
|
18
|
-
virgo_app/virgo_modules/src/transformer_utils.py
|
|
17
|
+
virgo_app/virgo_modules/src/transformer_utils.py
|
|
18
|
+
virgo_app/virgo_modules/src/edge_utils/__init__.py
|
|
19
|
+
virgo_app/virgo_modules/src/edge_utils/conformal_utils.py
|
|
20
|
+
virgo_app/virgo_modules/src/edge_utils/edge_utils.py
|
|
21
|
+
virgo_app/virgo_modules/src/edge_utils/shap_utils.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules/src/transformer_utils.py
RENAMED
|
File without changes
|
{virgo_modules-0.2.10 → virgo_modules-0.3.1}/virgo_app/virgo_modules.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|