virgo-modules 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

File without changes
@@ -0,0 +1,71 @@
1
+ from plotly.subplots import make_subplots
2
+ import plotly.graph_objects as go
3
+ from mapie.classification import MapieClassifier
4
+ from sklearn.pipeline import Pipeline
5
+ import mlflow
6
+ import numpy as np
7
+
8
+
9
def get_conformal_classifiers(model, data, targets):
    """
    build one prefit conformal classifier per estimator of the model

    Parameters:
        model (obj): object indexed with 'pipe_transform' and 'model'; model['model'] must expose estimators_
        data (pd.DataFrame): data used to fit the conformal wrappers, it must contain the target columns
        targets (list): target column names, matched by position with model['model'].estimators_

    Returns:
        conformal_models (list): fitted MapieClassifier objects, one per estimator
    """
    conformal_models = []
    for position, estimator in enumerate(model['model'].estimators_):
        # every wrapper reuses the shared transformer and adds its own estimator
        steps = [('pipe', model['pipe_transform']), ('model', estimator)]
        conformal = MapieClassifier(Pipeline(steps), cv='prefit', random_state=123, method="lac")
        target_values = data[targets[position]].values
        conformal.fit(data, target_values)
        conformal_models.append(conformal)
    return conformal_models
20
+
21
def log_confmodels(runid, classifiers):
    """
    log the conformal classifiers with mlflow under the given run id

    Parameters:
        runid (str): run id handed to mlflow.start_run
        classifiers (list): models to log, the model at position i is stored as conformal/conformal_model-i

    Returns:
        None
    """
    with mlflow.start_run(run_id=runid):
        for position, conformal in enumerate(classifiers):
            artifact_path = f"conformal/conformal_model-{position}"
            mlflow.sklearn.log_model(conformal, artifact_path)
        print('models were logged')
26
+
27
def load_confmodel(runid, target_variables):
    """
    load the conformal classifiers logged under an mlflow run

    Parameters:
        runid (str): run id used to build the runs:/ uri
        target_variables (list): only its length is used, one model is loaded per element

    Returns:
        classifiers (list): loaded models ordered by position
    """
    return [
        mlflow.sklearn.load_model(f"runs:/{runid}/conformal/conformal_model-{position}")
        for position in range(len(target_variables))
    ]
34
+
35
+
36
def get_conformal_prediction(classifier, alphas, data, prefix='conf'):
    """
    add one conformal flag column per alpha to data

    For every alpha the column '<prefix>-<alpha>' holds alpha on the rows where
    the prediction set flags the class at index 1, and 0 on the other rows.

    Parameters:
        classifier (obj): conformal model; predict(data, alpha=alphas) must return a pair whose second item is indexable as [row, class, alpha]
        alphas (list): alpha levels, also used to name the new columns
        data (pd.DataFrame): data to predict on, it is modified in place
        prefix (str): prefix of the new columns

    Returns:
        data (pd.DataFrame): the same object that was received, with the new columns
    """
    _, y_pis = classifier.predict(data, alpha=alphas)
    for i, alpha in enumerate(alphas):
        # membership of class 1 in the prediction set at this alpha level
        in_prediction_set = np.asarray(y_pis[:, 1, i], dtype=bool)
        data[f'{prefix}-{alpha}'] = np.where(in_prediction_set, alpha, 0)
    return data
42
+
43
def _add_conformal_markers(fig, df, alpha, target, color, legend_label, vertical_shift):
    """
    add the marker trace of one alpha / target pair, do nothing when no column matches

    Parameters:
        fig (obj): plotly figure with a secondary y axis
        df (pd.DataFrame): data with Date, proba_<target> and the conformal columns
        alpha (float): alpha level, its string form is searched in the column names
        target (str): 'target_up' or 'target_down'
        color (str): marker color
        legend_label (str): name and legend group of the trace
        vertical_shift (float): value added to the probability so markers sit above the line

    Returns:
        None
    """
    matches = [x for x in df.columns if isinstance(x, str) and str(alpha) in x and target in x]
    if not matches:
        # best effort: this alpha / target pair simply has no conformal column
        return
    col_alpha = matches[0]
    flagged = df[df[col_alpha] != 0]
    fig.add_trace(
        go.Scatter(
            x=flagged.Date,
            y=flagged[f'proba_{target}'] + vertical_shift,
            mode='markers',
            marker=dict(opacity=0.7, size=10, color=color),
            showlegend=False,
            legendgroup=legend_label,
            name=legend_label,
            text=flagged[col_alpha].astype(str),
            textposition="bottom center",
        ),
        secondary_y=True,
    )


def edge_conformal_lines(data, alphas, threshold=0.6, plot=False, look_back=750, offset=0.08):
    """
    produce a plotly plot of closing prices, edge probabilities and conformal flags

    Parameters:
        data (pd.DataFrame): asset data with Date, Close, proba_target_up, proba_target_down and the conformal columns
        alphas (list): alpha levels whose conformal columns are drawn as markers
        threshold (float): edge threshold drawn as a dashed red line on the secondary axis
        plot (boolean): if true, display plot
        look_back (int): number of rows back to display, after sorting by Date
        offset (float): vertical gap between the probability line and the markers, multiplied by the alpha position (starting at 1)

    Returns:
        fig (obj): plotly go object
    """
    df = data.sort_values('Date').iloc[-look_back:]
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines+markers', marker=dict(color='grey'), line=dict(color='grey'), name='Close price'))
    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up, mode='lines', marker=dict(color='blue'), showlegend=True, legendgroup='go up', name='go up'), secondary_y=True)
    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down, mode='lines', marker=dict(color='coral'), showlegend=True, legendgroup='go down', name='go down'), secondary_y=True)
    for i, alpha in enumerate(alphas, start=1):
        shift = offset * i
        # a missing column is skipped explicitly; any other error is no longer hidden
        _add_conformal_markers(fig, df, alpha, 'target_up', 'blue', 'go up', shift)
        _add_conformal_markers(fig, df, alpha, 'target_down', 'coral', 'go down', shift)
    fig.add_shape(type="line", xref="paper", yref="y2", x0=0.02, y0=threshold, x1=0.9, y1=threshold, line=dict(color="red", dash="dash"))
    fig.update_layout(title_text="sirius - edge probabilities conformal", width=1200, height=500)
    if plot:
        fig.show()
    return fig
@@ -9,7 +9,10 @@ from feature_engine.imputation import MeanMedianImputer
9
9
  from feature_engine.discretisation import EqualWidthDiscretiser
10
10
  from feature_engine.datetime import DatetimeFeatures
11
11
 
12
- from .transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector
12
+ from ..transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector
13
+
14
+ from plotly.subplots import make_subplots
15
+ import plotly.graph_objects as go
13
16
 
14
17
  class produce_model_wrapper:
15
18
  """
@@ -386,4 +389,29 @@ class ExpandingMultipleTimeSeriesKFold:
386
389
  -------
387
390
  number_window (int): number of splits
388
391
  """
389
- return self.number_window
392
+ return self.number_window
393
+
394
def edge_probas_lines(data, threshold, plot = False, look_back = 750):
    """
    build a plotly figure with the closing price and both edge probabilities

    Parameters:
        data (pd.DataFrame): asset data with Date, Close, proba_target_down and proba_target_up
        threshold (float): edge threshold, drawn as a dashed red line on the secondary axis
        plot (boolean): if true, display plot
        look_back (int): number of rows back to display

    Returns:
        fig (obj): plotly go object
    """
    columns = ['Date','Close','proba_target_down','proba_target_up']
    window = data[columns].iloc[-look_back:]

    close_trace = go.Scatter(x=window.Date, y=window.Close, mode='lines+markers', name='Close price')
    down_trace = go.Scatter(x=window.Date, y=window.proba_target_down, mode='lines', marker=dict(color='coral'), name='go down')
    up_trace = go.Scatter(x=window.Date, y=window.proba_target_up, mode='lines', marker=dict(opacity=0.1, size=80), name='go up')

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    # price goes on the primary axis, probabilities on the secondary one
    fig.add_trace(close_trace)
    fig.add_trace(down_trace, secondary_y=True)
    fig.add_trace(up_trace, secondary_y=True)
    fig.add_shape(
        type="line", xref="paper", yref="y2",
        x0=0.02, y0=threshold, x1=0.9, y1=threshold,
        line=dict(color="red", dash="dash"),
    )
    fig.update_layout(title_text="sirius - edge probabilities", width=1200, height=500)
    if plot:
        fig.show()
    return fig
@@ -0,0 +1,81 @@
1
+ import shap
2
+ import mlflow
3
+ import pandas as pd
4
+ import numpy as np
5
+ from plotly.subplots import make_subplots
6
+ import plotly.graph_objects as go
7
+
8
def get_explainers(model, data):
    """
    build one shap explainer per estimator of the model

    Parameters:
        model (obj): object indexed with 'pipe_transform' and 'model'; model['model'] must expose estimators_
        data (pd.DataFrame): raw data, it is passed through model['pipe_transform'] before reaching shap

    Returns:
        explainers (list): shap.Explainer objects, one per estimator
    """
    # the transformed data does not depend on the estimator, compute it once
    transf_data = model['pipe_transform'].transform(data)
    return [shap.Explainer(predictor, transf_data) for predictor in model['model'].estimators_]
16
+
17
def log_explainer(runid, classifiers):
    """
    log the given objects with mlflow under the given run id

    Parameters:
        runid (str): run id handed to mlflow.start_run
        classifiers (list): objects to log, the one at position i is stored as explainer/explainer-i

    Returns:
        None
    """
    with mlflow.start_run(run_id=runid):
        for position, explainer in enumerate(classifiers):
            artifact_path = f"explainer/explainer-{position}"
            mlflow.sklearn.log_model(explainer, artifact_path)
        print('models were logged')
22
+
23
def load_explainer(runid, target_variables):
    """
    load the explainers logged under an mlflow run

    Parameters:
        runid (str): run id used to build the runs:/ uri
        target_variables (list): only its length is used, one explainer is loaded per element

    Returns:
        explainers (list): loaded objects ordered by position
    """
    return [
        mlflow.sklearn.load_model(f"runs:/{runid}/explainer/explainer-{position}")
        for position in range(len(target_variables))
    ]
30
+
31
def get_shapvalues(explainers, data):
    """
    call every explainer on data and collect the results by position

    Parameters:
        explainers (list): callables, each one is called as explainer(data)
        data (obj): input handed unchanged to every explainer

    Returns:
        shap_values (dict): position of the explainer -> value returned by that explainer
    """
    return {position: explainer(data) for position, explainer in enumerate(explainers)}
37
+
38
def get_explainerclusters(model, data, targets):
    """
    compute one shap hierarchical clustering per estimator of the model

    Parameters:
        model (obj): object indexed with 'pipe_transform' and 'model'; model['model'] must expose estimators_
        data (pd.DataFrame): raw data, it must contain the target columns
        targets (list): target column names, matched by position with model['model'].estimators_

    Returns:
        clustermodels (list): results of shap.utils.hclust, one per estimator
    """
    # the transformed data does not depend on the estimator, compute it once
    transf_data = model['pipe_transform'].transform(data)
    return [
        shap.utils.hclust(transf_data, data[targets[position]])
        for position, _ in enumerate(model['model'].estimators_)
    ]
46
+
47
def mean_shap(data, explainers, pipe_transform):
    """
    average the shap values of all the explainers, feature by feature and row by row

    Parameters:
        data (pd.DataFrame): raw data with at least the Close and Date columns
        explainers (list): callables applied to the transformed data, each result must expose .values
        pipe_transform (obj): object whose transform(data) returns a frame with a columns attribute

    Returns:
        df_shap (pd.DataFrame): mean shap value per transformed feature, indexed like data, plus Close and Date
    """
    t_data = pipe_transform.transform(data)
    input_features = t_data.columns
    # one array of shap values per explainer, kept in explainer order
    per_explainer = [explainer(t_data).values for explainer in explainers]
    shap_results_mean = np.mean(np.array(per_explainer), axis=0)
    df_shap = pd.DataFrame(shap_results_mean, columns=input_features, index=data.index)
    df_shap['Close'] = data['Close']
    df_shap['Date'] = data['Date']
    return df_shap
59
+
60
def edge_shap_lines(data, dict_shap_values, plot = False, look_back = 750):
    """
    build a plotly figure with the closing price and the selected shap columns

    Parameters:
        data (pd.DataFrame): data with Date, Close and one column per key of dict_shap_values
        dict_shap_values (dict): column name -> dict whose 'tag' entry is used as the trace name
        plot (boolean): if true, display plot
        look_back (int): number of rows back to display, after sorting by Date

    Returns:
        fig (obj): plotly go object
    """
    window = data.sort_values('Date').iloc[-look_back:]
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    close_trace = go.Scatter(
        x=window.Date, y=window.Close, mode='lines+markers',
        marker=dict(color='grey'), line=dict(color='grey'), name='Close price',
    )
    fig.add_trace(close_trace)
    # every selected feature is drawn on the secondary axis
    for col, meta in dict_shap_values.items():
        feature_trace = go.Scatter(x=window.Date, y=window[col], mode='lines+markers', name=meta.get('tag'))
        fig.add_trace(feature_trace, secondary_y=True)
    fig.update_layout(title_text="sirius - feature power", width=1200, height=500)
    if plot:
        fig.show()
    return fig
72
+
73
def log_top_shap(runid, top_shap):
    """
    log a dictionary as explainer/top_shap.json with mlflow under the given run id

    Parameters:
        runid (str): run id handed to mlflow.start_run
        top_shap (dict): content handed to mlflow.log_dict

    Returns:
        None
    """
    artifact_file = "explainer/top_shap.json"
    with mlflow.start_run(run_id=runid):
        mlflow.log_dict(top_shap, artifact_file)
        print('artifact was logged')
77
+
78
def load_top_shap(runid):
    """
    load the explainer/top_shap.json artifact of an mlflow run

    Parameters:
        runid (str): run id used to build the runs:/ uri

    Returns:
        top_shap (dict): value returned by mlflow.artifacts.load_dict
    """
    artifact_uri = f"runs:/{runid}/explainer/top_shap.json"
    return mlflow.artifacts.load_dict(artifact_uri)
@@ -1574,8 +1574,9 @@ def produce_probas(model,data, target_variables):
1574
1574
  """
1575
1575
  label_prediction = ['proba_'+x for x in target_variables]
1576
1576
  predictions = model.predict_proba(data)
1577
+ if isinstance(predictions, list):
1578
+ predictions = np.array([ x[:,1].T for x in predictions]).T
1577
1579
  predictions = pd.DataFrame(predictions, columns = label_prediction, index = data.index)
1578
-
1579
1580
  result_df = pd.concat([data, predictions], axis=1)
1580
1581
  result_df = result_df[['Date'] + target_variables + label_prediction]
1581
1582
 
@@ -1604,27 +1605,9 @@ def produce_signals(result_df, feature_name, threshold, label_prediction):
1604
1605
 
1605
1606
  return result_df
1606
1607
 
1607
- def edge_probas_lines(data, threshold, plot = False, look_back = 750):
1608
- """
1609
- produce a plotly plot of edges and closing prices
1610
-
1611
- Parameters:
1612
- data (pd.DataFrame): asset data with edge probabilities
1613
- plot (boolean): if true, display plot
1614
- threshold (float): edge threshold
1615
- look_back (int): number of rows back to display
1616
-
1617
- Returns:
1618
- fig (obj): plotly go object
1619
- """
1620
- df = data[['Date','Close','proba_target_down','proba_target_up']].iloc[-look_back:]
1621
-
1622
- fig = make_subplots(specs=[[{"secondary_y": True}]])
1623
- fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',name='Close price'))
1624
- fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down,mode='lines',marker = dict(color = 'coral'),name='go down'),secondary_y=True)
1625
- fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up,mode='lines',marker = dict(opacity=0.1,size=80), name='go up'),secondary_y=True)
1626
- fig.add_shape(type="line", xref="paper", yref="y2",x0=0.02, y0=threshold, x1=0.9, y1=threshold,line=dict(color="red",dash="dash"),)
1627
- fig.update_layout(title_text="sirius - edge probabilities",width=1200,height = 500)
1628
- if plot:
1629
- fig.show()
1630
- return fig
1608
def clean_cols(data, patterns):
    """
    drop every column whose name contains at least one of the patterns

    Parameters:
        data (pd.DataFrame): input data, it is not modified
        patterns (list): substrings searched in the column names

    Returns:
        data (pd.DataFrame): new frame without the matching columns
    """
    # materialize once so any iterable of patterns can be scanned for every column
    patterns = list(patterns)
    drop_cols = [
        x for x in data.columns
        # non-string labels cannot contain a substring, keep them
        if isinstance(x, str) and any(pattern in x for pattern in patterns)
    ]
    return data.drop(columns=drop_cols)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo-modules
3
- Version: 0.2.10
3
+ Version: 0.3.0
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
@@ -0,0 +1,18 @@
1
+ virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
4
+ virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
5
+ virgo_modules/src/hmm_utils.py,sha256=fFWxmh9q3rjiKRHnxNk9k7O4fDrxVxkmp3pbpLvktjc,21116
6
+ virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
7
+ virgo_modules/src/re_utils.py,sha256=n5LD2IKURNsBsWeQ_xw98lh1YwLdl3efNLNT7qwFGzM,73863
8
+ virgo_modules/src/ticketer_source.py,sha256=4vT8YweRtS28Zs66DBUG2rZ5EcFk9S0hiqNS_fz_I1c,100017
9
+ virgo_modules/src/transformer_utils.py,sha256=LLwKYZRq5hrPVimnq3taD0Lh-q3Bq21fy1I4Icbnxi8,7677
10
+ virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ virgo_modules/src/edge_utils/conformal_utils.py,sha256=4gAOh2vxeWGR7iMF3TKnfQcLHlaH9M5B2lcmzo7V8AY,3755
12
+ virgo_modules/src/edge_utils/edge_utils.py,sha256=U3GVyLgz0gEaR7AGjWseFcWt-IOusZvXJjwpBFknxNs,15643
13
+ virgo_modules/src/edge_utils/shap_utils.py,sha256=877gfucW-iLbf-aStgIcAng1XxQi_AgJsVjvdAzzWc8,3233
14
+ virgo_modules-0.3.0.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
15
+ virgo_modules-0.3.0.dist-info/METADATA,sha256=FuPBQsfBwgD6NtAeCYtNjkdTyEsVSaEk31zlhhBMv2M,883
16
+ virgo_modules-0.3.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
17
+ virgo_modules-0.3.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
18
+ virgo_modules-0.3.0.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
4
- virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
5
- virgo_modules/src/edge_utils.py,sha256=XN2oEOwADXF9IGNUGx0Ai8B1yDAiU2WDateLnEJh5FE,14243
6
- virgo_modules/src/hmm_utils.py,sha256=fFWxmh9q3rjiKRHnxNk9k7O4fDrxVxkmp3pbpLvktjc,21116
7
- virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
8
- virgo_modules/src/re_utils.py,sha256=PoHFqYiccUrKZJ2wvDbisrK1Uo9ekX8laAeGc2XxLxY,74852
9
- virgo_modules/src/ticketer_source.py,sha256=4vT8YweRtS28Zs66DBUG2rZ5EcFk9S0hiqNS_fz_I1c,100017
10
- virgo_modules/src/transformer_utils.py,sha256=LLwKYZRq5hrPVimnq3taD0Lh-q3Bq21fy1I4Icbnxi8,7677
11
- virgo_modules-0.2.10.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
12
- virgo_modules-0.2.10.dist-info/METADATA,sha256=-9U5ff9siZLh3T97ZAieJooEJKYfTVMpIM3nw7PiZeA,884
13
- virgo_modules-0.2.10.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
14
- virgo_modules-0.2.10.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
15
- virgo_modules-0.2.10.dist-info/RECORD,,