virgo-modules 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of virgo-modules might be problematic.
- virgo_modules/src/edge_utils/conformal_utils.py +67 -32
- virgo_modules/src/edge_utils/shap_utils.py +23 -52
- virgo_modules/src/market/__init__.py +0 -0
- virgo_modules/src/market/market_tools.py +189 -0
- virgo_modules/src/ticketer_source.py +62 -31
- {virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/METADATA +1 -1
- {virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/RECORD +10 -8
- {virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/LICENSE +0 -0
- {virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/WHEEL +0 -0
- {virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/top_level.txt +0 -0
virgo_modules/src/edge_utils/conformal_utils.py
@@ -1,44 +1,79 @@
 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
-from mapie.classification import MapieClassifier
 from sklearn.pipeline import Pipeline
 import mlflow
+import pandas as pd
 import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from mapie.classification import SplitConformalClassifier
 
+class ConformalStack(mlflow.pyfunc.PythonModel):
+    def __init__(self, model,targets, alphas):
+        self.model = model
+        self.targets = targets
+        self.alphas = alphas
+    def fit(self, data):
+        self.classifiers = dict()
+        for i,target in enumerate(self.targets):
+            st = SingleStack(self.model["model"],i)
+            st.fit()
+            seg_model = Pipeline([
+                ('pipe',self.model['pipe_transform']),
+                ('modelbase',st)
+            ])
+            mapie_class = SplitConformalClassifier(seg_model, prefit=True, random_state=123, conformity_score="lac", confidence_level=1-np.array(self.alphas))
+            mapie_class.conformalize(data, data[self.targets[i]].values)
+            self.classifiers[target] = mapie_class
+    def predict_conformal(self, data, ):
+        for target in self.targets:
+            prefix = target+"_conf"
+            _, y_pis = self.classifiers[target].predict_set(data)
+            for i,alpha in enumerate(self.alphas):
+                data[f'{prefix}-{alpha}'] = y_pis[:,1,i]
+                data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True,alpha,0)
+        return data
+
 
-
-
-
-
-
-
-
-
-            mapie_class.fit(data, data[targets[i]].values)
-            classfiers.append(mapie_class)
-    return classfiers
+class SingleStack(ClassifierMixin, BaseEstimator):
+    def __init__(self, model, estimator_index):
+        self.model = model
+        self.estimator_index = estimator_index
+
+    def fit(self):
+        self._is_fitted = True
+        self.classes_ = [0,1]
 
-    def
-
-    for i,
-
-
+    def predict_proba(self, X):
+        metas_pred = dict()
+        for i,cont in enumerate(self.model.estimators, start=1):
+            _,estimator = cont
+            meta_pred = estimator.predict_proba(X)
+            metas_pred[f"meta{i}0"] = meta_pred[0][:,1]
+            metas_pred[f"meta{i}1"] = meta_pred[1][:,1]
+        self.meta_preds_df__ = pd.DataFrame(metas_pred)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        prediction_vector = list()
+        for i,cont in enumerate(self.model.meta_estimators, start=0):
+            _,estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
+            preds = estimator.predict_proba(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+        return prediction_vector[self.estimator_index]
+
+    def predict(self, X):
+        prediction_vector = list()
+        _ = self.predict_proba(X)
+        for i,cont in enumerate(self.model.meta_estimators, start=0):
+            _,estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
+            preds = estimator.predict(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+
+        p = np.array(tuple(prediction_vector))
+        return p.reshape((p.shape[1],p.shape[0]))[:,self.estimator_index]
+
+    def __sklearn_is_fitted__(self):
+        return hasattr(self, "_is_fitted") and self._is_fitted
 
 def edge_conformal_lines(data, alphas,threshold = 0.6, plot = False, look_back = 750, offset = 0.08):
     ### corect labels ####
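Usage sketch for the new ConformalStack/SingleStack pair (hypothetical, not taken from the package: fitted_pipeline, fitted_stack, calibration_df and live_df are invented names, and the stacked model is assumed to expose the dict keys and estimator lists used in the diff above):

    # Hypothetical sketch, not part of the package.
    model_dict = {"pipe_transform": fitted_pipeline, "model": fitted_stack}  # assumed pre-fitted objects
    conf = ConformalStack(model_dict, targets=["target_up", "target_down"], alphas=[0.05, 0.1])
    conf.fit(calibration_df)                  # conformalizes one SplitConformalClassifier per target
    scored = conf.predict_conformal(live_df)  # adds e.g. 'target_up_conf-0.05' columns to the frame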
virgo_modules/src/edge_utils/shap_utils.py
@@ -5,49 +5,30 @@ import numpy as np
 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        explainers.append(model)
-    return explainers
-
-def get_shapvalues(explainers, data):
-    shap_values = {}
-    for i,explainer in enumerate(explainers):
-        shap_value_i = explainer(data)
-        shap_values[i] = shap_value_i
-    return shap_values
-
-def get_explainerclusters(model, data, targets):
-    clustermodels = list()
-    for i, _ in enumerate(model['model'].estimators_):
-        transf_data = model['pipe_transform'].transform(data)
-        Y = data[targets[i]]
-        cluster_model = shap.utils.hclust(transf_data, Y)
-        clustermodels.append(cluster_model)
-    return clustermodels
-
-def mean_shap(data, explainers, pipe_transform, dict_shap_values):
+class StackInterpretor(mlflow.pyfunc.PythonModel):
+    def __init__(self, model, targets):
+        self.base_estimators = model.estimators_
+        self.targets = targets
+    def fit_interpretor(self, data):
+        interpretors = {}
+        for label, predictor in zip(self.targets,self.base_estimators):
+            explainer = shap.Explainer(predictor, data)
+            interpretors[label] = explainer
+        self.interpretors = interpretors
+    def get_shap_values(self, data):
+        shap_values = dict()
+        for label, interpretor in self.interpretors.items():
+            shap_value = interpretor(data)
+            shap_values[label] = shap_value
+        return shap_values
+    def register_map(self, mapping):
+        self.mapping = mapping
+
+def mean_shap(data, explainers, pipe_transform):
     t_data = pipe_transform.transform(data)
     input_features = t_data.columns
-    shap_results =
+    shap_results = explainers.get_shap_values(t_data)
+    dict_shap_values = explainers.mapping
     arrays_ = list()
     for k,_ in shap_results.items():
         arrays_.append(shap_results.get(k).values)
@@ -70,14 +51,4 @@ def edge_shap_lines(data, plot = False, look_back = 750):
     fig.update_layout(title_text="sirius - feature power",width=1200,height = 500)
     if plot:
         fig.show()
-    return fig
-
-def log_top_shap(runid, top_shap):
-    with mlflow.start_run(run_id=runid) as run:
-        mlflow.log_dict(top_shap,f"explainer/top_shap.json")
-        print('artifact was logged')
-
-def load_top_shap(runid):
-    folder = f"explainer/top_shap.json"
-    top_shap = mlflow.artifacts.load_dict(f"runs:/{runid}/{folder}")
-    return top_shap
+    return fig
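Usage sketch for the new StackInterpretor and the reworked mean_shap signature (hypothetical: stack_model, pipe, train_df and test_df are invented names, and stack_model is assumed to expose estimators_ as read in __init__ above):

    # Hypothetical sketch, not part of the package.
    interpreter = StackInterpretor(stack_model, targets=["target_up", "target_down"])
    interpreter.fit_interpretor(pipe.transform(train_df))   # builds one shap.Explainer per base estimator
    interpreter.register_map(feature_name_map)               # mapping later read back as explainers.mapping
    shap_values = interpreter.get_shap_values(pipe.transform(test_df))  # dict keyed by target label
    summary = mean_shap(test_df, interpreter, pipe)           # aggregates the per-target SHAP arrays (return value not shown in this hunk)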
virgo_modules/src/market/__init__.py: File without changes
virgo_modules/src/market/market_tools.py
@@ -0,0 +1,189 @@
+import gc
+
+import pandas as pd
+import numpy as np
+
+from sklearn.linear_model import HuberRegressor
+from scipy import stats
+
+import matplotlib.pyplot as plt
+import seaborn as sns; sns.set()
+
+from matplotlib import cm
+import matplotlib.colors as mcolors
+
+class MarketAnalysis:
+    """
+    Class that perform market analysis using robust linear regression
+
+    Attributes
+    ----------
+    data : pd.DataFrame
+        input data
+    market_features : list
+        list of market feature (log returns) to apply analysis
+    return_cols: str
+        main log return feature
+    col_map: dict
+        dictionary containing rename of market features
+
+    Methods
+    -------
+    compute_beta(data=pd.DataFrame, feature_x=str, feature_y=str):
+        compute betas given x and y using robust linear regression
+    get_correlation(data=pd.DataFrame, feature_x=str, feature_y=str):
+        compute correlation given x and y
+    produce_beta_report(data=pd.DataFrame):
+        produce beta report
+    compute_general_report(sample_size=int, offset=int, index=str, subsample_ts=int, show_plot=bool):
+        compute full report, global and latest window
+    """
+
+    def __init__(self, data, market_features, return_col, col_map=None):
+        self.data = data.dropna()
+        self.market_features = market_features
+        self.return_cols = return_col
+        self.col_map=col_map
+
+    def compute_beta(self, data, feature_x, feature_y):
+        """
+        compute betas given x and y using robust linear regression
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+        feature_x (str): name of the feature x
+        feature_y (str): name of the feature y
+
+        Returns
+        -------
+        (beta(str), alpha(str))
+        """
+        x = data[feature_x].values.reshape(-1,1)
+        y = data[feature_y].values.reshape(-1,1)
+        huber_regr = HuberRegressor(fit_intercept = True)
+        huber_regr.fit(x, y)
+        beta, alpha = huber_regr.coef_[0], huber_regr.intercept_
+        return beta, alpha
+
+    def get_correlation(self, data, feature_x, feature_y):
+        """
+        compute correlation given x and y
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+        feature_x (str): name of the feature x
+        feature_y (str): name of the feature y
+
+        Returns
+        -------
+        r (float)
+        """
+        x = data[feature_x]
+        y = data[feature_y]
+        r = stats.mstats.pearsonr(x, y)[0]
+        return r
+
+    def produce_beta_report(self, data):
+        """
+        produce beta report
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+
+        Returns
+        -------
+        report (pd.DataFrame)
+        """
+        result = {
+            "market_index": list(),
+            "beta": list(),
+            "alpha": list(),
+            "r": list()
+        }
+        for index in self.market_features:
+            beta, alpha = self.compute_beta( data, self.return_cols, index)
+            r = self.get_correlation( data, self.return_cols, index)
+            result["market_index"].append(index)
+            result["beta"].append(beta)
+            result["alpha"].append(alpha)
+            result["r"].append(r)
+        pd_result = pd.DataFrame(result)
+        pd_result = pd_result.sort_values("r", ascending=False)
+        if self.col_map:
+            pd_result["map_market_index"] = pd_result.market_index.map(self.col_map)
+        return pd_result
+
+    def compute_general_report(self, sample_size, offset, index=False, subsample_ts=False, show_plot=True):
+        """
+        compute full report, global and latest window
+
+        Parameters
+        ----------
+        sample_size (int): sample size for every beta computation
+        offset (int): offset or overlap between samples
+        index (str): if provided, bet fit index is taken
+        subsample_ts (int): subsample for iterative beta calculation
+        show_plot (bool): whether to show plot
+
+        Returns
+        -------
+        (report (pd.DataFrame), latest_report (pd.DataFrame), figure (mtpl.plt))
+        """
+        general_report = self.produce_beta_report(self.data)
+        current_report = self.produce_beta_report(self.data.iloc[sample_size:,:])
+        if not index:
+            index = general_report.head(1).market_index.values[0]
+        b = general_report[general_report.market_index == index].beta.values
+        a = general_report[general_report.market_index == index].alpha.values
+
+        figure, ax = plt.subplot_mosaic(
+            [["scatter_total", "scatter_sample",'ts','ts']],
+            layout="constrained",
+            figsize=(18, 5)
+        )
+        x = self.data[self.return_cols]
+        y = self.data[index]
+        ax['scatter_total'].scatter(x, y)
+        ax['scatter_total'].plot(x, b*x+a, color='red')
+
+        if subsample_ts:
+            merger_df = self.data.iloc[-subsample_ts:,:].copy()
+        else:
+            merger_df = self.data.copy()
+        ax['ts'].plot(merger_df.Date, merger_df.Close, color = 'grey', alpha = 0.3)
+        b_array = list()
+        for i in range(0,len(merger_df)-sample_size,offset):
+            merger_ = merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
+            b, a = self.compute_beta(merger_, self.return_cols, index)
+            x = merger_[self.return_cols]
+            y = merger_[index]
+            normalize_ = mcolors.Normalize(vmin=-2.0, vmax=2.0)
+            colormap_ = cm.jet
+            ax['scatter_sample'].plot(x, y,'o', color = 'blue', alpha = 0.1)
+            ax['scatter_sample'].plot(x, b*x+a, color=colormap_(normalize_(b)))
+            ax['scatter_sample'].set_xlim(-0.08, 0.08)
+            ax['scatter_sample'].set_ylim(-0.08, 0.08)
+            plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap_(normalize_(b)), s = 10)
+            b_array.append(b)
+        normalize_ = mcolors.Normalize(vmin=np.min(b_array), vmax=np.max(b_array))
+        colormap_ = cm.jet
+        x_global = self.data[self.return_cols]
+        scalarmappaple = cm.ScalarMappable(norm=normalize_, cmap=colormap_)
+        scalarmappaple.set_array(x_global)
+        if self.col_map:
+            map_index = self.col_map.get(index)
+            title = f'market analysis of {map_index}'
+        else:
+            title = f'market analysis'
+        plt.title(title)
+        plt.colorbar(scalarmappaple)
+        del merger_df
+        gc.collect()
+        if show_plot:
+            plt.show()
+        else:
+            plt.close()
+        return general_report, current_report, figure
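Usage sketch for the new MarketAnalysis class (hypothetical: asset_df and the column names below are invented, and the frame is assumed to carry the 'Date' and 'Close' columns that compute_general_report plots plus the log-return columns):

    # Hypothetical sketch, not part of the package.
    ma = MarketAnalysis(
        data=asset_df,
        market_features=["spx_log_return", "ndx_log_return"],
        return_col="asset_log_return",
        col_map={"spx_log_return": "S&P 500", "ndx_log_return": "Nasdaq 100"},
    )
    betas = ma.produce_beta_report(ma.data)  # beta, alpha and r per market index, sorted by r
    full_report, latest_report, fig = ma.compute_general_report(sample_size=120, offset=10, subsample_ts=750)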
virgo_modules/src/ticketer_source.py
@@ -1,7 +1,7 @@
 import yfinance as yf
 import pandas as pd
 import numpy as np
-import
+import gc
 
 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
@@ -138,6 +138,10 @@ class stock_eda_panel(object):
         perform analysis of lags of the mean rolling log return
     compute_clip_bands(feature_name=str,threshold=float):
         compute outlier detection for a given signal, Note that this follows mean reversion procedure and feature has to be stationary. Also botton and roof resulting signals is attached to the dataframe
+    extract_sec_data(symbol=str, base_columns=list(str), rename_columns=dict):
+        extract new asset data and merge it to the main asset data
+    lag_log_return(lags=int, feature=str, feature_name=str):
+        compute log return given some lags
     signal_plotter(feature_name=str):
         display analysis plot of a feature with high and low signals
     log_features_standard(feature_name=str):
@@ -667,6 +671,63 @@ class stock_eda_panel(object):
         self.df[f'signal_low_{feature_name}'] = np.where( (self.df[f'norm_{feature_name}'] < self.df[f'lower_{feature_name}'] ), 1, 0)
         self.df[f'signal_up_{feature_name}'] = np.where( (self.df[f'norm_{feature_name}'] > self.df[f'upper_{feature_name}'] ), 1, 0)
 
+    def extract_sec_data(self, symbol, base_columns, rename_columns=None):
+        """
+        extract new asset data and merge it to the main asset data
+
+        Parameters
+        ----------
+        symbol (str): symbol to extract data
+        base_columns (list): list of columns to persist
+        rename_columns (dict): map of the new column names using pd.DataFrame.rename()
+
+        Returns
+        -------
+        None
+        """
+        begin_date = self.today - relativedelta(days = self.n_days)
+        begin_date_str = begin_date.strftime('%Y-%m-%d')
+
+        stock = yf.Ticker(symbol)
+        df = stock.history(period=self.data_window)
+        df = df.sort_values('Date')
+        df.reset_index(inplace=True)
+        df['Date'] = pd.to_datetime(df['Date'], format='mixed',utc=True).dt.date
+        df['Date'] = pd.to_datetime(df['Date'])
+        df = df[df.Date >= begin_date_str ]
+        df = df[base_columns]
+        if rename_columns:
+            df = df.rename(columns=rename_columns)
+        right_df = df.copy()
+
+        dates_vector = self.df.Date.to_frame()
+        right_df = dates_vector.merge(right_df, on ='Date',how = 'left')
+        right_df = right_df.fillna(method = 'bfill')
+        right_df = right_df.fillna(method = 'ffill')
+
+        self.df = self.df.merge(right_df, on ='Date',how = 'left')
+        self.df = self.df.sort_values("Date")
+        del right_df
+        gc.collect()
+
+    def lag_log_return(self, lags, feature, feature_name=False):
+        """
+        compute log return given some lags
+
+        Parameters
+        ----------
+        lags (int): lag to apply log return
+        feature (str): feature to apply log return
+        feature_name (str): rename resuling name
+
+        Returns
+        -------
+        None
+        """
+
+        feature_name = feature_name if feature_name else f"{feature}_log_return"
+        self.df[feature_name] = np.log(self.df[feature]/self.df[feature].shift(lags))
+
     def signal_plotter(self, feature_name):
 
         """
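Usage sketch for the two new stock_eda_panel methods (hypothetical: the panel construction is elided because its arguments are not part of this diff, and the symbol and column names are invented):

    # Hypothetical sketch, not part of the package.
    panel = stock_eda_panel(...)  # existing class; constructor arguments elided
    panel.extract_sec_data(symbol="^GSPC",
                           base_columns=["Date", "Close"],
                           rename_columns={"Close": "spx_close"})  # merged onto panel.df by Date, gaps bfill/ffill
    panel.lag_log_return(lags=1, feature="spx_close", feature_name="spx_log_return")
    # lag_log_return stores log(x_t / x_{t-lags}) of the chosen column in panel.df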
@@ -2304,33 +2365,3 @@ class analyse_index(stock_eda_panel):
 
         self.states_result = result
 
-def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
-    '''
-    select relevant beta result data of a given asset
-
-    Parameters:
-    data_market (pd.DataFrame): dataframe of the market results
-    ticket_name (str): name of the asset
-    show_plot (bool): If tru, plot results
-    save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-    save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-    aws_credentials (dict): dict of the aws credentials
-
-    Returns:
-    selection (pd.DataFrame): dataframe of the most relevant beta
-    '''
-    all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
-    all_betas['gen_r2'] = all_betas.general_r ** 2
-    all_betas['sampl_r2'] = all_betas.sample_r ** 2
-    selection = all_betas.sort_values('gen_r2',ascending =False).head(2).sort_values('sampl_r2',ascending =False).head(1).drop(columns = ['gen_r2','sampl_r2'])
-
-    if show_plot:
-        print(selection)
-    if save_path:
-        result_plot_name = f'market_best_fit.csv'
-        selection.to_csv(save_path+result_plot_name)
-
-    if save_path and save_aws:
-        # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
-        upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
-    return selection
{virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/RECORD
@@ -5,15 +5,17 @@ virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGti
 virgo_modules/src/hmm_utils.py,sha256=D7axAnCdSe1_1EgRyli2PAnM2f6699hTY9GcxjPXG-o,21221
 virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
 virgo_modules/src/re_utils.py,sha256=DBY_VBB1wKm5D7znutpF_66CTLZhJfx54h8Ws0YzdN4,74641
-virgo_modules/src/ticketer_source.py,sha256=
+virgo_modules/src/ticketer_source.py,sha256=aJZNB_YK0JrSrUBUBkAfolIMxcTozNFrZeoNxkhpqK8,102547
 virgo_modules/src/transformer_utils.py,sha256=ysCUp3cB3_7Jr9OHDqhg2_6Vu0k1YVjfqbvQNbxpbhI,8990
 virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-virgo_modules/src/edge_utils/conformal_utils.py,sha256=
+virgo_modules/src/edge_utils/conformal_utils.py,sha256=cKm4KSM261Eu1FJn4oowKYiKIesW81VbqITIvopGSVk,5410
 virgo_modules/src/edge_utils/edge_utils.py,sha256=7nYPLDNyKqeKIuOOwQi4wsBibzs9gP1HgYMISXJX1Y8,19522
-virgo_modules/src/edge_utils/shap_utils.py,sha256=
+virgo_modules/src/edge_utils/shap_utils.py,sha256=FgcHkfddvdFSeUqEubYa2ExRGVAWSthqK4b-eKagEmo,2333
 virgo_modules/src/edge_utils/stack_model.py,sha256=QqE91uLo2KauGEj91AVNANB1xE7J4Fa49YOX7k5mFng,4257
-virgo_modules
-virgo_modules
-virgo_modules-0.
-virgo_modules-0.
-virgo_modules-0.
+virgo_modules/src/market/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+virgo_modules/src/market/market_tools.py,sha256=vBt66_7E3ANz7avzfeNw_RHMGvG9lh5PRhxmcf_Oyjc,6880
+virgo_modules-0.7.0.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
+virgo_modules-0.7.0.dist-info/METADATA,sha256=tart49AI1D8oLYtFI7mxY43ReNUxWpsX34PuByszh3Q,876
+virgo_modules-0.7.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+virgo_modules-0.7.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
+virgo_modules-0.7.0.dist-info/RECORD,,
{virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/LICENSE: File without changes
{virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/WHEEL: File without changes
{virgo_modules-0.5.1.dist-info → virgo_modules-0.7.0.dist-info}/top_level.txt: File without changes