virgo-modules 0.0.72__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- virgo_modules/__init__.py +1 -0
- virgo_modules/src/aws_utils.py +35 -3
- virgo_modules/src/backtester.py +474 -0
- virgo_modules/src/edge_utils/__init__.py +0 -0
- virgo_modules/src/edge_utils/conformal_utils.py +106 -0
- virgo_modules/src/edge_utils/edge_utils.py +502 -0
- virgo_modules/src/edge_utils/feature_selection.py +66 -0
- virgo_modules/src/edge_utils/shap_utils.py +54 -0
- virgo_modules/src/edge_utils/stack_model.py +94 -0
- virgo_modules/src/hmm_utils.py +494 -0
- virgo_modules/src/market/__init__.py +0 -0
- virgo_modules/src/market/market_tools.py +189 -0
- virgo_modules/src/markowitz/__init__.py +0 -0
- virgo_modules/src/markowitz/markowitz_utils.py +44 -0
- virgo_modules/src/re_utils.py +628 -85
- virgo_modules/src/ticketer_source.py +1351 -1066
- virgo_modules/src/transformer_utils.py +401 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.9.0.dist-info}/METADATA +16 -22
- virgo_modules-0.9.0.dist-info/RECORD +24 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.9.0.dist-info}/WHEEL +1 -1
- virgo_modules/src/edge_utils.py +0 -178
- virgo_modules-0.0.72.dist-info/RECORD +0 -12
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.9.0.dist-info/licenses}/LICENSE +0 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.9.0.dist-info}/top_level.txt +0 -0
virgo_modules/src/market/market_tools.py
@@ -0,0 +1,189 @@
import gc

import pandas as pd
import numpy as np

from sklearn.linear_model import HuberRegressor
from scipy import stats

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from matplotlib import cm
import matplotlib.colors as mcolors

class MarketAnalysis:
    """
    Class that performs market analysis using robust linear regression

    Attributes
    ----------
    data : pd.DataFrame
        input data
    market_features : list
        list of market features (log returns) to apply the analysis to
    return_cols : str
        main log return feature
    col_map : dict
        dictionary containing renames of the market features

    Methods
    -------
    compute_beta(data=pd.DataFrame, feature_x=str, feature_y=str):
        compute betas given x and y using robust linear regression
    get_correlation(data=pd.DataFrame, feature_x=str, feature_y=str):
        compute correlation given x and y
    produce_beta_report(data=pd.DataFrame):
        produce beta report
    compute_general_report(sample_size=int, offset=int, index=str, subsample_ts=int, show_plot=bool):
        compute full report, global and latest window
    """

    def __init__(self, data, market_features, return_col, col_map=None):
        self.data = data.dropna()
        self.market_features = market_features
        self.return_cols = return_col
        self.col_map = col_map

    def compute_beta(self, data, feature_x, feature_y):
        """
        compute betas given x and y using robust linear regression

        Parameters
        ----------
        data (pd.DataFrame): input data containing analysis features
        feature_x (str): name of the feature x
        feature_y (str): name of the feature y

        Returns
        -------
        (beta (float), alpha (float))
        """
        x = data[feature_x].values.reshape(-1, 1)
        y = data[feature_y].values.reshape(-1, 1)
        huber_regr = HuberRegressor(fit_intercept=True)
        huber_regr.fit(x, y)
        beta, alpha = huber_regr.coef_[0], huber_regr.intercept_
        return beta, alpha

    def get_correlation(self, data, feature_x, feature_y):
        """
        compute correlation given x and y

        Parameters
        ----------
        data (pd.DataFrame): input data containing analysis features
        feature_x (str): name of the feature x
        feature_y (str): name of the feature y

        Returns
        -------
        r (float)
        """
        x = data[feature_x]
        y = data[feature_y]
        r = stats.mstats.pearsonr(x, y)[0]
        return r

    def produce_beta_report(self, data):
        """
        produce beta report

        Parameters
        ----------
        data (pd.DataFrame): input data containing analysis features

        Returns
        -------
        report (pd.DataFrame)
        """
        result = {
            "market_index": list(),
            "beta": list(),
            "alpha": list(),
            "r": list()
        }
        for index in self.market_features:
            beta, alpha = self.compute_beta(data, self.return_cols, index)
            r = self.get_correlation(data, self.return_cols, index)
            result["market_index"].append(index)
            result["beta"].append(beta)
            result["alpha"].append(alpha)
            result["r"].append(r)
        pd_result = pd.DataFrame(result)
        pd_result = pd_result.sort_values("r", ascending=False)
        if self.col_map:
            pd_result["map_market_index"] = pd_result.market_index.map(self.col_map)
        return pd_result

    def compute_general_report(self, sample_size, offset, index=False, subsample_ts=False, show_plot=True):
        """
        compute full report, global and latest window

        Parameters
        ----------
        sample_size (int): sample size for every beta computation
        offset (int): offset or overlap between samples
        index (str): market index to use; if not provided, the best-fit index is taken
        subsample_ts (int): subsample for the iterative beta calculation
        show_plot (bool): whether to show the plot

        Returns
        -------
        (report (pd.DataFrame), latest_report (pd.DataFrame), figure (matplotlib.figure.Figure))
        """
        general_report = self.produce_beta_report(self.data)
        current_report = self.produce_beta_report(self.data.iloc[sample_size:, :])
        if not index:
            index = general_report.head(1).market_index.values[0]
        b = general_report[general_report.market_index == index].beta.values
        a = general_report[general_report.market_index == index].alpha.values

        figure, ax = plt.subplot_mosaic(
            [["scatter_total", "scatter_sample", 'ts', 'ts']],
            layout="constrained",
            figsize=(18, 5)
        )
        x = self.data[self.return_cols]
        y = self.data[index]
        ax['scatter_total'].scatter(x, y)
        ax['scatter_total'].plot(x, b*x+a, color='red')

        if subsample_ts:
            merger_df = self.data.iloc[-subsample_ts:, :].copy()
        else:
            merger_df = self.data.copy()
        ax['ts'].plot(merger_df.Date, merger_df.Close, color='grey', alpha=0.3)
        b_array = list()
        for i in range(0, len(merger_df)-sample_size, offset):
            merger_ = merger_df.sort_values('Date', ascending=False).iloc[i:i+sample_size, :]
            b, a = self.compute_beta(merger_, self.return_cols, index)
            x = merger_[self.return_cols]
            y = merger_[index]
            normalize_ = mcolors.Normalize(vmin=-2.0, vmax=2.0)
            colormap_ = cm.jet
            ax['scatter_sample'].plot(x, y, 'o', color='blue', alpha=0.1)
            ax['scatter_sample'].plot(x, b*x+a, color=colormap_(normalize_(b)))
            ax['scatter_sample'].set_xlim(-0.08, 0.08)
            ax['scatter_sample'].set_ylim(-0.08, 0.08)
            plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap_(normalize_(b)), s=10)
            b_array.append(b)
        normalize_ = mcolors.Normalize(vmin=np.min(b_array), vmax=np.max(b_array))
        colormap_ = cm.jet
        x_global = self.data[self.return_cols]
        scalarmappaple = cm.ScalarMappable(norm=normalize_, cmap=colormap_)
        scalarmappaple.set_array(x_global)
        if self.col_map:
            map_index = self.col_map.get(index)
            title = f'market analysis of {map_index}'
        else:
            title = f'market analysis'
        plt.title(title)
        plt.colorbar(scalarmappaple)
        del merger_df
        gc.collect()
        if show_plot:
            plt.show()
        else:
            plt.close()
        return general_report, current_report, figure
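The new MarketAnalysis class fits a robust (Huber) regression of each market index's log returns against the ticker's main log-return column and reports beta, alpha, and Pearson r, plus a rolling-beta figure. Below is a minimal usage sketch on synthetic data; the column names (log_return, spx_ret, ndx_ret) and the import path are illustrative assumptions based on the file location shown above, not documented package API.

import numpy as np
import pandas as pd

from virgo_modules.src.market.market_tools import MarketAnalysis  # assumed import path

# toy data: ~300 business days with a main return column and two hypothetical index-return columns
rng = np.random.default_rng(0)
n = 300
spx_ret = rng.normal(0, 0.01, n)
df = pd.DataFrame({
    "Date": pd.date_range("2023-01-02", periods=n, freq="B"),
    "log_return": 1.2 * spx_ret + rng.normal(0, 0.005, n),  # correlated with spx_ret by construction
    "spx_ret": spx_ret,
    "ndx_ret": rng.normal(0, 0.012, n),
})
df["Close"] = 100 * np.exp(df["log_return"].cumsum())  # Date/Close are used by the rolling-beta plot

analysis = MarketAnalysis(
    data=df,
    market_features=["spx_ret", "ndx_ret"],
    return_col="log_return",
    col_map={"spx_ret": "S&P 500", "ndx_ret": "Nasdaq 100"},
)

# beta/alpha/r of log_return against each index, sorted by correlation
report = analysis.produce_beta_report(analysis.data)
print(report)

# global report, recent-window report and the figure with rolling betas
general, current, fig = analysis.compute_general_report(
    sample_size=60, offset=10, subsample_ts=250, show_plot=False
)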
virgo_modules/src/markowitz/markowitz_utils.py
@@ -0,0 +1,44 @@
from scipy import optimize
import pandas as pd
import numpy as np

class MarkowitzOptimizer:
    def __init__(self, data, return_cols, window_cov=10):
        self.data_ = data[["Date"]+return_cols].dropna().sort_values("Date").copy()
        self.window_cov = window_cov
        self.return_cols = return_cols
        self.n_features = len(return_cols)

    def execute_markowitz(self):
        cons = {'type': 'eq', 'fun': self._check_sum}
        bounds = tuple((0, 1) for _ in range(self.n_features))  # weight bounds
        init_guess = [1/self.n_features for _ in range(self.n_features)]  # initial guess of weights
        feature_result_names = [f"optimal_{x}" for x in self.return_cols]
        self.data_[feature_result_names] = np.nan

        for i in range(self.window_cov, len(self.data_)):
            self.cov = self.data_.iloc[i-self.window_cov:i, :][self.return_cols].cov()
            self.returns = self.data_.iloc[i][self.return_cols].values

            opt_results = optimize.minimize(self._neg_sr, init_guess, constraints=cons, bounds=bounds, method='SLSQP')
            optimal_weights = opt_results.x
            self.data_.iloc[i, -self.n_features:] = [float(x) for x in optimal_weights]

        self.data_[feature_result_names] = self.data_[feature_result_names].astype(float).round(6)
        self.data_["sum_weights"] = self.data_[feature_result_names].sum(axis=1)
        self.feature_result_names = feature_result_names
        return self.data_

    def _get_ret_vol_sr(self, weights):
        weights = np.array(weights)
        ret = self.returns.dot(weights)
        vol = np.sqrt(weights.T.dot(self.cov.dot(weights)))
        sr = ret / vol
        return np.array([ret, vol, sr])

    def _neg_sr(self, weights):
        return self._get_ret_vol_sr(weights)[-1] * -1

    @staticmethod
    def _check_sum(weights):
        return np.sum(weights) - 1