virgo-modules 0.0.72__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,189 @@
1
+ import gc
2
+
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+ from sklearn.linear_model import HuberRegressor
7
+ from scipy import stats
8
+
9
+ import matplotlib.pyplot as plt
10
+ import seaborn as sns; sns.set()
11
+
12
+ from matplotlib import cm
13
+ import matplotlib.colors as mcolors
14
+
15
class MarketAnalysis:
    """
    Market analysis of a return series against market features using
    robust (Huber) linear regression.

    Attributes
    ----------
    data : pd.DataFrame
        input data with every row containing a missing value removed
    market_features : list
        list of market features (log returns) to apply the analysis to
    return_cols : str
        main log return feature
    col_map : dict
        optional dictionary mapping market feature names to display names

    Methods
    -------
    compute_beta(data=pd.DataFrame, feature_x=str, feature_y=str):
        compute beta and alpha given x and y using robust linear regression
    get_correlation(data=pd.DataFrame, feature_x=str, feature_y=str):
        compute correlation given x and y
    produce_beta_report(data=pd.DataFrame):
        produce beta report
    compute_general_report(sample_size=int, offset=int, index=str, subsample_ts=int, show_plot=bool):
        compute full report, global and latest window
    """

    def __init__(self, data, market_features, return_col, col_map=None):
        """
        Store the analysis configuration.

        Parameters
        ----------
        data (pd.DataFrame): input data; rows with any missing value are dropped
        market_features (list): names of the market feature columns
        return_col (str): name of the main return column (stored as ``return_cols``)
        col_map (dict): optional mapping from market feature name to display name
        """
        self.data = data.dropna()
        self.market_features = market_features
        self.return_cols = return_col
        self.col_map = col_map

    def compute_beta(self, data, feature_x, feature_y):
        """
        compute beta and alpha given x and y using robust linear regression

        Parameters
        ----------
        data (pd.DataFrame): input data containing analysis features
        feature_x (str): name of the feature x (regressor)
        feature_y (str): name of the feature y (target)

        Returns
        -------
        (beta (float), alpha (float)): slope and intercept of the fitted line
        """
        # The regressor must be 2-D (n_samples, 1); the target must be 1-D,
        # otherwise scikit-learn has to flatten a column vector itself.
        x = data[feature_x].to_numpy().reshape(-1, 1)
        y = data[feature_y].to_numpy().ravel()
        huber_regr = HuberRegressor(fit_intercept=True)
        huber_regr.fit(x, y)
        return huber_regr.coef_[0], huber_regr.intercept_

    def get_correlation(self, data, feature_x, feature_y):
        """
        compute correlation given x and y

        Parameters
        ----------
        data (pd.DataFrame): input data containing analysis features
        feature_x (str): name of the feature x
        feature_y (str): name of the feature y

        Returns
        -------
        r (float): Pearson correlation coefficient
        """
        return stats.mstats.pearsonr(data[feature_x], data[feature_y])[0]

    def produce_beta_report(self, data):
        """
        produce beta report

        Every market feature is regressed against the main return feature
        (main return as x, market feature as y) and correlated with it.

        Parameters
        ----------
        data (pd.DataFrame): input data containing analysis features

        Returns
        -------
        report (pd.DataFrame): one row per market feature with the columns
            market_index, beta, alpha and r, sorted by r in descending order;
            map_market_index is added when a col_map was provided
        """
        result = {
            "market_index": [],
            "beta": [],
            "alpha": [],
            "r": [],
        }
        for market_index in self.market_features:
            beta, alpha = self.compute_beta(data, self.return_cols, market_index)
            r = self.get_correlation(data, self.return_cols, market_index)
            result["market_index"].append(market_index)
            result["beta"].append(beta)
            result["alpha"].append(alpha)
            result["r"].append(r)
        pd_result = pd.DataFrame(result).sort_values("r", ascending=False)
        if self.col_map:
            pd_result["map_market_index"] = pd_result.market_index.map(self.col_map)
        return pd_result

    def compute_general_report(self, sample_size, offset, index=False, subsample_ts=False, show_plot=True):
        """
        compute full report, global and latest window

        The input data must contain the columns ``Date`` and ``Close`` in
        addition to the return and market feature columns.

        Parameters
        ----------
        sample_size (int): sample size for every beta computation
        offset (int): step between the starts of consecutive samples
        index (str): market feature to plot; if not provided, the feature with
            the highest correlation in the global report is taken
        subsample_ts (int): if provided, only the last subsample_ts rows are
            used for the iterative beta calculation
        show_plot (bool): whether to show the plot (the figure is closed otherwise)

        Returns
        -------
        (report (pd.DataFrame), latest_report (pd.DataFrame), figure (matplotlib Figure))

        Raises
        ------
        ValueError: if index is not one of the analysed market features
        """
        general_report = self.produce_beta_report(self.data)
        # Latest window: the sample_size most recent rows, i.e. the same rows
        # the rolling loop below uses as its first window on the full data.
        latest_window = self.data.sort_values("Date", ascending=False).iloc[:sample_size, :]
        current_report = self.produce_beta_report(latest_window)

        if not index:
            index = general_report.head(1).market_index.values[0]
        selected = general_report[general_report.market_index == index]
        if selected.empty:
            raise ValueError(f"index {index!r} is not one of the analysed market features")
        global_beta = selected.beta.values[0]
        global_alpha = selected.alpha.values[0]

        figure, ax = plt.subplot_mosaic(
            [["scatter_total", "scatter_sample", "ts", "ts"]],
            layout="constrained",
            figsize=(18, 5),
        )
        x_global = self.data[self.return_cols]
        ax["scatter_total"].scatter(x_global, self.data[index])
        ax["scatter_total"].plot(x_global, global_beta * x_global + global_alpha, color="red")

        if subsample_ts:
            merger_df = self.data.iloc[-subsample_ts:, :].copy()
        else:
            merger_df = self.data.copy()
        ax["ts"].plot(merger_df.Date, merger_df.Close, color="grey", alpha=0.3)

        # One fixed colour scale is shared by the fitted lines, the time-series
        # markers and the colorbar so that the colorbar matches what is drawn.
        normalize_ = mcolors.Normalize(vmin=-2.0, vmax=2.0)
        colormap_ = cm.jet
        ax["scatter_sample"].set_xlim(-0.08, 0.08)
        ax["scatter_sample"].set_ylim(-0.08, 0.08)

        # Sort once; windows are taken from the most recent rows backwards.
        newest_first = merger_df.sort_values("Date", ascending=False)
        for start in range(0, len(merger_df) - sample_size, offset):
            window = newest_first.iloc[start:start + sample_size, :]
            beta, alpha = self.compute_beta(window, self.return_cols, index)
            window_color = colormap_(normalize_(beta))
            x = window[self.return_cols]
            ax["scatter_sample"].plot(x, window[index], "o", color="blue", alpha=0.1)
            ax["scatter_sample"].plot(x, beta * x + alpha, color=window_color)
            ax["ts"].scatter(window.Date, window.Close, color=window_color, s=10)

        scalar_mappable = cm.ScalarMappable(norm=normalize_, cmap=colormap_)
        scalar_mappable.set_array(x_global)
        if self.col_map:
            map_index = self.col_map.get(index, index)
            title = f"market analysis of {map_index}"
        else:
            title = "market analysis"
        ax["ts"].set_title(title)
        # The mappable is not attached to any Axes, so the Axes to take space
        # from has to be named explicitly.
        figure.colorbar(scalar_mappable, ax=ax["ts"])

        del merger_df, newest_first
        gc.collect()
        if show_plot:
            plt.show()
        else:
            plt.close(figure)
        return general_report, current_report, figure
File without changes
@@ -0,0 +1,44 @@
1
+ from scipy import optimize
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
class MarkowitzOptimizer:
    """
    Rolling optimizer that, for every date, finds the weights in [0, 1]
    summing to one that maximise the return-to-volatility ratio.

    Attributes
    ----------
    data_ : pd.DataFrame
        ``Date`` and return columns, without missing rows, sorted by ``Date``;
        after ``execute_markowitz`` it also holds the ``optimal_<col>`` weight
        columns and ``sum_weights``
    window_cov : int
        number of preceding rows used to estimate the covariance matrix
    return_cols : list
        names of the return columns
    n_features : int
        number of return columns
    """

    def __init__(self, data, return_cols, window_cov=10):
        """
        Parameters
        ----------
        data (pd.DataFrame): input data containing ``Date`` and the return columns
        return_cols (list): names of the return columns to allocate between
        window_cov (int): number of preceding rows used for the covariance
        """
        # Accept any sequence of column names, not only lists.
        return_cols = list(return_cols)
        self.data_ = data[["Date"] + return_cols].dropna().sort_values("Date").copy()
        self.window_cov = window_cov
        self.return_cols = return_cols
        self.n_features = len(return_cols)

    def execute_markowitz(self):
        """
        Compute the optimal weights for every row that has a full covariance window.

        For row i the covariance is estimated on rows [i - window_cov, i) and
        the return vector is the one of row i itself. The first window_cov
        rows keep NaN weights.

        Returns
        -------
        pd.DataFrame: ``data_`` with one ``optimal_<col>`` column per return
            column (rounded to 6 decimals) and their row sum in ``sum_weights``
        """
        cons = {"type": "eq", "fun": self._check_sum}
        bounds = tuple((0, 1) for _ in range(self.n_features))  # weights bounds
        init_guess = [1 / self.n_features for _ in range(self.n_features)]  # equal weights
        feature_result_names = [f"optimal_{x}" for x in self.return_cols]
        self.data_[feature_result_names] = np.nan

        # Locate the weight columns by name: on a repeated call the frame
        # already ends with "sum_weights", so "the last n columns" is wrong.
        weight_positions = self.data_.columns.get_indexer(feature_result_names)
        returns_frame = self.data_[self.return_cols]

        for i in range(self.window_cov, len(self.data_)):
            # The objective reads these two attributes on every evaluation.
            self.cov = returns_frame.iloc[i - self.window_cov:i].cov()
            self.returns = returns_frame.iloc[i].to_numpy(dtype=float)

            opt_results = optimize.minimize(
                self._neg_sr,
                init_guess,
                constraints=cons,
                bounds=bounds,
                method="SLSQP",
            )
            self.data_.iloc[i, weight_positions] = opt_results.x

        self.data_[feature_result_names] = self.data_[feature_result_names].astype(float).round(6)
        self.data_["sum_weights"] = self.data_[feature_result_names].sum(axis=1)
        self.feature_result_names = feature_result_names
        return self.data_

    def _get_ret_vol_sr(self, weights):
        """
        Return ``np.array([ret, vol, sr])`` for the given weights, using the
        current ``self.returns`` and ``self.cov``.

        NOTE: ``vol`` is not guarded against zero; a degenerate covariance
        makes ``sr`` infinite or NaN.
        """
        weights = np.asarray(weights, dtype=float)
        cov = np.asarray(self.cov, dtype=float)
        ret = np.dot(self.returns, weights)
        vol = np.sqrt(weights.dot(cov.dot(weights)))
        sr = ret / vol
        return np.array([ret, vol, sr])

    def _neg_sr(self, weights):
        """Objective for the minimiser: the negated return-to-volatility ratio."""
        return -self._get_ret_vol_sr(weights)[-1]

    @staticmethod
    def _check_sum(weights):
        """Equality constraint: zero when the weights sum to one."""
        return np.sum(weights) - 1