virgo-modules 0.0.90__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic.
- virgo_modules/src/backtester.py +360 -0
- virgo_modules/src/re_utils.py +2 -1
- virgo_modules/src/ticketer_source.py +411 -409
- {virgo_modules-0.0.90.dist-info → virgo_modules-0.1.0.dist-info}/METADATA +1 -1
- virgo_modules-0.1.0.dist-info/RECORD +13 -0
- virgo_modules-0.0.90.dist-info/RECORD +0 -12
- {virgo_modules-0.0.90.dist-info → virgo_modules-0.1.0.dist-info}/LICENSE +0 -0
- {virgo_modules-0.0.90.dist-info → virgo_modules-0.1.0.dist-info}/WHEEL +0 -0
- {virgo_modules-0.0.90.dist-info → virgo_modules-0.1.0.dist-info}/top_level.txt +0 -0
virgo_modules/src/backtester.py
ADDED
@@ -0,0 +1,360 @@
+import pandas as pd
+import numpy as np
+import json
+
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import seaborn as sns; sns.set()
+
+import warnings
+warnings.filterwarnings('ignore')
+
+from .aws_utils import upload_file_to_aws
+
+def sharpe_ratio(return_series):
+
+    '''
+    calculate sharpe ratio for given array.
+
+    Parameters:
+    return_series (pd.series): pandas series of the asset returns
+
+    Returns:
+    sharpe (float): sharpe ratio
+    '''
+
+    N = 255 # Trading days in the year (change to 365 for crypto)
+    rf = 0.005 # Half a percent risk free rate
+    mean = return_series.mean() * N - rf
+    sigma = return_series.std() * np.sqrt(N)
+    sharpe = round(mean / sigma, 3)
+    return sharpe
+
+
+class SignalAnalyserObject:
+    """
+    Class that produces back-tests analysis for a given feature
+
+    Attributes
+    ----------
+    symbol_name : str
+        stock or asset to assess
+    feature_name : str
+    test_size: int
+        testing data size
+    show_plot: boolean
+    save_path: str
+        if available, save result locally
+    save_aws: str
+        if available, save result locally
+    aws_credentials: dict
+    signal_position: int
+        if available, signal position to open a position
+    df: pd.DataFrame:
+        transformed data of the selected feature to perform back-test
+
+    Methods
+    -------
+    signal_analyser(days_list=list):
+        given a signal position for either bottom or roof signal, calculate the expected return and distributions for a time scope in the days list (time horizons)
+    create_backtest_signal(days_strategy=int, high_exit=float, low_exit=float, open_in_list=list):
+        create a back-test analysis on the test data using some opening and closing position criteria
+    """
+
+    def __init__(self, data,symbol_name, feature_name, test_size, signal_position = False, correct_signals = False, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+        """
+        Initialize object
+
+        Parameters
+        ----------
+        data (pd.DataFrame): data
+        ticket_name (str): name of the asset
+        feature_name (str): name of the features
+        test_size (int): size of the test data
+        signal_position (int): signal position to open the position, False by default
+        correct_signals (int): clean abnormal signals using interpolation
+        show_plot (boolean): if true show plot for every method
+        save_path (str): if true, save results in file e.g r'C:/path/to/the/file/'
+        save_aws (str): if true, export results to remote repo e.g. 'path/to/file/'
+        aws_credentials (dict): credentials for aws
+        return_fig (boolean): if true, methods will return objects
+
+        Returns
+        -------
+        None
+        """
+        self.ticket_name = symbol_name
+        self.feature_name=feature_name
+        self.test_size=test_size
+        self.show_plot = show_plot
+        self.save_path = save_path
+        self.save_aws = save_aws
+        self.aws_credentials = aws_credentials
+        self.return_fig = return_fig
+        self.signal_position = signal_position
+        ## preprocessing
+        up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+        features_base = ['Date', up_signal, low_signal, 'Close','Open','High','Low']
+
+        df = data[features_base].sort_values('Date')
+
+        df['signal_type'] = np.where(
+            df[up_signal] == 1,
+            'up',
+            np.where(
+                df[low_signal] == 1,
+                'down',
+                'no signal'
+            )
+        )
+        def correct_sygnals(df,correct_i = 1):
+            ### signal cleaning
+            for i in range(1+correct_i, len(df)-1):
+                start_i, end_i = i-(correct_i+1), i+1
+                dfw = df.iloc[start_i: end_i,]
+                before_type = dfw.iloc[0].signal_type
+                after_type = dfw.iloc[-1].signal_type
+                window_types = dfw.iloc[1:-1].signal_type.unique()
+                n_window_type = len(window_types)
+                if n_window_type == 1:
+                    if (before_type == after_type) and (window_types[0] != after_type):
+                        df.iloc[start_i+1: end_i-1, df.columns.get_loc('signal_type')] = before_type
+            return df.copy()
+
+        if correct_signals:
+            for correct_i in range(1,correct_signals+1):
+                df = correct_sygnals(df,correct_i = correct_i)
+            df[up_signal] = np.where(df['signal_type'] == 'up', 1,0)
+            df[low_signal] = np.where(df['signal_type'] == 'down', 1,0)
+
+        ## indexing chains
+        df['lag_signal_type'] = df['signal_type'].shift(1)
+        df['lag_Date'] = df['Date'].shift(1)
+        df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+        df['break'] = np.where((df['span'] > 3) & (df['lag_signal_type'] == df['signal_type']), 1, 0)
+        df['break'] = np.where((df['lag_signal_type'] != df['signal_type']), 1, df['break'])
+        df['chain_id'] = df.sort_values(['Date']).groupby(['break']).cumcount() + 1
+        df['chain_id'] = np.where(df['break'] == 1, df['chain_id'], np.nan )
+        df['chain_id'] = df['chain_id'].fillna(method = 'ffill')
+
+        df['internal_rn'] = df.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
+        df['inv_internal_rn'] = df.sort_values(['Date'],ascending = False).groupby(['chain_id']).cumcount() + 1
+
+        df['first_in_chain'] = np.where(df['internal_rn'] == 1, True, False)
+        df['last_in_chain'] = np.where(df['inv_internal_rn'] == 1, True, False)
+
+        df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+        self.df = df.drop(columns = ['span','break','lag_signal_type','lag_Date']).copy()
+
+    def signal_analyser(self, days_list):
+        """
+        Initialize object
+
+        Parameters
+        ----------
+        days_list (list): list of integers to calculate expected returns
+
+        Returns
+        -------
+        if returns_fig is true, returns a matplotlib fig
+        """
+        signal_position = self.signal_position
+        df = self.df.iloc[0:-self.test_size,:].copy()
+        returns_list = list()
+
+        for days in days_list:
+            feature_ = f'return_{days}d'
+            df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+            returns_list.append(feature_)
+
+        df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
+        df['open_short'] = np.where(df.first_in_chain == True, True, np.nan)
+
+        # plotting
+        fig, axs = plt.subplots(1, 4, figsize = (20,5))
+        palette ={"go down": "tomato", "go up": "lightblue"}
+
+        df2 = df[df.signal_type.isin(['up','down'])]
+        df2['lag_Date'] = df2['Date'].shift(1)
+        df2['lag_signal_type'] = df2['signal_type'].shift(1)
+        df2 = df2[df2.lag_signal_type != df2.signal_type]
+        df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
+        sns.violinplot(data=df2, y="span",ax = axs[0], color = 'lightblue', linewidth=0.7,inner="quart")
+        sns.stripplot(data=df2, y="span",ax = axs[0], jitter=True, zorder=1)
+        axs[0].set_title('span between last signals')
+
+        df.signal_type = df.signal_type.map({'up':'go down', 'down': 'go up'})
+        df_ = df[df.last_in_chain == True]
+        df_['part'] = '-'
+        sns.violinplot(data=df_, y="internal_rn", x='part', ax = axs[1], hue="signal_type", inner="quart",palette = palette,gap=0.1, split=True, linewidth=0.7)
+        axs[1].set_title('signal duration distribution')
+
+        if signal_position:
+            for feature in returns_list:
+                df[feature] = df[feature].shift(-signal_position)
+
+        df_melt = df[df.open_long == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+        df_melt = df_melt.dropna()
+        sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type",ax = axs[2], split=True, gap=0.1, inner="quart",palette = palette, linewidth=0.8)
+        axs[2].axhline(y=0, color='grey', linestyle='--')
+        axs[2].set_title('E. returns - end of the signal')
+
+        df_melt = df[df.open_short == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+        df_melt = df_melt.dropna()
+        sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type",ax = axs[3], split=True, gap=0.1, inner="quart",palette = palette, linewidth=0.8)
+        axs[3].axhline(y=0, color='grey', linestyle='--')
+        axs[3].set_title('E. returns - start of the signal')
+
+        if self.show_plot:
+            plt.show()
+
+        if self.save_path:
+            result_plot_name = f'signals_strategy_distribution_{self.feature_name}.png'
+            fig.savefig(self.save_path+result_plot_name)
+            # pickle.dump(axs, open(self.save_path+result_plot_name, 'wb'))
+
+        if self.save_path and self.save_aws:
+            # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_plot_name, input_path = self.save_path+result_plot_name)
+            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+        if not self.show_plot:
+            plt.close()
+
+        del df
+
+        if self.return_fig:
+            return fig
+
+    def create_backtest_signal(self,days_strategy, high_exit = False, low_exit = False, open_in_list = ['down']):
+        """
+        Initialize object
+
+        Parameters
+        ----------
+        days_strategy (int): position horizon
+        high_exit (float): max threshold to close position
+        low_exit (float): min threshold to close position, this parameter has to be positive
+        open_in_list (list): list of strings ("down","up") to assess signals
+        Returns
+        -------
+        if returns_fig is true, returns a matplotlib fig and list of dicts containing analysis
+        """
+        asset_1 = 'Close'
+        up_signal, low_signal= f'signal_up_{self.feature_name}', f'signal_low_{self.feature_name}'
+        signal_position = self.signal_position
+        dft = self.df.iloc[-self.test_size:,:].reset_index(drop=True).copy()
+
+        dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
+        dft['bench_prod'] = dft['lrets_bench'].cumsum()
+        dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
+
+        map_ = {'down':'END LOW TREND', 'up': 'BEGINNING HIGH TREND'}
+
+        open_in_list_items = len(open_in_list)
+        fig, axs = plt.subplots(1,open_in_list_items, figsize = (7*open_in_list_items,6))
+        messages = list()
+        for i, open_in in enumerate(open_in_list):
+            axs_ = axs if open_in_list_items == 1 else axs[i]
+            if open_in == 'down':
+                dft['open_long'] = np.where((dft.last_in_chain == True) & (dft.signal_type == 'down'), True, np.nan) # open strat
+            elif open_in == 'up':
+                dft['open_long'] = np.where((dft.first_in_chain == True) & (dft.signal_type == 'up'), True, np.nan) # open strat
+
+            def chain_position(dft):
+                dft['open_long_id'] = np.where(dft['open_long'] == True, dft.chain_id, np.nan)
+                dft['open_long_id'] = dft['open_long_id'].fillna(method = 'ffill')
+                dft['open_long_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).cumcount() + 1
+                return dft
+
+            if signal_position:
+                dft['open_long'] = dft.sort_values(['Date'])['open_long'].shift(signal_position)
+
+            dft = chain_position(dft)
+            dft['flag'] = np.where(dft['open_long_rn'] < days_strategy, 1,0)
+
+            if high_exit and low_exit:
+                dft['open_strat'] = np.where(dft.open_long == True, dft.Open, np.nan) # open strat
+                dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
+                dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
+                dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
+                dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
+                dft['max_step_chain'] = dft.groupby(['open_long_id'])['open_long_rn'].transform('max')
+                dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['open_long_rn'] == days_strategy) | (dft['max_step_chain'] == dft['open_long_rn'])), 1, np.nan)
+                dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
+
+                dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
+                dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1,-1,np.nan))
+                dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
+                dft['exit_order'] = dft.sort_values(['Date']).groupby(['open_long_id','exit']).cumcount() + 1
+                dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
+                dft = dft.drop(columns = ['exit_order'])
+                ## if last signal is near
+                max_id = dft.open_long_id.max()
+                dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).open_long_rn.transform('max')
+                dft['exit'] = np.where((dft.open_long_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.open_long_rn), 1, dft['exit'])
+
+                dft['exit_step'] = np.where(dft.exit == 1, dft.open_long_rn, np.nan)
+                dft['exit_step'] = dft.sort_values(['Date']).groupby(['open_long_id']).exit_step.transform('max')
+
+                dft['flag'] = np.where(dft.open_long_rn <= dft.exit_step, 1, 0)
+
+            dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
+            dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(),-0.0,dft['lrets_strat'])
+            dft['lrets_prod'] = dft['lrets_strat'].cumsum()
+            dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
+
+            bench_rets = round(dft['bench_prod_exp'].values[-1]*100,1)
+            strat_rets = round(dft['strat_prod_exp'].values[-1]*100,1)
+
+            bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()),1)
+            strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()),1)
+
+            message1 = f'{bench_rets}%'
+            message2 = f'{strat_rets}%'
+
+            messages_ = {
+                'type strategy':map_[open_in],
+                'benchmark return:':message1,
+                'benchmark sharpe ratio:': bench_sr,
+                'strategy return:':message2,
+                'strategy sharpe ratio:': strat_sr,
+            }
+            messages.append(messages_)
+            if self.show_plot:
+                print('----------------------------')
+                print(messages_)
+                print('----------------------------')
+
+            axs_.plot(dft.bench_prod_exp.values, label = 'benchmark', color = 'steelblue')
+            axs_.scatter(range(len(dft)),np.where(dft[low_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'red', label = 'signal')
+            axs_.scatter(range(len(dft)),np.where(dft[up_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'green', label = 'signal')
+            axs_.plot(dft.strat_prod_exp.values, label = 'strategy', color = 'darksalmon')
+            axs_.set_xlabel("index")
+            axs_.set_ylabel("cumulative return")
+            axs_.set_title(f'{map_[open_in]} strategy and cumulative returns based on signals')
+            axs_.legend()
+
+        if self.show_plot:
+            plt.plot()
+
+        if self.save_path:
+            result_json_name = f'signals_strategy_return_{self.feature_name}.json'
+            result_plot_name = f'signals_strategy_return_{self.feature_name}.png'
+
+            plt.savefig(self.save_path+result_plot_name)
+
+            with open(self.save_path+result_json_name, "w") as outfile:
+                json.dump(messages, outfile)
+
+        if self.save_path and self.save_aws:
+
+            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
+            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+        if not self.show_plot:
+            plt.close()
+
+        del dft
+
+        if self.return_fig:
+            return fig, messages
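For context, a minimal usage sketch of the new module. The class and method names come from the diff above; the input file, the 'AAPL' symbol, the 'rsi' feature name, and the exit thresholds are illustrative assumptions, and the constructor expects the frame to already carry 'Date', OHLC columns and binary 'signal_up_<feature>' / 'signal_low_<feature>' columns:

    import pandas as pd
    from virgo_modules.src.backtester import SignalAnalyserObject

    df = pd.read_csv('asset_with_signals.csv')   # hypothetical input with Date/OHLC/signal columns

    analyser = SignalAnalyserObject(
        df, 'AAPL', 'rsi',                # data, symbol_name, feature_name (illustrative)
        test_size=252,                    # hold out roughly one trading year
        show_plot=False, return_fig=True,
    )
    fig = analyser.signal_analyser(days_list=[3, 8, 13])    # expected-return distributions per horizon
    fig_bt, messages = analyser.create_backtest_signal(
        days_strategy=10,                 # position horizon in trading days
        high_exit=5.0, low_exit=-3.0,     # percent exit thresholds (illustrative values)
        open_in_list=['down', 'up'],
    )
    print(messages)                       # benchmark vs strategy return and Sharpe ratio dicts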
virgo_modules/src/re_utils.py
CHANGED
@@ -1378,7 +1378,8 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
         arguments_to_use = configs[feature_name]['config_params']
         method_to_use = configs[feature_name]['method']
         getattr(object_stock, method_to_use)(**arguments_to_use, plot = False, save_features = False)
-
+        if method_to_use not in ['minmax_pricefeature']:
+            object_stock.produce_order_features(feature_name)
         # geting targets
         object_stock.get_categorical_targets(**target_params_up)
         object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})
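The hunk above guards the follow-up call to produce_order_features behind a skip list, so configured methods that do not emit order features (currently only minmax_pricefeature) are dispatched without it. A sketch of the surrounding dynamic-dispatch pattern; the configs dict shape and entries are inferred for illustration, not shown in full by the diff, and object_stock is assumed to be a stock_eda_panel instance:

    configs = {
        'rsi': {'method': 'rsi_feature', 'config_params': {'window': 14}},      # hypothetical entry
        'minmax': {'method': 'minmax_pricefeature',
                   'config_params': {'type_func': 'min', 'window': 30}},        # hypothetical entry
    }

    for feature_name in configs:
        arguments_to_use = configs[feature_name]['config_params']
        method_to_use = configs[feature_name]['method']
        # resolve the configured method by name and run it without plotting or saving
        getattr(object_stock, method_to_use)(**arguments_to_use, plot=False, save_features=False)
        # new in 0.1.0: skip order features for methods that do not produce them
        if method_to_use not in ['minmax_pricefeature']:
            object_stock.produce_order_features(feature_name)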
virgo_modules/src/ticketer_source.py
CHANGED
@@ -250,25 +250,6 @@ class FeaturesEntropy(BaseEstimator, TransformerMixin):
         X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
         return X
 
-def sharpe_ratio(return_series):
-
-    '''
-    calculate sharpe ratio for given array.
-
-    Parameters:
-    return_series (pd.series): pandas series of the asset returns
-
-    Returns:
-    sharpe (float): sharpe ratio
-    '''
-
-    N = 255 # Trading days in the year (change to 365 for crypto)
-    rf = 0.005 # Half a percent risk free rare
-    mean = return_series.mean() * N -rf
-    sigma = return_series.std() * np.sqrt(N)
-    sharpe = round(mean / sigma, 3)
-    return sharpe
-
 class signal_combiner(BaseEstimator, TransformerMixin):
 
     """
@@ -1803,7 +1784,7 @@ class stock_eda_panel(object):
         if plot:
             self.signal_plotter(feature_name)
 
-    def minmax_pricefeature(self, type_func, window, distance = False, save_features = False):
+    def minmax_pricefeature(self, type_func, window, distance = False, plot = False, save_features = False):
         """
         perform relative price/distance with respect to the min/max price in a given time scope
 
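This signature change appears to be what makes the generic dispatch in extract_data_traintest safe: that helper always passes plot=False, and the 0.0.90 signature of minmax_pricefeature did not accept the keyword. A sketch of the call that now works; the 'AAPL' symbol and parameter values are illustrative, and the import path assumes stock_eda_panel lives in ticketer_source.py as the hunk header suggests:

    from virgo_modules.src.ticketer_source import stock_eda_panel

    panel = stock_eda_panel('AAPL', 1000)    # hypothetical symbol and row count
    panel.get_data()
    # 0.1.0 accepts plot, matching the generic `plot = False` dispatch;
    # in 0.0.90 the same call raised TypeError: unexpected keyword argument 'plot'
    panel.minmax_pricefeature(type_func='min', window=30, plot=False, save_features=False)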
@@ -2809,103 +2790,453 @@ class hmm_feature_selector():
         self.feature_results = feature_results
         self.best_features = pd.DataFrame(self.feature_results).T.sort_values('mean relevance').iloc[-1,:].features
 
-
+def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False, exit_params = {}):
+    '''
+    code snippet that runs the signal analyser objects; the analysis is a signal back-test
+
+    Parameters:
+    test_data_size (int): test data size
+    feature_name (str): name of the feature to assess
+    days_list (list): time scope to assess the returns
+    configuration (dict): parameters of the method to run
+    object_stock (obj): object with data to assess
+    signal_analyser_object (obj): signal_analyser object
+    plot (boolean): if true, plot results
+    backtest (boolean): if true, run backtest
+    exit_params (dict): parameters of exit returns
+
+    Returns:
+    mean_median_return (float): median return of the backtests
+    '''
+    method(**configuration)
+    signal_assess = signal_analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot)
+    signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
+
+    if backtest:
+        print('-----------------------back test ---------------------------')
+        signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params )
+
+    return signal_assess.mean_median_return
+
+def iterate_signal_analyser(test_data_size,feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
+    '''
+    code snippet that iterates the signal analyser
+
+    Parameters:
+    test_data_size (int): test data size
+    feature_name (str): name of the feature to assess
+    days_list (list): time scope to assess the returns
+    arguments_to_test: parameters to test
+    method: methods to run
+    object_stock (obj): object with data to assess
+    signal_analyser_object (obj): signal_analyser object
+    plot (boolean): if true, plot results
+
+    Returns:
+    best_result (int): index from the arguments_to_test with the best result
+    '''
+    results = list()
+    for key in arguments_to_test.keys():
+        configuration = arguments_to_test.get(key)
+        mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
+        results.append(mean_median_return)
+
+    df_result = pd.DataFrame({'keys':arguments_to_test.keys(),'results':results})
+    if plot:
+        plt.plot(df_result['keys'], df_result['results'])
+        plt.scatter(df_result['keys'], df_result['results'])
+        plt.title('simulation between configurations')
+        plt.ylabel('median expected return')
+        plt.show()
+
+    best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
+    return best_result
+
+class analyse_index(stock_eda_panel):
     """
-    class that is going to
+    class that is going to train hmm models to perform feature selection
 
     Attributes
     ----------
-
-
-
-
-
-
+    data_index : pd.DataFrame
+        name of the index
+    indexes: list
+        list of indexes
+    asset : str
+        name of the asset
+    n_obs : int
+        number of rows to extract
+    lag : int
+        lag to apply
+    data_window : str
+        5y 10y 15y
+    show_plot : bool
+        If True, show plots
     save_path : str
-
+        local path for saving e.g r'C:/path/to/the/file/'
     save_aws : str
-
+        remote key in s3 bucket path e.g. 'path/to/file/'
     aws_credentials : dict
-
-
-
-
-
-
+        dict with the aws credentials
+    merger_df : pd.DataFrame
+        dataframe with the index and asset data
+    states_result = dict
+        betas and correlation score results
+
     Methods
     -------
-
-
-
+    process_data():
+        using stock_eda_panel, get data and merge data
+    plot_betas(sample_size=int, offset=int, subsample_ts=int):
+        display beta analysis plot
+    get_betas(subsample_ts=int)
+        get general beta and last sample beta, correlation score is included too
     """
-
-    def __init__(self, data,symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+    def __init__(self, index_data, asset, n_obs, lag, data_window = '5y', show_plot = False, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
         """
         Initialize object
 
         Parameters
         ----------
-
-
-
-
-
-
-
+        index_data (pd.DataFrame or str): index data dataframe or index string
+        asset (str): name of the asset
+        n_obs (int): number of rows to extract
+        lag (int): lag to apply
+        data_window (str): 5y 10y 15y
+        show_plot (bool): If True, show plots
+        save_path (str): local path for saving e.g r'C:/path/to/the/file/'
+        save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+        aws_credentials (dict): dict with the aws credentials
 
         Returns
         -------
         None
         """
-
-
+
+
+        if type(index_data) != str:
+            index_data['Date'] = pd.to_datetime(index_data['Date'])
+            self.index_data = index_data
+            self.indexes = [ x for x in list(index_data.columns) if x != 'Date']
+        else:
+            self.indexes = [index_data]
+
+        self.index_data = index_data
+        self.asset = asset
+        self.n_obs = n_obs
+        self.data_window = data_window
+        self.lag = lag
+
         self.show_plot = show_plot
+        self.return_fig = return_fig
         self.save_path = save_path
         self.save_aws = save_aws
-        self.aws_credentials = aws_credentials
-        self.return_fig = return_fig
 
-    def
+    def process_data(self):
         """
-
+        using stock_eda_panel, get data and merge data
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        None
+        """
+        asset = stock_eda_panel(self.asset, self.n_obs, data_window=self.data_window)
+        asset.get_data()
+        df = asset.df[['Date','Close']]
+
+        if type(self.index_data) != str:
+            df_merge = df.merge(self.index_data, on = ['Date'], how = 'left').sort_values('Date')
+
+        else:
+            indx = stock_eda_panel(self.index_data, self.n_obs, data_window=self.data_window)
+            indx.get_data()
+            indx_df = indx.df[['Date','Close']].rename(columns = {'Close':self.index_data})
+            df_merge = df.merge(indx_df, on = ['Date'], how = 'left').sort_values('Date')
+
+        for colx in ['Close'] + self.indexes:
+            df_merge[f'{colx}_pct'] = df_merge[colx]/df_merge[colx].shift(self.lag) - 1
+
+        df_merge.dropna(inplace = True)
+        self.merger_df = df_merge.rename(columns = {'Close_pct': 'asset_return'})
+
+    def plot_betas(self,sample_size, offset, subsample_ts =False, index = False):
+        """
+        display beta analysis plot
 
         Parameters
         ----------
-
-
-
-        threshold (float): alpha or z threshold
-        verbose (boolean): print metrics
-        signal_position (int): if true, the signal is taken at the given step after the signal end
+        sample_size (int): number of days or window size to calculate beta
+        offset (int): overlap between windows
+        subsample_ts (int): subsample size of data
 
         Returns
         -------
         None
         """
-
-
-
-
+        if (type(self.index_data) == str) & (index != False):
+            raise Exception("No need of index argument")
+        else:
+            index = self.indexes[0]
+
+        index_pct = f'{index}_pct'
+        ### plotting analysis
+        figure, ax = plt.subplot_mosaic(
+            [["scatter_total", "scatter_sample",'ts','ts']],
+            layout="constrained",
+            figsize=(18, 5)
+        )
 
-
-
+        ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df[index_pct])
+
+        huber_regr = HuberRegressor(fit_intercept = True)
+        huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
+        b, a = huber_regr.coef_[0], huber_regr.intercept_
+
+        # b, a = np.polyfit(self.merger_df.asset_return, self.merger_df[index_pct], 1)
+        ax['scatter_total'].plot(self.merger_df.asset_return, b*self.merger_df.asset_return+a, color='red')
 
-
+        ax['ts'].plot(self.merger_df.Date, self.merger_df.Close, color = 'grey', alpha = 0.3)
 
-
-
-            df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
-            returns_list.append(feature_)
+        if subsample_ts:
+            self.merger_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
 
-
-
-        '
-
-
-
-
-        )
-
+        for i in range(0,len(self.merger_df)-sample_size,offset):
+
+            merger_ = self.merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
+            x = merger_[index_pct]
+            y = merger_.asset_return
+            # b, a = np.polyfit(x,y, 1)
+            huber_regr = HuberRegressor(fit_intercept = True)
+            huber_regr.fit(x.values.reshape(-1,1), y.values.reshape(-1,1))
+            b, a = huber_regr.coef_[0], huber_regr.intercept_
+
+            normalize = mcolors.Normalize(vmin=-1, vmax=1)
+            colormap = cm.jet
+
+            ax['scatter_sample'].plot(x, y,'o', color = 'blue', alpha = 0.1)
+            ax['scatter_sample'].plot(x, b*x+a, color=colormap(normalize(b)))
+            ax['scatter_sample'].set_xlim(-0.06, 0.06)
+            ax['scatter_sample'].set_ylim(-0.06, 0.06)
+
+            plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap(normalize(b)), s = 10)
+
+        scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
+        scalarmappaple.set_array(x)
+
+        plt.title(f'{self.asset} using index: {index}')
+        plt.colorbar(scalarmappaple)
+
+        if self.show_plot:
+            plt.show()
+
+        if self.save_path:
+            result_plot_name = f'market_best_fit.png'
+            figure.savefig(self.save_path+result_plot_name)
+
+        if self.save_path and self.save_aws:
+            # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.asset}/'+result_plot_name,input_path = self.save_path+result_plot_name)
+            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+        if not self.show_plot:
+            plt.close()
+
+        if self.return_fig:
+            return figure
+
+    def get_betas(self,subsample_ts=False):
+        """
+        get general beta and last sample beta, correlation score is included too
+
+        Parameters
+        ----------
+        subsample_ts (int): subsample size of data
+
+        Returns
+        -------
+        None
+        """
+        result = list()
+        for index in self.indexes:
+
+            index_pct = f'{index}_pct'
+            huber_regr = HuberRegressor(fit_intercept = True)
+            huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
+            general_beta, a = huber_regr.coef_[0], huber_regr.intercept_
+            general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df[index])[0]
+
+            dict_res = {
+                'index':index,
+                'general_beta':general_beta,
+                'general_r':general_r,
+            }
+
+            if subsample_ts:
+                tmp_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
+                huber_regr = HuberRegressor(fit_intercept = True)
+                huber_regr.fit(tmp_df.asset_return.values.reshape(-1,1), tmp_df[index_pct].values.reshape(-1,1))
+                sample_beta, a = huber_regr.coef_[0], huber_regr.intercept_
+                sample_r = stats.mstats.pearsonr(tmp_df.asset_return, tmp_df[index])[0]
+                dict_res['sample_beta'] = sample_beta
+                dict_res['sample_r'] = sample_r
+
+            result.append(dict_res)
+
+        self.states_result = result
+
+
+def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
+    '''
+    select relevant beta result data of a given asset
+
+    Parameters:
+    data_market (pd.DataFrame): dataframe of the market results
+    ticket_name (str): name of the asset
+    show_plot (bool): If true, plot results
+    save_path (str): local path for saving e.g r'C:/path/to/the/file/'
+    save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+    aws_credentials (dict): dict of the aws credentials
+
+    Returns:
+    selection (pd.DataFrame): dataframe of the most relevant beta
+    '''
+    all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
+    all_betas['gen_r2'] = all_betas.general_r ** 2
+    all_betas['sampl_r2'] = all_betas.sample_r ** 2
+    selection = all_betas.sort_values('gen_r2',ascending =False).head(2).sort_values('sampl_r2',ascending =False).head(1).drop(columns = ['gen_r2','sampl_r2'])
+
+    if show_plot:
+        print(selection)
+    if save_path:
+        result_plot_name = f'market_best_fit.csv'
+        selection.to_csv(save_path+result_plot_name)
+
+    if save_path and save_aws:
+        # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
+        upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
+    return selection
+
+#### to deprecate
+
+def sharpe_ratio(return_series):
+
+    '''
+    calculate sharpe ratio for given array.
+
+    Parameters:
+    return_series (pd.series): pandas series of the asset returns
+
+    Returns:
+    sharpe (float): sharpe ratio
+    '''
+
+    N = 255 # Trading days in the year (change to 365 for crypto)
+    rf = 0.005 # Half a percent risk free rate
+    mean = return_series.mean() * N - rf
+    sigma = return_series.std() * np.sqrt(N)
+    sharpe = round(mean / sigma, 3)
+    return sharpe
+
+class signal_analyser_object:
+    """
+    class that is going to analyse signals
+
+    Attributes
+    ----------
+    data : pd.DataFrame
+        symbol of the asset
+    ticket_name :str
+        asset symbol
+    show_plot : boolean
+        if true show plot for every method
+    save_path : str
+        if true, save results in file
+    save_aws : str
+        if true, export results to remote repo
+    aws_credentials : dict
+        credentials for aws
+    return_fig : boolean
+        if true, methods will return objects
+    create_backtest_signal(days_strategy=list, test_size=int, feature_name=str, high_exit=float, low_exit=float):
+        perform backtest signal analysis
+
+    Methods
+    -------
+    signal_analyser(test_size=int, feature_name=str, days_list=list, threshold=float,verbose=boolean, signal_position=boolean):
+        perform signal analysis and feature extraction
+
+    """
+
+    def __init__(self, data,symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+        """
+        Initialize object
+
+        Parameters
+        ----------
+        data (pd.DataFrame): data
+        ticket_name (str): name of the asset
+        show_plot (boolean): if true show plot for every method
+        save_path (str): if true, save results in file e.g r'C:/path/to/the/file/'
+        save_aws (str): if true, export results to remote repo e.g. 'path/to/file/'
+        aws_credentials (dict): credentials for aws
+        return_fig (boolean): if true, methods will return objects
+
+        Returns
+        -------
+        None
+        """
+        self.data = data.copy()
+        self.ticket_name = symbol_name
+        self.show_plot = show_plot
+        self.save_path = save_path
+        self.save_aws = save_aws
+        self.aws_credentials = aws_credentials
+        self.return_fig = return_fig
+
+    def signal_analyser(self, test_size, feature_name, days_list, threshold = 0.05,verbose = False, signal_position = False):
+        """
+        perform signal analysis and feature extraction
+
+        Parameters
+        ----------
+        test_size (int): test data size
+        feature_name (str): name of the feature to assess
+        days_list (list): list of integers [3,8,10] to assess
+        threshold (float): alpha or z threshold
+        verbose (boolean): print metrics
+        signal_position (int): if true, the signal is taken at the given step after the signal end
+
+        Returns
+        -------
+        None
+        """
+        data = self.data
+        self.feature_name = feature_name
+        up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+        features_base = ['Date', up_signal, low_signal, 'Close']
+
+        df = data[features_base].sort_values('Date').iloc[0:-test_size,:]
+        returns_list = list()
+
+        for days in days_list:
+
+            feature_ = f'return_{days}d'
+            days = days + signal_position if signal_position else days
+            df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+            returns_list.append(feature_)
+
+        df['signal_type'] = np.where(
+            df[up_signal] == 1,
+            'up',
+            np.where(
+                df[low_signal] == 1,
+                'down',
+                None
+            )
+        )
         df = df[~df.signal_type.isna()]
         df['lag_Date'] = df['Date'].shift(1)
         df['lag_signal_type'] = df['signal_type'].shift(1)
@@ -3173,332 +3504,3 @@ class signal_analyser_object:
 
         if self.return_fig:
             return fig, messages
-
-def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False, exit_params = {}):
-    '''
-    code snippet that is going run some objects. The analysis is signal analyse which is backtesting
-
-    Parameters:
-    test_data_size (int): test data size
-    feature_name (str): name of the feature to assess
-    days_list (list): tome scope to assess the returns
-    configuration (dict): parameters of the method to run
-    object_stock (obj): object with data to assess
-    signal_analyser_object (obj): signal_analyser object
-    plot (boolean): if true, plot results
-    backtest (boolean): if true, run backtest
-    exit_params (dict): parameters of exit returns
-
-    Returns:
-    mean_median_return (float): median return of the backtests
-    '''
-    method(**configuration)
-    signal_assess = signal_analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot)
-    signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
-
-    if backtest:
-        print('-----------------------back test ---------------------------')
-        signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params )
-
-    return signal_assess.mean_median_return
-
-def iterate_signal_analyser(test_data_size,feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
-    '''
-    code snippet is going to iterate signal analyser
-
-    Parameters:
-    test_data_size (int): test data size
-    feature_name (str): name of the feature to assess
-    days_list (list): tome scope to assess the returns
-    arguments_to_test: parameters to test
-    method: methods to run
-    object_stock (obj): object with data to assess
-    signal_analyser_object (obj): signal_analyser object
-    plot (boolean): if true, plot results
-
-    Returns:
-    best_result (int): index from the arguments_to_test with the best result
-    '''
-    results = list()
-    for key in arguments_to_test.keys():
-        configuration = arguments_to_test.get(key)
-        mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
-        results.append(mean_median_return)
-
-    df_result = pd.DataFrame({'keys':arguments_to_test.keys(),'results':results})
-    if plot:
-        plt.plot(df_result['keys'], df_result['results'])
-        plt.scatter(df_result['keys'], df_result['results'])
-        plt.title('simulation between configurations')
-        plt.ylabel('median expected return')
-        plt.show()
-
-    best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
-    return best_result
-
-class analyse_index(stock_eda_panel):
-    """
-    class that is going to train hmm models to perform feature selection
-
-    Attributes
-    ----------
-    data_index : pd.DataFrame
-        name of the index
-    indexes: list
-        list of indexes
-    asset : str
-        name of the asset
-    n_obs : int
-        number of rows to extract
-    lag : int
-        lag to apply
-    data_window : str
-        5y 10y 15y
-    show_plot : bool
-        If True, show plots
-    save_path : str
-        local path for saving e.g r'C:/path/to/the/file/'
-    save_aws : str
-        remote key in s3 bucket path e.g. 'path/to/file/'
-    aws_credentials : dict
-        dict with the aws credentials
-    merger_df : pd.DataFrame
-        dataframe with the index and asset data
-    states_result = dict
-        betas and correlation score results
-
-    Methods
-    -------
-    process_data():
-        using stock_eda_panel, get data and merge data
-    plot_betas(sample_size=int, offset=int, subsample_ts=int):
-        display beta analysis plot
-    get_betas(subsample_ts=int)
-        get general beta and last sample beta, correlation score is included too
-    """
-    def __init__(self, index_data, asset, n_obs, lag, data_window = '5y', show_plot = False, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
-        """
-        Initialize object
-
-        Parameters
-        ----------
-        index_data (pd.DataFrame or str): index data dataframe or index string
-        asset (str): name of the asset
-        n_obs (int): number of rows to extract
-        lag (int): lag to apply
-        data_window (str): 5y 10y 15y
-        show_plot (bool): If True, show plots
-        save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-        save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-        aws_credentials (dict): dict with the aws credentials
-
-        Returns
-        -------
-        None
-        """
-
-        if type(index_data) != str:
-            index_data['Date'] = pd.to_datetime(index_data['Date'])
-            self.index_data = index_data
-            self.indexes = [ x for x in list(index_data.columns) if x != 'Date']
-        else:
-            self.indexes = [index_data]
-
-        self.index_data = index_data
-        self.asset = asset
-        self.n_obs = n_obs
-        self.data_window = data_window
-        self.lag = lag
-
-        self.show_plot = show_plot
-        self.return_fig = return_fig
-        self.save_path = save_path
-        self.save_aws = save_aws
-
-    def process_data(self):
-        """
-        using stock_eda_panel, get data and merge data
-
-        Parameters
-        ----------
-        None
-
-        Returns
-        -------
-        None
-        """
-        asset = stock_eda_panel(self.asset, self.n_obs, data_window=self.data_window)
-        asset.get_data()
-        df = asset.df[['Date','Close']]
-
-        if type(self.index_data) != str:
-            df_merge = df.merge(self.index_data, on = ['Date'], how = 'left').sort_values('Date')
-
-        else:
-            indx = stock_eda_panel(self.index_data, self.n_obs, data_window=self.data_window)
-            indx.get_data()
-            indx_df = indx.df[['Date','Close']].rename(columns = {'Close':self.index_data})
-            df_merge = df.merge(indx_df, on = ['Date'], how = 'left').sort_values('Date')
-
-        for colx in ['Close'] + self.indexes:
-            df_merge[f'{colx}_pct'] = df_merge[colx]/df_merge[colx].shift(self.lag) - 1
-
-        df_merge.dropna(inplace = True)
-        self.merger_df = df_merge.rename(columns = {'Close_pct': 'asset_return'})
-
-    def plot_betas(self,sample_size, offset, subsample_ts =False, index = False):
-        """
-        display beta analysis plot
-
-        Parameters
-        ----------
-        sample_size (int): number of days or window size to calculate beta
-        offset (int): overlap between windows
-        subsample_ts (int): subsample size of data
-
-        Returns
-        -------
-        None
-        """
-        if (type(self.index_data) == str) & (index != False):
-            raise Exception("No need of index argument")
-        else:
-            index = self.indexes[0]
-
-        index_pct = f'{index}_pct'
-        ### ploting analysis
-        figure, ax = plt.subplot_mosaic(
-            [["scatter_total", "scatter_sample",'ts','ts']],
-            layout="constrained",
-            figsize=(18, 5)
-        )
-
-        ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df[index_pct])
-
-        huber_regr = HuberRegressor(fit_intercept = True)
-        huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
-        b, a = huber_regr.coef_[0], huber_regr.intercept_
-
-        # b, a = np.polyfit(self.merger_df.asset_return, self.merger_df[index_pct], 1)
-        ax['scatter_total'].plot(self.merger_df.asset_return, b*self.merger_df.asset_return+a, color='red')
-
-        ax['ts'].plot(self.merger_df.Date, self.merger_df.Close, color = 'grey', alpha = 0.3)
-
-        if subsample_ts:
-            self.merger_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
-
-        for i in range(0,len(self.merger_df)-sample_size,offset):
-
-            merger_ = self.merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
-            x = merger_[index_pct]
-            y = merger_.asset_return
-            # b, a = np.polyfit(x,y, 1)
-            huber_regr = HuberRegressor(fit_intercept = True)
-            huber_regr.fit(x.values.reshape(-1,1), y.values.reshape(-1,1))
-            b, a = huber_regr.coef_[0], huber_regr.intercept_
-
-            normalize = mcolors.Normalize(vmin=-1, vmax=1)
-            colormap = cm.jet
-
-            ax['scatter_sample'].plot(x, y,'o', color = 'blue', alpha = 0.1)
-            ax['scatter_sample'].plot(x, b*x+a, color=colormap(normalize(b)))
-            ax['scatter_sample'].set_xlim(-0.06, 0.06)
-            ax['scatter_sample'].set_ylim(-0.06, 0.06)
-
-            plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap(normalize(b)), s = 10)
-
-        scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
-        scalarmappaple.set_array(x)
-
-        plt.title(f'{self.asset} using index: {index}')
-        plt.colorbar(scalarmappaple)
-
-        if self.show_plot:
-            plt.show()
-
-        if self.save_path:
-            result_plot_name = f'market_best_fit.png'
-            figure.savefig(self.save_path+result_plot_name)
-
-        if self.save_path and self.save_aws:
-            # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.asset}/'+result_plot_name,input_path = self.save_path+result_plot_name)
-            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
-
-        if not self.show_plot:
-            plt.close()
-
-        if self.return_fig:
-            return figure
-
-    def get_betas(self,subsample_ts=False):
-        """
-        get general beta and last sample beta, correlation score is included too
-
-        Parameters
-        ----------
-        subsample_ts (int): subsample size of data
-
-        Returns
-        -------
-        None
-        """
-        result = list()
-        for index in self.indexes:
-
-            index_pct = f'{index}_pct'
-            huber_regr = HuberRegressor(fit_intercept = True)
-            huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
-            general_beta, a = huber_regr.coef_[0], huber_regr.intercept_
-            general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df[index])[0]
-
-            dict_res = {
-                'index':index,
-                'general_beta':general_beta,
-                'general_r':general_r,
-            }
-
-            if subsample_ts:
-                tmp_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
-                huber_regr = HuberRegressor(fit_intercept = True)
-                huber_regr.fit(tmp_df.asset_return.values.reshape(-1,1), tmp_df[index_pct].values.reshape(-1,1))
-                sample_beta, a = huber_regr.coef_[0], huber_regr.intercept_
-                sample_r = stats.mstats.pearsonr(tmp_df.asset_return, tmp_df[index])[0]
-                dict_res['sample_beta'] = sample_beta
-                dict_res['sample_r'] = sample_r
-
-            result.append(dict_res)
-
-        self.states_result = result
-
-
-def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
-    '''
-    select relevant beta result data of a given asset
-
-    Parameters:
-    data_market (pd.DataFrame): dataframe of the market results
-    ticket_name (str): name of the asset
-    show_plot (bool): If tru, plot results
-    save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-    save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-    aws_credentials (dict): dict of the aws credentials
-
-    Returns:
-    selection (pd.DataFrame): dataframe of the most relevant beta
-    '''
-    all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
-    all_betas['gen_r2'] = all_betas.general_r ** 2
-    all_betas['sampl_r2'] = all_betas.sample_r ** 2
-    selection = all_betas.sort_values('gen_r2',ascending =False).head(2).sort_values('sampl_r2',ascending =False).head(1).drop(columns = ['gen_r2','sampl_r2'])
-
-    if show_plot:
-        print(selection)
-    if save_path:
-        result_plot_name = f'market_best_fit.csv'
-        selection.to_csv(save_path+result_plot_name)
-
-    if save_path and save_aws:
-        # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
-        upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
-    return selection
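The relocated analyse_index workflow in one minimal sketch; 'SPY', 'AAPL', and the window values are illustrative, and the import path assumes the class stays in ticketer_source.py as these hunks indicate:

    from virgo_modules.src.ticketer_source import analyse_index

    idx = analyse_index(index_data='SPY', asset='AAPL', n_obs=1500, lag=5, data_window='5y')
    idx.process_data()                  # pulls both series via stock_eda_panel and builds merger_df
    idx.get_betas(subsample_ts=252)     # robust (Huber) beta + Pearson r, overall and recent sample
    print(idx.states_result)            # e.g. [{'index': 'SPY', 'general_beta': ..., 'general_r': ..., ...}]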
virgo_modules-0.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
+virgo_modules/src/backtester.py,sha256=9ZFUl2VbcJhn4ycQ3prPSXforaw-mb_FmDssDKzK5mc,17252
+virgo_modules/src/edge_utils.py,sha256=i3Hm3fO-QA-u17jDpnRodLLILMWZ2VTMEkMKijdGKLg,14287
+virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
+virgo_modules/src/re_utils.py,sha256=tRyU9WpH0K7qMWXB6DIDtVqjsWg_pVdxEbq363RHZ4M,72306
+virgo_modules/src/ticketer_source.py,sha256=2wnPm2ng8VyzhApFuJ6tn8VYX_3C8bg77qijsZq_u7k,150153
+virgo_modules-0.1.0.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
+virgo_modules-0.1.0.dist-info/METADATA,sha256=kUgig6T9Goiuwmw1iRf_572ii__9MTRaPSnu4aZ5Qxo,1428
+virgo_modules-0.1.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+virgo_modules-0.1.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
+virgo_modules-0.1.0.dist-info/RECORD,,
virgo_modules-0.0.90.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
-virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
-virgo_modules/src/edge_utils.py,sha256=i3Hm3fO-QA-u17jDpnRodLLILMWZ2VTMEkMKijdGKLg,14287
-virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
-virgo_modules/src/re_utils.py,sha256=ndPUW3F0QkljtKLR1dqtBm2I2LtceduSgLRIk3HszWk,72244
-virgo_modules/src/ticketer_source.py,sha256=30xCmfL16SHMPQOs4qKsKSfvfdfv-9IkYY4X9gJgx70,150116
-virgo_modules-0.0.90.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
-virgo_modules-0.0.90.dist-info/METADATA,sha256=6KCZW4HK_io_AsQjBV733cVNeNlyRKqJ6MdFCFdmTWY,1429
-virgo_modules-0.0.90.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-virgo_modules-0.0.90.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
-virgo_modules-0.0.90.dist-info/RECORD,,
{virgo_modules-0.0.90.dist-info → virgo_modules-0.1.0.dist-info}/LICENSE
File without changes
{virgo_modules-0.0.90.dist-info → virgo_modules-0.1.0.dist-info}/WHEEL
File without changes
{virgo_modules-0.0.90.dist-info → virgo_modules-0.1.0.dist-info}/top_level.txt
File without changes
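The RECORD entries above pair each file path with an unpadded, URL-safe base64 SHA-256 digest and a byte size (the standard wheel RECORD format). A small sketch to recompute an entry locally against an unpacked wheel:

    import base64, hashlib

    def record_hash(path):
        # wheel RECORD format: urlsafe base64 of the SHA-256 digest, '=' padding stripped
        with open(path, 'rb') as f:
            data = f.read()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=').decode()
        return f'sha256={digest}', len(data)

    # expected per the 0.1.0 RECORD above:
    # ('sha256=9ZFUl2VbcJhn4ycQ3prPSXforaw-mb_FmDssDKzK5mc', 17252)
    print(record_hash('virgo_modules/src/backtester.py'))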