virgo-modules 0.0.90.tar.gz → 0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of virgo-modules might be problematic.

Files changed (18)
  1. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/PKG-INFO +1 -1
  2. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/setup.py +1 -1
  3. virgo_modules-0.1.0/virgo_app/virgo_modules/src/backtester.py +360 -0
  4. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/re_utils.py +2 -1
  5. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/ticketer_source.py +411 -409
  6. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
  7. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/SOURCES.txt +1 -0
  8. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/LICENSE +0 -0
  9. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/README.md +0 -0
  10. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/setup.cfg +0 -0
  11. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/__init__.py +0 -0
  12. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/__init__.py +0 -0
  13. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
  14. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/edge_utils.py +0 -0
  15. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
  16. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
  17. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
  18. {virgo_modules-0.0.90 → virgo_modules-0.1.0}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: virgo_modules
- Version: 0.0.90
+ Version: 0.1.0
  Summary: data processing and statistical modeling using stock market data
  Home-page: https://github.com/miguelmayhem92/virgo_module
  Author: Miguel Mayhuire
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:

  setup(
      name="virgo_modules",
-     version="0.0.90",
+     version="0.1.0",
      description="data processing and statistical modeling using stock market data",
      package_dir={"": "virgo_app"},
      packages=find_packages(where="virgo_app"),
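Only the version string changes here; the new backtester module ships inside the existing virgo_modules package tree, so picking up the release is a plain upgrade, e.g. pip install --upgrade virgo-modules==0.1.0 (assuming the usual PyPI workflow).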
@@ -0,0 +1,360 @@
+ import pandas as pd
+ import numpy as np
+ import json
+
+ import matplotlib.pyplot as plt
+ import matplotlib.gridspec as gridspec
+ import seaborn as sns; sns.set()
+
+ import warnings
+ warnings.filterwarnings('ignore')
+
+ from .aws_utils import upload_file_to_aws
+
+ def sharpe_ratio(return_series):
+     '''
+     calculate the Sharpe ratio for a given return series.
+
+     Parameters:
+         return_series (pd.Series): pandas series of the asset returns
+
+     Returns:
+         sharpe (float): Sharpe ratio
+     '''
+     N = 255  # trading days in the year (change to 365 for crypto)
+     rf = 0.005  # half a percent risk-free rate
+     mean = return_series.mean() * N - rf
+     sigma = return_series.std() * np.sqrt(N)
+     sharpe = round(mean / sigma, 3)
+     return sharpe
+
+
+ class SignalAnalyserObject:
+     """
+     Class that produces back-test analysis for a given feature
+
+     Attributes
+     ----------
+     symbol_name : str
+         stock or asset to assess
+     feature_name : str
+         name of the feature to assess
+     test_size : int
+         testing data size
+     show_plot : boolean
+         if true, show plot for every method
+     save_path : str
+         if available, save results locally
+     save_aws : str
+         if available, export results to the remote repo
+     aws_credentials : dict
+         credentials for aws
+     signal_position : int
+         if available, signal position to open a position
+     df : pd.DataFrame
+         transformed data of the selected feature to perform the back-test
+
+     Methods
+     -------
+     signal_analyser(days_list=list):
+         given a signal position for either a bottom or a roof signal, calculate the expected return and its distribution for every time horizon in days_list
+     create_backtest_signal(days_strategy=int, high_exit=float, low_exit=float, open_in_list=list):
+         create a back-test analysis on the test data using opening and closing position criteria
+     """
+
+     def __init__(self, data, symbol_name, feature_name, test_size, signal_position = False, correct_signals = False, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+         """
+         Initialize object
+
+         Parameters
+         ----------
+         data (pd.DataFrame): data
+         symbol_name (str): name of the asset
+         feature_name (str): name of the feature
+         test_size (int): size of the test data
+         signal_position (int): signal position to open the position, False by default
+         correct_signals (int): clean abnormal signals using interpolation
+         show_plot (boolean): if true, show plot for every method
+         save_path (str): if set, save results locally e.g. r'C:/path/to/the/file/'
+         save_aws (str): if set, export results to the remote repo e.g. 'path/to/file/'
+         aws_credentials (dict): credentials for aws
+         return_fig (boolean): if true, methods will return objects
+
+         Returns
+         -------
+         None
+         """
+         self.ticket_name = symbol_name
+         self.feature_name = feature_name
+         self.test_size = test_size
+         self.show_plot = show_plot
+         self.save_path = save_path
+         self.save_aws = save_aws
+         self.aws_credentials = aws_credentials
+         self.return_fig = return_fig
+         self.signal_position = signal_position
+         ## preprocessing
+         up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+         features_base = ['Date', up_signal, low_signal, 'Close', 'Open', 'High', 'Low']
+
+         df = data[features_base].sort_values('Date')
+
+         df['signal_type'] = np.where(
+             df[up_signal] == 1,
+             'up',
+             np.where(
+                 df[low_signal] == 1,
+                 'down',
+                 'no signal'
+             )
+         )
+
+         def correct_sygnals(df, correct_i = 1):
+             ### signal cleaning
+             for i in range(1+correct_i, len(df)-1):
+                 start_i, end_i = i-(correct_i+1), i+1
+                 dfw = df.iloc[start_i: end_i,]
+                 before_type = dfw.iloc[0].signal_type
+                 after_type = dfw.iloc[-1].signal_type
+                 window_types = dfw.iloc[1:-1].signal_type.unique()
+                 n_window_type = len(window_types)
+                 if n_window_type == 1:
+                     if (before_type == after_type) and (window_types[0] != after_type):
+                         df.iloc[start_i+1: end_i-1, df.columns.get_loc('signal_type')] = before_type
+             return df.copy()
+
+         if correct_signals:
+             for correct_i in range(1, correct_signals+1):
+                 df = correct_sygnals(df, correct_i = correct_i)
+             df[up_signal] = np.where(df['signal_type'] == 'up', 1, 0)
+             df[low_signal] = np.where(df['signal_type'] == 'down', 1, 0)
+
+         ## indexing chains
+         df['lag_signal_type'] = df['signal_type'].shift(1)
+         df['lag_Date'] = df['Date'].shift(1)
+         df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+         df['break'] = np.where((df['span'] > 3) & (df['lag_signal_type'] == df['signal_type']), 1, 0)
+         df['break'] = np.where((df['lag_signal_type'] != df['signal_type']), 1, df['break'])
+         df['chain_id'] = df.sort_values(['Date']).groupby(['break']).cumcount() + 1
+         df['chain_id'] = np.where(df['break'] == 1, df['chain_id'], np.nan)
+         df['chain_id'] = df['chain_id'].fillna(method = 'ffill')
+
+         df['internal_rn'] = df.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
+         df['inv_internal_rn'] = df.sort_values(['Date'], ascending = False).groupby(['chain_id']).cumcount() + 1
+
+         df['first_in_chain'] = np.where(df['internal_rn'] == 1, True, False)
+         df['last_in_chain'] = np.where(df['inv_internal_rn'] == 1, True, False)
+
+         df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+         self.df = df.drop(columns = ['span', 'break', 'lag_signal_type', 'lag_Date']).copy()
+
+     def signal_analyser(self, days_list):
+         """
+         calculate expected returns and signal-span distributions for the given time horizons
+
+         Parameters
+         ----------
+         days_list (list): list of integers to calculate expected returns
+
+         Returns
+         -------
+         if return_fig is true, returns a matplotlib fig
+         """
+         signal_position = self.signal_position
+         df = self.df.iloc[0:-self.test_size,:].copy()
+         returns_list = list()
+
+         for days in days_list:
+             feature_ = f'return_{days}d'
+             df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+             returns_list.append(feature_)
+
+         df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
+         df['open_short'] = np.where(df.first_in_chain == True, True, np.nan)
+
+         # plotting
+         fig, axs = plt.subplots(1, 4, figsize = (20,5))
+         palette = {"go down": "tomato", "go up": "lightblue"}
+
+         df2 = df[df.signal_type.isin(['up','down'])]
+         df2['lag_Date'] = df2['Date'].shift(1)
+         df2['lag_signal_type'] = df2['signal_type'].shift(1)
+         df2 = df2[df2.lag_signal_type != df2.signal_type]
+         df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
+         sns.violinplot(data=df2, y="span", ax = axs[0], color = 'lightblue', linewidth=0.7, inner="quart")
+         sns.stripplot(data=df2, y="span", ax = axs[0], jitter=True, zorder=1)
+         axs[0].set_title('span between last signals')
+
+         df.signal_type = df.signal_type.map({'up':'go down', 'down': 'go up'})
+         df_ = df[df.last_in_chain == True]
+         df_['part'] = '-'
+         sns.violinplot(data=df_, y="internal_rn", x='part', ax = axs[1], hue="signal_type", inner="quart", palette = palette, gap=0.1, split=True, linewidth=0.7)
+         axs[1].set_title('signal duration distribution')
+
+         if signal_position:
+             for feature in returns_list:
+                 df[feature] = df[feature].shift(-signal_position)
+
+         df_melt = df[df.open_long == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+         df_melt = df_melt.dropna()
+         sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type", ax = axs[2], split=True, gap=0.1, inner="quart", palette = palette, linewidth=0.8)
+         axs[2].axhline(y=0, color='grey', linestyle='--')
+         axs[2].set_title('E. returns - end of the signal')
+
+         df_melt = df[df.open_short == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+         df_melt = df_melt.dropna()
+         sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type", ax = axs[3], split=True, gap=0.1, inner="quart", palette = palette, linewidth=0.8)
+         axs[3].axhline(y=0, color='grey', linestyle='--')
+         axs[3].set_title('E. returns - start of the signal')
+
+         if self.show_plot:
+             plt.show()
+
+         if self.save_path:
+             result_plot_name = f'signals_strategy_distribution_{self.feature_name}.png'
+             fig.savefig(self.save_path+result_plot_name)
+             # pickle.dump(axs, open(self.save_path+result_plot_name, 'wb'))
+
+         if self.save_path and self.save_aws:
+             # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_plot_name, input_path = self.save_path+result_plot_name)
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+         if not self.show_plot:
+             plt.close()
+
+         del df
+
+         if self.return_fig:
+             return fig
+
+     def create_backtest_signal(self, days_strategy, high_exit = False, low_exit = False, open_in_list = ['down']):
+         """
+         create a back-test analysis on the test data
+
+         Parameters
+         ----------
+         days_strategy (int): position horizon
+         high_exit (float): max threshold to close the position
+         low_exit (float): min threshold to close the position; it is compared directly against the low return, so a stop-loss should be negative (e.g. -3)
+         open_in_list (list): list of strings ("down","up") to assess signals
+
+         Returns
+         -------
+         if return_fig is true, returns a matplotlib fig and a list of dicts containing the analysis
+         """
+         asset_1 = 'Close'
+         up_signal, low_signal = f'signal_up_{self.feature_name}', f'signal_low_{self.feature_name}'
+         signal_position = self.signal_position
+         dft = self.df.iloc[-self.test_size:,:].reset_index(drop=True).copy()
+
+         dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
+         dft['bench_prod'] = dft['lrets_bench'].cumsum()
+         dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
+
+         map_ = {'down':'END LOW TREND', 'up': 'BEGINNING HIGH TREND'}
+
+         open_in_list_items = len(open_in_list)
+         fig, axs = plt.subplots(1, open_in_list_items, figsize = (7*open_in_list_items,6))
+         messages = list()
+         for i, open_in in enumerate(open_in_list):
+             axs_ = axs if open_in_list_items == 1 else axs[i]
+             if open_in == 'down':
+                 dft['open_long'] = np.where((dft.last_in_chain == True) & (dft.signal_type == 'down'), True, np.nan) # open strat
+             elif open_in == 'up':
+                 dft['open_long'] = np.where((dft.first_in_chain == True) & (dft.signal_type == 'up'), True, np.nan) # open strat
+
+             def chain_position(dft):
+                 dft['open_long_id'] = np.where(dft['open_long'] == True, dft.chain_id, np.nan)
+                 dft['open_long_id'] = dft['open_long_id'].fillna(method = 'ffill')
+                 dft['open_long_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).cumcount() + 1
+                 return dft
+
+             if signal_position:
+                 dft['open_long'] = dft.sort_values(['Date'])['open_long'].shift(signal_position)
+
+             dft = chain_position(dft)
+             dft['flag'] = np.where(dft['open_long_rn'] < days_strategy, 1, 0)
+
+             if high_exit and low_exit:
+                 dft['open_strat'] = np.where(dft.open_long == True, dft.Open, np.nan) # open strat
+                 dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
+                 dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
+                 dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
+                 dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
+                 dft['max_step_chain'] = dft.groupby(['open_long_id'])['open_long_rn'].transform('max')
+                 dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['open_long_rn'] == days_strategy) | (dft['max_step_chain'] == dft['open_long_rn'])), 1, np.nan)
+                 dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
+
+                 dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
+                 dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1, -1, np.nan))
+                 dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
+                 dft['exit_order'] = dft.sort_values(['Date']).groupby(['open_long_id','exit']).cumcount() + 1
+                 dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
+                 dft = dft.drop(columns = ['exit_order'])
+                 ## if last signal is near
+                 max_id = dft.open_long_id.max()
+                 dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).open_long_rn.transform('max')
+                 dft['exit'] = np.where((dft.open_long_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.open_long_rn), 1, dft['exit'])
+
+                 dft['exit_step'] = np.where(dft.exit == 1, dft.open_long_rn, np.nan)
+                 dft['exit_step'] = dft.sort_values(['Date']).groupby(['open_long_id']).exit_step.transform('max')
+
+                 dft['flag'] = np.where(dft.open_long_rn <= dft.exit_step, 1, 0)
+
+             dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
+             dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(), -0.0, dft['lrets_strat'])
+             dft['lrets_prod'] = dft['lrets_strat'].cumsum()
+             dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
+
+             bench_rets = round(dft['bench_prod_exp'].values[-1]*100, 1)
+             strat_rets = round(dft['strat_prod_exp'].values[-1]*100, 1)
+
+             bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()), 1)
+             strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()), 1)
+
+             message1 = f'{bench_rets}%'
+             message2 = f'{strat_rets}%'
+
+             messages_ = {
+                 'type strategy': map_[open_in],
+                 'benchmark return:': message1,
+                 'benchmark sharpe ratio:': bench_sr,
+                 'strategy return:': message2,
+                 'strategy sharpe ratio:': strat_sr,
+             }
+             messages.append(messages_)
+             if self.show_plot:
+                 print('----------------------------')
+                 print(messages_)
+                 print('----------------------------')
+
+             axs_.plot(dft.bench_prod_exp.values, label = 'benchmark', color = 'steelblue')
+             axs_.scatter(range(len(dft)), np.where(dft[low_signal] == 1, dft.bench_prod_exp.values, np.nan), color = 'red', label = 'signal')
+             axs_.scatter(range(len(dft)), np.where(dft[up_signal] == 1, dft.bench_prod_exp.values, np.nan), color = 'green', label = 'signal')
+             axs_.plot(dft.strat_prod_exp.values, label = 'strategy', color = 'darksalmon')
+             axs_.set_xlabel("index")
+             axs_.set_ylabel("cumulative return")
+             axs_.set_title(f'{map_[open_in]} strategy and cumulative returns based on signals')
+             axs_.legend()
+
+         if self.show_plot:
+             plt.show()
+
+         if self.save_path:
+             result_json_name = f'signals_strategy_return_{self.feature_name}.json'
+             result_plot_name = f'signals_strategy_return_{self.feature_name}.png'
+
+             plt.savefig(self.save_path+result_plot_name)
+
+             with open(self.save_path+result_json_name, "w") as outfile:
+                 json.dump(messages, outfile)
+
+         if self.save_path and self.save_aws:
+
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+         if not self.show_plot:
+             plt.close()
+
+         del dft
+
+         if self.return_fig:
+             return fig, messages
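For orientation, a minimal usage sketch of the new module (hypothetical symbol, feature and parameter values; it assumes a dataframe that already carries Date/Open/High/Low/Close columns plus the signal_up_<feature> and signal_low_<feature> flags produced elsewhere in the package):

    from virgo_modules.src.backtester import SignalAnalyserObject, sharpe_ratio

    # df: pd.DataFrame with 'Date', 'Open', 'High', 'Low', 'Close',
    # 'signal_up_my_feature' and 'signal_low_my_feature' columns
    analyser = SignalAnalyserObject(
        df, 'AAPL', 'my_feature', test_size=250,
        correct_signals=2,        # interpolate isolated abnormal signals
        show_plot=False, return_fig=True,
    )
    fig = analyser.signal_analyser(days_list=[5, 10, 20])   # expected-return distributions
    fig2, messages = analyser.create_backtest_signal(
        days_strategy=10, high_exit=5, low_exit=-3, open_in_list=['down', 'up'],
    )
    print(messages)   # benchmark vs. strategy cumulative return and Sharpe ratio per strategy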
@@ -1378,7 +1378,8 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
      arguments_to_use = configs[feature_name]['config_params']
      method_to_use = configs[feature_name]['method']
      getattr(object_stock, method_to_use)(**arguments_to_use, plot = False, save_features = False)
-     object_stock.produce_order_features(feature_name)
+     if method_to_use not in ['minmax_pricefeature']:
+         object_stock.produce_order_features(feature_name)
      # getting targets
      object_stock.get_categorical_targets(**target_params_up)
      object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})
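The guard above skips produce_order_features for features built by minmax_pricefeature, presumably because that method does not emit the signal columns the ordering step expects; any future method with the same property can simply be appended to the exclusion list, e.g.:

    # hypothetical named constant for the exclusion list
    SKIP_ORDER_FEATURES = ['minmax_pricefeature']
    if method_to_use not in SKIP_ORDER_FEATURES:
        object_stock.produce_order_features(feature_name)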
@@ -250,25 +250,6 @@ class FeaturesEntropy(BaseEstimator, TransformerMixin):
          X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
          return X

- def sharpe_ratio(return_series):
-
-     '''
-     calculate sharpe ratio for given array.
-
-     Parameters:
-         return_series (pd.series): pandas series of the asset returns
-
-     Returns:
-         sharpe (float): sharpe ratio
-     '''
-
-     N = 255 # Trading days in the year (change to 365 for crypto)
-     rf = 0.005 # Half a percent risk free rare
-     mean = return_series.mean() * N -rf
-     sigma = return_series.std() * np.sqrt(N)
-     sharpe = round(mean / sigma, 3)
-     return sharpe
-
  class signal_combiner(BaseEstimator, TransformerMixin):

      """
@@ -1803,7 +1784,7 @@ class stock_eda_panel(object):
          if plot:
              self.signal_plotter(feature_name)

-     def minmax_pricefeature(self, type_func, window, distance = False, save_features = False):
+     def minmax_pricefeature(self, type_func, window, distance = False, plot = False, save_features = False):
          """
          perform relative price/distance with respect to the min/max price in a given time scope

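Adding the plot keyword brings minmax_pricefeature in line with the other feature methods, which matters because extract_data_traintest (see the re_utils.py hunk above) calls every configured method generically; without the parameter, a dispatch like the one below would raise a TypeError for this method (hypothetical arguments):

    # generic dispatch of the kind used by extract_data_traintest
    getattr(object_stock, 'minmax_pricefeature')(type_func='min', window=50, plot=False, save_features=False)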
@@ -2809,103 +2790,453 @@ class hmm_feature_selector():
          self.feature_results = feature_results
          self.best_features = pd.DataFrame(self.feature_results).T.sort_values('mean relevance').iloc[-1,:].features

- class signal_analyser_object:
+ def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest = False, exit_params = {}):
+     '''
+     code snippet that runs the signal analyser objects; the analysis is a signal back-test
+
+     Parameters:
+         test_data_size (int): test data size
+         feature_name (str): name of the feature to assess
+         days_list (list): time scope to assess the returns
+         configuration (dict): parameters of the method to run
+         object_stock (obj): object with data to assess
+         signal_analyser_object (obj): signal_analyser object
+         plot (boolean): if true, plot results
+         backtest (boolean): if true, run backtest
+         exit_params (dict): parameters of exit returns
+
+     Returns:
+         mean_median_return (float): median return of the back-tests
+     '''
+     method(**configuration)
+     signal_assess = signal_analyser_object(object_stock.df, object_stock.stock_code, show_plot = plot)
+     signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
+
+     if backtest:
+         print('-----------------------back test ---------------------------')
+         signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params)
+
+     return signal_assess.mean_median_return
+
+ def iterate_signal_analyser(test_data_size, feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
+     '''
+     code snippet that iterates the signal analyser over candidate configurations
+
+     Parameters:
+         test_data_size (int): test data size
+         feature_name (str): name of the feature to assess
+         days_list (list): time scope to assess the returns
+         arguments_to_test: parameters to test
+         method: method to run
+         object_stock (obj): object with data to assess
+         signal_analyser_object (obj): signal_analyser object
+         plot (boolean): if true, plot results
+
+     Returns:
+         best_result (int): index from arguments_to_test with the best result
+     '''
+     results = list()
+     for key in arguments_to_test.keys():
+         configuration = arguments_to_test.get(key)
+         mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
+         results.append(mean_median_return)
+
+     df_result = pd.DataFrame({'keys': arguments_to_test.keys(), 'results': results})
+     if plot:
+         plt.plot(df_result['keys'], df_result['results'])
+         plt.scatter(df_result['keys'], df_result['results'])
+         plt.title('simulation between configurations')
+         plt.ylabel('median expected return')
+         plt.show()
+
+     best_result = df_result.sort_values('results', ascending = False)['keys'].values[0]
+     return best_result
+
+ class analyse_index(stock_eda_panel):
      """
-     class that is going to analyse signals
+     class that analyses an asset against one or more market indexes (betas and correlation scores)

      Attributes
      ----------
-     data : pd.DataFrame
-         symbol of the asset
-     ticket_name : str
-         asset symbol
-     show_plot : boolean
-         if true show plot for every method
+     index_data : pd.DataFrame
+         index data (or the name of the index)
+     indexes : list
+         list of indexes
+     asset : str
+         name of the asset
+     n_obs : int
+         number of rows to extract
+     lag : int
+         lag to apply
+     data_window : str
+         5y 10y 15y
+     show_plot : bool
+         If True, show plots
      save_path : str
-         if true, save results in file
+         local path for saving e.g. r'C:/path/to/the/file/'
      save_aws : str
-         if true, export results to remote repo
+         remote key in s3 bucket path e.g. 'path/to/file/'
      aws_credentials : dict
-         credentials for aws
-     return_fig : boolean
-         if true, methods will return objects
-     create_backtest_signal(days_strategy=list, test_size=int, feature_name=str, high_exit=float, low_exit=float):
-         perform backtest signal analysis
-
+         dict with the aws credentials
+     merger_df : pd.DataFrame
+         dataframe with the index and asset data
+     states_result : dict
+         betas and correlation score results
+
      Methods
      -------
-     signal_analyser(test_size=int, feature_name=str, days_list=list, threshold=float, verbose=boolean, signal_position=boolean):
-         perform signal analysis and feature extraction
-
+     process_data():
+         using stock_eda_panel, get data and merge data
+     plot_betas(sample_size=int, offset=int, subsample_ts=int):
+         display beta analysis plot
+     get_betas(subsample_ts=int):
+         get the general beta and the last-sample beta; correlation scores are included too
      """
-
-     def __init__(self, data, symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+     def __init__(self, index_data, asset, n_obs, lag, data_window = '5y', show_plot = False, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
          """
          Initialize object

          Parameters
          ----------
-         data (pd.DataFrame): data
-         ticket_name (str): name of the asset
-         show_plot (boolean): if true show plot for every method
-         save_path (str): if true, save results in file e.g r'C:/path/to/the/file/'
-         save_aws (str): if true, export results to remote repo e.g. 'path/to/file/'
-         aws_credentials (dict): credentials for aws
-         return_fig (boolean): if true, methods will return objects
+         index_data (pd.DataFrame or str): index data dataframe or index string
+         asset (str): name of the asset
+         n_obs (int): number of rows to extract
+         lag (int): lag to apply
+         data_window (str): 5y 10y 15y
+         show_plot (bool): If True, show plots
+         save_path (str): local path for saving e.g. r'C:/path/to/the/file/'
+         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+         aws_credentials (dict): dict with the aws credentials

          Returns
          -------
          None
          """
-         self.data = data.copy()
-         self.ticket_name = symbol_name
+
+         if type(index_data) != str:
+             index_data['Date'] = pd.to_datetime(index_data['Date'])
+             self.index_data = index_data
+             self.indexes = [x for x in list(index_data.columns) if x != 'Date']
+         else:
+             self.indexes = [index_data]
+
+         self.index_data = index_data
+         self.asset = asset
+         self.n_obs = n_obs
+         self.data_window = data_window
+         self.lag = lag
+
          self.show_plot = show_plot
+         self.return_fig = return_fig
          self.save_path = save_path
          self.save_aws = save_aws
-         self.aws_credentials = aws_credentials
-         self.return_fig = return_fig

-     def signal_analyser(self, test_size, feature_name, days_list, threshold = 0.05, verbose = False, signal_position = False):
+     def process_data(self):
          """
-         perform signal analysis and feature extraction
+         using stock_eda_panel, get data and merge data
+
+         Parameters
+         ----------
+         None
+
+         Returns
+         -------
+         None
+         """
+         asset = stock_eda_panel(self.asset, self.n_obs, data_window=self.data_window)
+         asset.get_data()
+         df = asset.df[['Date','Close']]
+
+         if type(self.index_data) != str:
+             df_merge = df.merge(self.index_data, on = ['Date'], how = 'left').sort_values('Date')
+
+         else:
+             indx = stock_eda_panel(self.index_data, self.n_obs, data_window=self.data_window)
+             indx.get_data()
+             indx_df = indx.df[['Date','Close']].rename(columns = {'Close':self.index_data})
+             df_merge = df.merge(indx_df, on = ['Date'], how = 'left').sort_values('Date')
+
+         for colx in ['Close'] + self.indexes:
+             df_merge[f'{colx}_pct'] = df_merge[colx]/df_merge[colx].shift(self.lag) - 1
+
+         df_merge.dropna(inplace = True)
+         self.merger_df = df_merge.rename(columns = {'Close_pct': 'asset_return'})
+
+     def plot_betas(self, sample_size, offset, subsample_ts = False, index = False):
+         """
+         display beta analysis plot

          Parameters
          ----------
-         test_size (int): test data size
-         feature_name (str): name of the feature to assess
-         days_list (list): list of integers [3,8,10] to assess
-         threshold (float): alpha or z threshold
-         verbose (boolean): print metrics
-         signal_position (int): if true, the signal is taken at the given step after the signal end
+         sample_size (int): number of days or window size to calculate beta
+         offset (int): overlap between windows
+         subsample_ts (int): subsample size of data

          Returns
          -------
          None
          """
-         data = self.data
-         self.feature_name = feature_name
-         up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
-         features_base = ['Date', up_signal, low_signal, 'Close']
+         if (type(self.index_data) == str) & (index != False):
+             raise Exception("No need of index argument")
+         else:
+             index = self.indexes[0]
+
+         index_pct = f'{index}_pct'
+         ### plotting analysis
+         figure, ax = plt.subplot_mosaic(
+             [["scatter_total", "scatter_sample", 'ts', 'ts']],
+             layout="constrained",
+             figsize=(18, 5)
+         )

-         df = data[features_base].sort_values('Date').iloc[0:-test_size,:]
-         returns_list = list()
+         ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df[index_pct])
+
+         huber_regr = HuberRegressor(fit_intercept = True)
+         huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
+         b, a = huber_regr.coef_[0], huber_regr.intercept_
+
+         # b, a = np.polyfit(self.merger_df.asset_return, self.merger_df[index_pct], 1)
+         ax['scatter_total'].plot(self.merger_df.asset_return, b*self.merger_df.asset_return+a, color='red')

-         for days in days_list:
+         ax['ts'].plot(self.merger_df.Date, self.merger_df.Close, color = 'grey', alpha = 0.3)

-             feature_ = f'return_{days}d'
-             days = days + signal_position if signal_position else days
-             df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
-             returns_list.append(feature_)
+         if subsample_ts:
+             self.merger_df = self.merger_df.iloc[-subsample_ts:,:].dropna()

-         df['signal_type'] = np.where(
-             df[up_signal] == 1,
-             'up',
-             np.where(
-                 df[low_signal] == 1,
-                 'down',
-                 None
-             )
-         )
+         for i in range(0, len(self.merger_df)-sample_size, offset):
+
+             merger_ = self.merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
+             x = merger_[index_pct]
+             y = merger_.asset_return
+             # b, a = np.polyfit(x, y, 1)
+             huber_regr = HuberRegressor(fit_intercept = True)
+             huber_regr.fit(x.values.reshape(-1,1), y.values.reshape(-1,1))
+             b, a = huber_regr.coef_[0], huber_regr.intercept_
+
+             normalize = mcolors.Normalize(vmin=-1, vmax=1)
+             colormap = cm.jet
+
+             ax['scatter_sample'].plot(x, y, 'o', color = 'blue', alpha = 0.1)
+             ax['scatter_sample'].plot(x, b*x+a, color=colormap(normalize(b)))
+             ax['scatter_sample'].set_xlim(-0.06, 0.06)
+             ax['scatter_sample'].set_ylim(-0.06, 0.06)
+
+             plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap(normalize(b)), s = 10)
+
+         scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
+         scalarmappaple.set_array(x)
+
+         plt.title(f'{self.asset} using index: {index}')
+         plt.colorbar(scalarmappaple)
+
+         if self.show_plot:
+             plt.show()
+
+         if self.save_path:
+             result_plot_name = f'market_best_fit.png'
+             figure.savefig(self.save_path+result_plot_name)
+
+         if self.save_path and self.save_aws:
+             # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.asset}/'+result_plot_name, input_path = self.save_path+result_plot_name)
+             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+         if not self.show_plot:
+             plt.close()
+
+         if self.return_fig:
+             return figure
+
+     def get_betas(self, subsample_ts=False):
+         """
+         get the general beta and the last-sample beta; correlation scores are included too
+
+         Parameters
+         ----------
+         subsample_ts (int): subsample size of data
+
+         Returns
+         -------
+         None
+         """
+         result = list()
+         for index in self.indexes:
+
+             index_pct = f'{index}_pct'
+             huber_regr = HuberRegressor(fit_intercept = True)
+             huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
+             general_beta, a = huber_regr.coef_[0], huber_regr.intercept_
+             general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df[index])[0]
+
+             dict_res = {
+                 'index': index,
+                 'general_beta': general_beta,
+                 'general_r': general_r,
+             }
+
+             if subsample_ts:
+                 tmp_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
+                 huber_regr = HuberRegressor(fit_intercept = True)
+                 huber_regr.fit(tmp_df.asset_return.values.reshape(-1,1), tmp_df[index_pct].values.reshape(-1,1))
+                 sample_beta, a = huber_regr.coef_[0], huber_regr.intercept_
+                 sample_r = stats.mstats.pearsonr(tmp_df.asset_return, tmp_df[index])[0]
+                 dict_res['sample_beta'] = sample_beta
+                 dict_res['sample_r'] = sample_r
+
+             result.append(dict_res)
+
+         self.states_result = result
+
+
+ def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
+     '''
+     select the relevant beta result data of a given asset
+
+     Parameters:
+         data_market (pd.DataFrame): dataframe of the market results
+         ticket_name (str): name of the asset
+         show_plot (bool): if true, plot results
+         save_path (str): local path for saving e.g. r'C:/path/to/the/file/'
+         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+         aws_credentials (dict): dict of the aws credentials
+
+     Returns:
+         selection (pd.DataFrame): dataframe of the most relevant beta
+     '''
+     all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
+     all_betas['gen_r2'] = all_betas.general_r ** 2
+     all_betas['sampl_r2'] = all_betas.sample_r ** 2
+     selection = all_betas.sort_values('gen_r2', ascending = False).head(2).sort_values('sampl_r2', ascending = False).head(1).drop(columns = ['gen_r2','sampl_r2'])
+
+     if show_plot:
+         print(selection)
+     if save_path:
+         result_plot_name = f'market_best_fit.csv'
+         selection.to_csv(save_path+result_plot_name)
+
+     if save_path and save_aws:
+         # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name, input_path = save_path+result_plot_name)
+         upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
+     return selection
+
+ #### to deprecate
+
+ def sharpe_ratio(return_series):
+
+     '''
+     calculate sharpe ratio for given array.
+
+     Parameters:
+         return_series (pd.series): pandas series of the asset returns
+
+     Returns:
+         sharpe (float): sharpe ratio
+     '''
+
+     N = 255 # Trading days in the year (change to 365 for crypto)
+     rf = 0.005 # Half a percent risk free rate
+     mean = return_series.mean() * N - rf
+     sigma = return_series.std() * np.sqrt(N)
+     sharpe = round(mean / sigma, 3)
+     return sharpe
+
+ class signal_analyser_object:
+     """
+     class that is going to analyse signals
+
+     Attributes
+     ----------
+     data : pd.DataFrame
+         data of the asset
+     ticket_name : str
+         asset symbol
+     show_plot : boolean
+         if true show plot for every method
+     save_path : str
+         if set, save results in file
+     save_aws : str
+         if set, export results to remote repo
+     aws_credentials : dict
+         credentials for aws
+     return_fig : boolean
+         if true, methods will return objects
+
+     Methods
+     -------
+     signal_analyser(test_size=int, feature_name=str, days_list=list, threshold=float, verbose=boolean, signal_position=boolean):
+         perform signal analysis and feature extraction
+     create_backtest_signal(days_strategy=list, test_size=int, feature_name=str, high_exit=float, low_exit=float):
+         perform backtest signal analysis
+     """
+
+     def __init__(self, data, symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+         """
+         Initialize object
+
+         Parameters
+         ----------
+         data (pd.DataFrame): data
+         symbol_name (str): name of the asset
+         show_plot (boolean): if true show plot for every method
+         save_path (str): if set, save results in file e.g. r'C:/path/to/the/file/'
+         save_aws (str): if set, export results to remote repo e.g. 'path/to/file/'
+         aws_credentials (dict): credentials for aws
+         return_fig (boolean): if true, methods will return objects
+
+         Returns
+         -------
+         None
+         """
+         self.data = data.copy()
+         self.ticket_name = symbol_name
+         self.show_plot = show_plot
+         self.save_path = save_path
+         self.save_aws = save_aws
+         self.aws_credentials = aws_credentials
+         self.return_fig = return_fig
+
+     def signal_analyser(self, test_size, feature_name, days_list, threshold = 0.05, verbose = False, signal_position = False):
+         """
+         perform signal analysis and feature extraction
+
+         Parameters
+         ----------
+         test_size (int): test data size
+         feature_name (str): name of the feature to assess
+         days_list (list): list of integers [3,8,10] to assess
+         threshold (float): alpha or z threshold
+         verbose (boolean): print metrics
+         signal_position (int): if set, the signal is taken at the given step after the signal end
+
+         Returns
+         -------
+         None
+         """
+         data = self.data
+         self.feature_name = feature_name
+         up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+         features_base = ['Date', up_signal, low_signal, 'Close']
+
+         df = data[features_base].sort_values('Date').iloc[0:-test_size,:]
+         returns_list = list()
+
+         for days in days_list:
+
+             feature_ = f'return_{days}d'
+             days = days + signal_position if signal_position else days
+             df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+             returns_list.append(feature_)
+
+         df['signal_type'] = np.where(
+             df[up_signal] == 1,
+             'up',
+             np.where(
+                 df[low_signal] == 1,
+                 'down',
+                 None
+             )
+         )
          df = df[~df.signal_type.isna()]
          df['lag_Date'] = df['Date'].shift(1)
          df['lag_signal_type'] = df['signal_type'].shift(1)
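A minimal usage sketch of the relocated beta tooling (hypothetical symbols; it assumes stock_eda_panel can download both series and that HuberRegressor, scipy stats and the matplotlib colour helpers are imported at module level, as elsewhere in ticketer_source.py):

    from virgo_modules.src.ticketer_source import analyse_index

    idx = analyse_index('SPY', asset='AAPL', n_obs=1500, lag=5, data_window='10y', show_plot=False)
    idx.process_data()                 # download and merge asset and index closes
    idx.get_betas(subsample_ts=250)    # robust (Huber) betas plus Pearson correlation scores
    print(idx.states_result)           # e.g. [{'index': 'SPY', 'general_beta': ..., 'sample_beta': ..., ...}]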
@@ -3173,332 +3504,3 @@ class signal_analyser_object:

          if self.return_fig:
              return fig, messages
-
- def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False, exit_params = {}):
-     '''
-     code snippet that is going run some objects. The analysis is signal analyse which is backtesting
-
-     Parameters:
-         test_data_size (int): test data size
-         feature_name (str): name of the feature to assess
-         days_list (list): tome scope to assess the returns
-         configuration (dict): parameters of the method to run
-         object_stock (obj): object with data to assess
-         signal_analyser_object (obj): signal_analyser object
-         plot (boolean): if true, plot results
-         backtest (boolean): if true, run backtest
-         exit_params (dict): parameters of exit returns
-
-     Returns:
-         mean_median_return (float): median return of the backtests
-     '''
-     method(**configuration)
-     signal_assess = signal_analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot)
-     signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
-
-     if backtest:
-         print('-----------------------back test ---------------------------')
-         signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params )
-
-     return signal_assess.mean_median_return
-
- def iterate_signal_analyser(test_data_size,feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
-     '''
-     code snippet is going to iterate signal analyser
-
-     Parameters:
-         test_data_size (int): test data size
-         feature_name (str): name of the feature to assess
-         days_list (list): tome scope to assess the returns
-         arguments_to_test: parameters to test
-         method: methods to run
-         object_stock (obj): object with data to assess
-         signal_analyser_object (obj): signal_analyser object
-         plot (boolean): if true, plot results
-
-     Returns:
-         best_result (int): index from the arguments_to_test with the best result
-     '''
-     results = list()
-     for key in arguments_to_test.keys():
-         configuration = arguments_to_test.get(key)
-         mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
-         results.append(mean_median_return)
-
-     df_result = pd.DataFrame({'keys':arguments_to_test.keys(),'results':results})
-     if plot:
-         plt.plot(df_result['keys'], df_result['results'])
-         plt.scatter(df_result['keys'], df_result['results'])
-         plt.title('simulation between configurations')
-         plt.ylabel('median expected return')
-         plt.show()
-
-     best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
-     return best_result
-
- class analyse_index(stock_eda_panel):
-     """
-     class that is going to train hmm models to perform feature selection
-
-     Attributes
-     ----------
-     data_index : pd.DataFrame
-         name of the index
-     indexes: list
-         list of indexes
-     asset : str
-         name of the asset
-     n_obs : int
-         number of rows to extract
-     lag : int
-         lag to apply
-     data_window : str
-         5y 10y 15y
-     show_plot : bool
-         If True, show plots
-     save_path : str
-         local path for saving e.g r'C:/path/to/the/file/'
-     save_aws : str
-         remote key in s3 bucket path e.g. 'path/to/file/'
-     aws_credentials : dict
-         dict with the aws credentials
-     merger_df : pd.DataFrame
-         dataframe with the index and asset data
-     states_result = dict
-         betas and correlation score results
-
-     Methods
-     -------
-     process_data():
-         using stock_eda_panel, get data and merge data
-     plot_betas(sample_size=int, offset=int, subsample_ts=int):
-         display beta analysis plot
-     get_betas(subsample_ts=int)
-         get general beta and last sample beta, correlation score is included too
-     """
-     def __init__(self, index_data, asset, n_obs, lag, data_window = '5y', show_plot = False, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
-         """
-         Initialize object
-
-         Parameters
-         ----------
-         index_data (pd.DataFrame or str): index data dataframe or index string
-         asset (str): name of the asset
-         n_obs (int): number of rows to extract
-         lag (int): lag to apply
-         data_window (str): 5y 10y 15y
-         show_plot (bool): If True, show plots
-         save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-         aws_credentials (dict): dict with the aws credentials
-
-         Returns
-         -------
-         None
-         """
-
-
-         if type(index_data) != str:
-             index_data['Date'] = pd.to_datetime(index_data['Date'])
-             self.index_data = index_data
-             self.indexes = [ x for x in list(index_data.columns) if x != 'Date']
-         else:
-             self.indexes = [index_data]
-
-         self.index_data = index_data
-         self.asset = asset
-         self.n_obs = n_obs
-         self.data_window = data_window
-         self.lag = lag
-
-         self.show_plot = show_plot
-         self.return_fig = return_fig
-         self.save_path = save_path
-         self.save_aws = save_aws
-
-     def process_data(self):
-         """
-         using stock_eda_panel, get data and merge data
-
-         Parameters
-         ----------
-         None
-
-         Returns
-         -------
-         None
-         """
-         asset = stock_eda_panel(self.asset, self.n_obs, data_window=self.data_window)
-         asset.get_data()
-         df = asset.df[['Date','Close']]
-
-         if type(self.index_data) != str:
-             df_merge = df.merge(self.index_data, on = ['Date'], how = 'left').sort_values('Date')
-
-         else:
-             indx = stock_eda_panel(self.index_data, self.n_obs, data_window=self.data_window)
-             indx.get_data()
-             indx_df = indx.df[['Date','Close']].rename(columns = {'Close':self.index_data})
-             df_merge = df.merge(indx_df, on = ['Date'], how = 'left').sort_values('Date')
-
-         for colx in ['Close'] + self.indexes:
-             df_merge[f'{colx}_pct'] = df_merge[colx]/df_merge[colx].shift(self.lag) - 1
-
-         df_merge.dropna(inplace = True)
-         self.merger_df = df_merge.rename(columns = {'Close_pct': 'asset_return'})
-
-     def plot_betas(self,sample_size, offset, subsample_ts =False, index = False):
-         """
-         display beta analysis plot
-
-         Parameters
-         ----------
-         sample_size (int): number of days or window size to calculate beta
-         offset (int): overlap between windows
-         subsample_ts (int): subsample size of data
-
-         Returns
-         -------
-         None
-         """
-         if (type(self.index_data) == str) & (index != False):
-             raise Exception("No need of index argument")
-         else:
-             index = self.indexes[0]
-
-         index_pct = f'{index}_pct'
-         ### ploting analysis
-         figure, ax = plt.subplot_mosaic(
-             [["scatter_total", "scatter_sample",'ts','ts']],
-             layout="constrained",
-             figsize=(18, 5)
-         )
-
-         ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df[index_pct])
-
-         huber_regr = HuberRegressor(fit_intercept = True)
-         huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
-         b, a = huber_regr.coef_[0], huber_regr.intercept_
-
-         # b, a = np.polyfit(self.merger_df.asset_return, self.merger_df[index_pct], 1)
-         ax['scatter_total'].plot(self.merger_df.asset_return, b*self.merger_df.asset_return+a, color='red')
-
-         ax['ts'].plot(self.merger_df.Date, self.merger_df.Close, color = 'grey', alpha = 0.3)
-
-         if subsample_ts:
-             self.merger_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
-
-         for i in range(0,len(self.merger_df)-sample_size,offset):
-
-             merger_ = self.merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
-             x = merger_[index_pct]
-             y = merger_.asset_return
-             # b, a = np.polyfit(x,y, 1)
-             huber_regr = HuberRegressor(fit_intercept = True)
-             huber_regr.fit(x.values.reshape(-1,1), y.values.reshape(-1,1))
-             b, a = huber_regr.coef_[0], huber_regr.intercept_
-
-             normalize = mcolors.Normalize(vmin=-1, vmax=1)
-             colormap = cm.jet
-
-             ax['scatter_sample'].plot(x, y,'o', color = 'blue', alpha = 0.1)
-             ax['scatter_sample'].plot(x, b*x+a, color=colormap(normalize(b)))
-             ax['scatter_sample'].set_xlim(-0.06, 0.06)
-             ax['scatter_sample'].set_ylim(-0.06, 0.06)
-
-             plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap(normalize(b)), s = 10)
-
-         scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
-         scalarmappaple.set_array(x)
-
-         plt.title(f'{self.asset} using index: {index}')
-         plt.colorbar(scalarmappaple)
-
-         if self.show_plot:
-             plt.show()
-
-         if self.save_path:
-             result_plot_name = f'market_best_fit.png'
-             figure.savefig(self.save_path+result_plot_name)
-
-         if self.save_path and self.save_aws:
-             # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.asset}/'+result_plot_name,input_path = self.save_path+result_plot_name)
-             upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
-
-         if not self.show_plot:
-             plt.close()
-
-         if self.return_fig:
-             return figure
-
-     def get_betas(self,subsample_ts=False):
-         """
-         get general beta and last sample beta, correlation score is included too
-
-         Parameters
-         ----------
-         subsample_ts (int): subsample size of data
-
-         Returns
-         -------
-         None
-         """
-         result = list()
-         for index in self.indexes:
-
-             index_pct = f'{index}_pct'
-             huber_regr = HuberRegressor(fit_intercept = True)
-             huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
-             general_beta, a = huber_regr.coef_[0], huber_regr.intercept_
-             general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df[index])[0]
-
-             dict_res = {
-                 'index':index,
-                 'general_beta':general_beta,
-                 'general_r':general_r,
-             }
-
-             if subsample_ts:
-                 tmp_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
-                 huber_regr = HuberRegressor(fit_intercept = True)
-                 huber_regr.fit(tmp_df.asset_return.values.reshape(-1,1), tmp_df[index_pct].values.reshape(-1,1))
-                 sample_beta, a = huber_regr.coef_[0], huber_regr.intercept_
-                 sample_r = stats.mstats.pearsonr(tmp_df.asset_return, tmp_df[index])[0]
-                 dict_res['sample_beta'] = sample_beta
-                 dict_res['sample_r'] = sample_r
-
-             result.append(dict_res)
-
-         self.states_result = result
-
-
- def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
-     '''
-     select relevant beta result data of a given asset
-
-     Parameters:
-         data_market (pd.DataFrame): dataframe of the market results
-         ticket_name (str): name of the asset
-         show_plot (bool): If tru, plot results
-         save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-         save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-         aws_credentials (dict): dict of the aws credentials
-
-     Returns:
-         selection (pd.DataFrame): dataframe of the most relevant beta
-     '''
-     all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
-     all_betas['gen_r2'] = all_betas.general_r ** 2
-     all_betas['sampl_r2'] = all_betas.sample_r ** 2
-     selection = all_betas.sort_values('gen_r2',ascending =False).head(2).sort_values('sampl_r2',ascending =False).head(1).drop(columns = ['gen_r2','sampl_r2'])
-
-     if show_plot:
-         print(selection)
-     if save_path:
-         result_plot_name = f'market_best_fit.csv'
-         selection.to_csv(save_path+result_plot_name)
-
-     if save_path and save_aws:
-         # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
-         upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
-     return selection
-
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: virgo-modules
- Version: 0.0.90
+ Version: 0.1.0
  Summary: data processing and statistical modeling using stock market data
  Home-page: https://github.com/miguelmayhem92/virgo_module
  Author: Miguel Mayhuire
@@ -9,6 +9,7 @@ virgo_app/virgo_modules.egg-info/requires.txt
  virgo_app/virgo_modules.egg-info/top_level.txt
  virgo_app/virgo_modules/src/__init__.py
  virgo_app/virgo_modules/src/aws_utils.py
+ virgo_app/virgo_modules/src/backtester.py
  virgo_app/virgo_modules/src/edge_utils.py
  virgo_app/virgo_modules/src/pull_artifacts.py
  virgo_app/virgo_modules/src/re_utils.py