virgo-modules 0.0.72__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
virgo_modules/__init__.py CHANGED
@@ -0,0 +1 @@
+ __version__ = "0.9.0"
@@ -6,14 +6,36 @@ import pandas as pd
 
 
  def upload_file_to_aws(bucket,key,input_path, aws_credentials):
-
+ '''
+ upload a local file to an s3 folder
+
+ Parameters:
+ bucket (str): bucket name
+ key (str): key pattern or folder in s3 e.g. path/to/upload/
+ input_path (str): local path of the file to upload e.g. path/to/upload.txt
+ aws_credentials (dict): aws credentials dictionary
+
+ Returns:
+ None
+ '''
  session = boto3.Session(aws_access_key_id=aws_credentials['AWS_ACCESS_KEY_ID'],aws_secret_access_key=aws_credentials['AWS_SECRET_ACCESS_KEY'])
  bucket = aws_credentials[bucket]
  s3 = session.resource('s3')
  s3.meta.client.upload_file(Filename=input_path , Bucket=bucket, Key=key)
 
  def upload_pandas_to_s3(data_frame,bucket,key, aws_credentials):
+ '''
+ upload a dataframe as csv to an s3 folder
 
+ Parameters:
+ data_frame (pd.DataFrame): data
+ bucket (str): bucket name
+ key (str): key pattern or folder in s3 e.g. path/to/upload/
+ aws_credentials (dict): aws credentials dictionary
+
+ Returns:
+ None
+ '''
  csv_buffer = StringIO()
  data_frame.to_csv(csv_buffer)
  csv_buffer.seek(0)
@@ -23,7 +45,17 @@ def upload_pandas_to_s3(data_frame,bucket,key, aws_credentials):
  s3.put_object(Bucket=bucket, Body=csv_buffer.getvalue(), Key= key)
 
  def download_file_to_aws(bucket,key, aws_credentials):
-
+ '''
+ download a csv file from an s3 folder
+
+ Parameters:
+ bucket (str): bucket name
+ key (str): key pattern or folder in s3 e.g. path/to/download/file.csv
+ aws_credentials (dict): aws credentials dictionary
+
+ Returns:
+ df (pd.DataFrame): the downloaded data
+ '''
  s3c = boto3.client(
  's3',
  region_name = aws_credentials['AWS_DEFAULT_REGION'],
@@ -31,5 +63,5 @@ def download_file_to_aws(bucket,key, aws_credentials):
  aws_secret_access_key = aws_credentials['AWS_SECRET_ACCESS_KEY']
  )
  obj = s3c.get_object(Bucket= bucket , Key = key)
- df = pd.read_csv(BytesIO(obj['Body'].read()), encoding='utf8')
+ df = pd.read_csv(BytesIO(obj['Body'].read()), encoding='utf8', sep = ';')
  return df
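
For orientation, a minimal usage sketch of the helpers above (not part of the package diff). Credential values and bucket/key names are placeholders, and the import path is an assumption based on the package's relative import `from .aws_utils import ...`. Note that after this change `download_file_to_aws` parses objects with `sep=';'`, while `upload_pandas_to_s3` still writes pandas' default comma-separated CSV:

```python
import pandas as pd
# assumed module path, inferred from "from .aws_utils import upload_file_to_aws"
from virgo_modules.src.aws_utils import upload_pandas_to_s3, download_file_to_aws

aws_credentials = {
    'AWS_ACCESS_KEY_ID': '...',         # placeholder
    'AWS_SECRET_ACCESS_KEY': '...',     # placeholder
    'AWS_DEFAULT_REGION': 'us-east-1',  # placeholder
}

frame = pd.DataFrame({'Close': [100.0, 101.5, 99.8]})
# writes the frame as CSV into the given key
upload_pandas_to_s3(frame, bucket='my-bucket', key='path/to/upload/data.csv',
                    aws_credentials=aws_credentials)

# as of this version, the object is parsed as a semicolon-separated CSV
df = download_file_to_aws(bucket='my-bucket', key='path/to/download/file.csv',
                          aws_credentials=aws_credentials)
```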
@@ -0,0 +1,474 @@
+ import pandas as pd
+ import numpy as np
+ import json
+
+ import matplotlib.pyplot as plt
+ import matplotlib.gridspec as gridspec
+ import seaborn as sns; sns.set()
+
+ import warnings
+ warnings.filterwarnings('ignore')
+
+ from .aws_utils import upload_file_to_aws
+
+ def sharpe_ratio(return_series):
+
+ '''
+ calculate the sharpe ratio for a given return series.
+
+ Parameters:
+ return_series (pd.Series): pandas series of the asset returns
+
+ Returns:
+ sharpe (float): sharpe ratio
+ '''
+
+ N = 255 # Trading days in the year (change to 365 for crypto)
+ rf = 0.005 # Half a percent risk free rate
+ mean = return_series.mean() * N - rf
+ sigma = return_series.std() * np.sqrt(N)
+ sharpe = round(mean / sigma, 3)
+ return sharpe
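
As a quick arithmetic check of the annualization above (an illustration, not package code): with a mean daily return of 0.001 and a daily volatility of 0.02, the function yields (0.001*255 - 0.005) / (0.02*sqrt(255)) ≈ 0.783:

```python
import numpy as np
import pandas as pd

# synthetic daily returns with mean 0.1% and std 2%
rng = np.random.default_rng(42)
rets = pd.Series(rng.normal(0.001, 0.02, size=2000))

# sample estimate; should land near the theoretical ~0.78
print(sharpe_ratio(rets))
```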
+
+
+ class SignalAnalyserObject:
+ """
+ Class that produces back-test analyses for a given feature
+
+ Attributes
+ ----------
+ symbol_name : str
+ stock or asset to assess
+ feature_name : str
+ test_size: int
+ testing data size
+ show_plot: boolean
+ save_path: str
+ if available, save result locally
+ save_aws: str
+ if available, export result to aws
+ aws_credentials: dict
+ signal_position: int
+ if available, signal position to open a position
+ df: pd.DataFrame
+ transformed data of the selected feature to perform back-test
+ median_return: float
+ median return after end low signals
+
+ Methods
+ -------
+ signal_analyser(days_list=list):
+ given a signal position for either a bottom or roof signal, calculate the expected return and distributions for the time horizons in days_list
+ create_backtest_signal(days_strategy=int, high_exit=float, low_exit=float, open_in_list=list):
+ create a back-test analysis on the test data using opening and closing position criteria
+ """
+
+ def __init__(self, data,symbol_name, feature_name, test_size, signal_position = False, correct_signals = False, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+ """
+ Initialize object
+
+ Parameters
+ ----------
+ data (pd.DataFrame): data
+ symbol_name (str): name of the asset
+ feature_name (str): name of the feature
+ test_size (int): size of the test data
+ signal_position (int): signal position to open the position, False by default
+ correct_signals (int): clean abnormal signals using interpolation
+ show_plot (boolean): if true, show a plot for every method
+ save_path (str): if set, save results locally e.g. r'C:/path/to/the/file/'
+ save_aws (str): if set, export results to the remote repo e.g. 'path/to/file/'
+ aws_credentials (dict): credentials for aws
+ return_fig (boolean): if true, methods will return objects
+
+ Returns
+ -------
+ None
+ """
+ self.ticket_name = symbol_name
+ self.feature_name=feature_name
+ self.test_size=test_size
+ self.show_plot = show_plot
+ self.save_path = save_path
+ self.save_aws = save_aws
+ self.aws_credentials = aws_credentials
+ self.return_fig = return_fig
+ self.signal_position = signal_position
+ ## preprocessing
+ up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+ features_base = ['Date', up_signal, low_signal, 'Close','Open','High','Low']
+
+ df = data[features_base].sort_values('Date')
+
+ df['signal_type'] = np.where(
+ df[up_signal] == 1,
+ 'up',
+ np.where(
+ df[low_signal] == 1,
+ 'down',
+ 'no signal'
+ )
+ )
+ def correct_sygnals(df,correct_i = 1):
+ ### signal cleaning
+ for i in range(1+correct_i, len(df)-1):
+ start_i, end_i = i-(correct_i+1), i+1
+ dfw = df.iloc[start_i: end_i,]
+ before_type = dfw.iloc[0].signal_type
+ after_type = dfw.iloc[-1].signal_type
+ window_types = dfw.iloc[1:-1].signal_type.unique()
+ n_window_type = len(window_types)
+ if n_window_type == 1:
+ if (before_type == after_type) and (window_types[0] != after_type):
+ df.iloc[start_i+1: end_i-1, df.columns.get_loc('signal_type')] = before_type
+ return df.copy()
+
+ if correct_signals:
+ for correct_i in range(1,correct_signals+1):
+ df = correct_sygnals(df,correct_i = correct_i)
+ df[up_signal] = np.where(df['signal_type'] == 'up', 1,0)
+ df[low_signal] = np.where(df['signal_type'] == 'down', 1,0)
+
+ ## indexing chains
+ df['lag_signal_type'] = df['signal_type'].shift(1)
+ df['lag_Date'] = df['Date'].shift(1)
+ df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+ df['break'] = np.where((df['span'] > 3) & (df['lag_signal_type'] == df['signal_type']), 1, 0)
+ df['break'] = np.where((df['lag_signal_type'] != df['signal_type']), 1, df['break'])
+ df['chain_id'] = df.sort_values(['Date']).groupby(['break']).cumcount() + 1
+ df['chain_id'] = np.where(df['break'] == 1, df['chain_id'], np.nan )
+ df['chain_id'] = df['chain_id'].fillna(method = 'ffill')
+
+ df['internal_rn'] = df.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
+ df['inv_internal_rn'] = df.sort_values(['Date'],ascending = False).groupby(['chain_id']).cumcount() + 1
+
+ df['first_in_chain'] = np.where(df['internal_rn'] == 1, True, False)
+ df['last_in_chain'] = np.where(df['inv_internal_rn'] == 1, True, False)
+
+ df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+ self.df = df.drop(columns = ['span','break','lag_signal_type','lag_Date']).copy()
+
+ def signal_analyser(self, days_list):
+ """
+ Calculate expected returns and signal distributions for the given time horizons
+
+ Parameters
+ ----------
+ days_list (list): list of integers to calculate expected returns
+
+ Returns
+ -------
+ if return_fig is true, returns a matplotlib fig
+ """
+ signal_position = self.signal_position
+ df = self.df.iloc[0:-self.test_size,:].copy()
+ returns_list = list()
+
+ for days in days_list:
+ feature_ = f'return_{days}d'
+ df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+ returns_list.append(feature_)
+
+ df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
+ df['open_short'] = np.where(df.first_in_chain == True, True, np.nan)
+ df.signal_type = df.signal_type.map({'up':'go down', 'down': 'go up'})
+
+ # median return
+ returns_list = [f'return_{days}d' for days in days_list]
+ df_melt = df[df.open_long == True].pivot_table(index=['signal_type'], values=returns_list, aggfunc='median')
+ df_melt['median'] = df_melt[returns_list].median(axis = 1)
+ self.median_return = df_melt.loc['go up', 'median']
+
+ # plotting
+ fig, axs = plt.subplots(1, 4, figsize = (20,5))
+ palette ={"go down": "tomato", "go up": "lightblue"}
+
+ df2 = df[df.signal_type.isin(['go down','go up'])]
+ df2['lag_Date'] = df2['Date'].shift(1)
+ df2['lag_signal_type'] = df2['signal_type'].shift(1)
+ df2 = df2[df2.lag_signal_type != df2.signal_type]
+ df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
+ sns.violinplot(data=df2, y="span",ax = axs[0], color = 'lightblue', linewidth=0.7,inner="quart")
+ sns.stripplot(data=df2, y="span",ax = axs[0], jitter=True, zorder=1)
+ axs[0].set_title('span between last signals')
+
+ df_ = df[df.last_in_chain == True]
+ df_['part'] = '-'
+ sns.violinplot(data=df_, y="internal_rn", x='part', ax = axs[1], hue="signal_type", inner="quart",palette = palette,gap=0.1, split=True, linewidth=0.7)
+ axs[1].set_title('signal duration distribution')
+
+ if signal_position:
+ for feature in returns_list:
+ df[feature] = df[feature].shift(-signal_position)
+
+ df_melt = df[df.open_long == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+ df_melt = df_melt.dropna()
+ sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type",ax = axs[2], split=True, gap=0.1, inner="quart",palette = palette, linewidth=0.8)
+ axs[2].axhline(y=0, color='grey', linestyle='--')
+ axs[2].set_title('E. returns - end of the signal')
+
+ df_melt = df[df.open_short == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+ df_melt = df_melt.dropna()
+ sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type",ax = axs[3], split=True, gap=0.1, inner="quart",palette = palette, linewidth=0.8)
+ axs[3].axhline(y=0, color='grey', linestyle='--')
+ axs[3].set_title('E. returns - start of the signal')
+
+ if self.show_plot:
+ plt.show()
+
+ if self.save_path:
+ result_plot_name = f'signals_strategy_distribution_{self.feature_name}.png'
+ fig.savefig(self.save_path+result_plot_name)
+ # pickle.dump(axs, open(self.save_path+result_plot_name, 'wb'))
+
+ if self.save_path and self.save_aws:
+ # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_plot_name, input_path = self.save_path+result_plot_name)
+ upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+ if not self.show_plot:
+ plt.close()
+
+ del df
+
+ if self.return_fig:
+ return fig
+
+ def create_backtest_signal(self,days_strategy, high_exit = False, low_exit = False, open_in_list = ['down']):
+ """
+ Create a back-test analysis on the test data
+
+ Parameters
+ ----------
+ days_strategy (int): position horizon
+ high_exit (float): max threshold to close the position
+ low_exit (float): min threshold to close the position, this parameter has to be positive
+ open_in_list (list): list of strings ("down","up") to assess signals
+
+ Returns
+ -------
+ if return_fig is true, returns a matplotlib fig and a list of dicts containing the analysis
+ """
+ asset_1 = 'Close'
+ up_signal, low_signal= f'signal_up_{self.feature_name}', f'signal_low_{self.feature_name}'
+ signal_position = self.signal_position
+ dft = self.df.iloc[-self.test_size:,:].reset_index(drop=True).copy()
+
+ dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
+ dft['bench_prod'] = dft['lrets_bench'].cumsum()
+ dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
+
+ map_ = {'down':'END LOW TREND', 'up': 'BEGINNING HIGH TREND'}
+
+ open_in_list_items = len(open_in_list)
+ fig, axs = plt.subplots(1,open_in_list_items, figsize = (7*open_in_list_items,6))
+ messages = list()
+ for i, open_in in enumerate(open_in_list):
+ axs_ = axs if open_in_list_items == 1 else axs[i]
+ if open_in == 'down':
+ dft['open_long'] = np.where((dft.last_in_chain == True) & (dft.signal_type == 'down'), True, np.nan) # open strat
+ elif open_in == 'up':
+ dft['open_long'] = np.where((dft.first_in_chain == True) & (dft.signal_type == 'up'), True, np.nan) # open strat
+
+ def chain_position(dft):
+ dft['open_long_id'] = np.where(dft['open_long'] == True, dft.chain_id, np.nan)
+ dft['open_long_id'] = dft['open_long_id'].fillna(method = 'ffill')
+ dft['open_long_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).cumcount() + 1
+ return dft
+
+ if signal_position:
+ dft['open_long'] = dft.sort_values(['Date'])['open_long'].shift(signal_position)
+
+ dft = chain_position(dft)
+ dft['flag'] = np.where(dft['open_long_rn'] < days_strategy, 1,0)
+
+ if high_exit and low_exit:
+ dft['open_strat'] = np.where(dft.open_long == True, dft.Open, np.nan) # open strat
+ dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
+ dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
+ dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
+ dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
+ dft['max_step_chain'] = dft.groupby(['open_long_id'])['open_long_rn'].transform('max')
+ dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['open_long_rn'] == days_strategy) | (dft['max_step_chain'] == dft['open_long_rn'])), 1, np.nan)
+ dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
+
+ dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
+ dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1,-1,np.nan))
+ dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
+ dft['exit_order'] = dft.sort_values(['Date']).groupby(['open_long_id','exit']).cumcount() + 1
+ dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
+ dft = dft.drop(columns = ['exit_order'])
+ ## if last signal is near
+ max_id = dft.open_long_id.max()
+ dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).open_long_rn.transform('max')
+ dft['exit'] = np.where((dft.open_long_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.open_long_rn), 1, dft['exit'])
+
+ dft['exit_step'] = np.where(dft.exit == 1, dft.open_long_rn, np.nan)
+ dft['exit_step'] = dft.sort_values(['Date']).groupby(['open_long_id']).exit_step.transform('max')
+
+ dft['flag'] = np.where(dft.open_long_rn <= dft.exit_step, 1, 0)
+
+ dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
+ dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(),-0.0,dft['lrets_strat'])
+ dft['lrets_prod'] = dft['lrets_strat'].cumsum()
+ dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
+
+ bench_rets = round(dft['bench_prod_exp'].values[-1]*100,1)
+ strat_rets = round(dft['strat_prod_exp'].values[-1]*100,1)
+
+ bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()),1)
+ strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()),1)
+
+ message1 = f'{bench_rets}%'
+ message2 = f'{strat_rets}%'
+
+ messages_ = {
+ 'type strategy':map_[open_in],
+ 'benchmark return:':message1,
+ 'benchmark sharpe ratio:': bench_sr,
+ 'strategy return:':message2,
+ 'strategy sharpe ratio:': strat_sr,
+ }
+ messages.append(messages_)
+ if self.show_plot:
+ print('----------------------------')
+ print(messages_)
+ print('----------------------------')
+
+
+ axs_.plot(dft.bench_prod_exp.values, label = 'benchmark', color = 'steelblue')
+ axs_.scatter(range(len(dft)),np.where(dft[low_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'red', label = 'signal')
+ axs_.scatter(range(len(dft)),np.where(dft[up_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'green', label = 'signal')
+ axs_.plot(dft.strat_prod_exp.values, label = 'strategy', color = 'darksalmon')
+ axs_.set_xlabel("index")
+ axs_.set_ylabel("cumulative return")
+ axs_.set_title(f'{map_[open_in]} strategy and cumulative returns')
+ axs_.legend()
+
+ if self.show_plot:
+ plt.show()
+
+ if self.save_path:
+ result_json_name = f'signals_strategy_return_{self.feature_name}.json'
+ result_plot_name = f'signals_strategy_return_{self.feature_name}.png'
+
+ plt.savefig(self.save_path+result_plot_name)
+
+ with open(self.save_path+result_json_name, "w") as outfile:
+ json.dump(messages, outfile)
+
+ if self.save_path and self.save_aws:
+
+ upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
+ upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+ if not self.show_plot:
+ plt.close()
+
+ del dft
+
+ if self.return_fig:
+ return fig, messages
+
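
A minimal usage sketch of SignalAnalyserObject (illustrative only: the OHLC file, the 'rsi' feature name, the ticker, and all thresholds are assumptions; per the constructor, `data` must already carry `signal_up_<feature>` / `signal_low_<feature>` columns):

```python
import pandas as pd

# hypothetical input: OHLC data with precomputed signal columns for a feature 'rsi'
data = pd.read_csv('ohlc_with_signals.csv')  # Date, Open, High, Low, Close,
                                             # signal_up_rsi, signal_low_rsi

analyser = SignalAnalyserObject(data, 'AAPL', feature_name='rsi', test_size=250,
                                correct_signals=2, show_plot=True)
analyser.signal_analyser(days_list=[5, 10, 20])    # expected-return distributions
analyser.create_backtest_signal(days_strategy=10,  # back-test on the test slice
                                high_exit=5.0, low_exit=-3.0,  # illustrative % thresholds
                                open_in_list=['down', 'up'])
print(analyser.median_return)
```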
+ class IterateSignalAnalyse(SignalAnalyserObject):
+ """
+ Object that iterates the back-test over a given parameter space
+
+ Attributes
+ ----------
+ test_data_size : int
+ feature_name : str
+ days_list: list
+ list of integers that serve as time horizons
+ arguments_to_test : dict
+ parameter space
+ method: str
+ method to use
+ object_stock: obj
+ object containing data and methods
+ plot: boolean
+ show summary plot of median results
+ best_result: float
+ index of the best result, the index corresponds to the parameter space
+
+ Methods
+ -------
+ execute(show_plot_iter=boolean):
+ display plots for every iteration
+ """
+ def __init__(self, test_data_size, feature_name, days_list, arguments_to_test, method, object_stock, plot = False):
+ """
+ Parameters
+ ----------
+ test_data_size (int): size of the test data
+ feature_name (str): name of the feature
+ days_list (list): list of integers that serve as time horizons
+ arguments_to_test (dict): parameter space
+ method (str): method to use
+ object_stock (obj): object containing data and methods
+ plot (boolean): show summary plot of median results
+
+ Returns
+ -------
+ None
+ """
+ self.test_data_size = test_data_size
+ self.feature_name = feature_name
+ self.days_list = days_list
+ self.arguments_to_test = arguments_to_test
+ self.method = method
+ self.plot = plot
+ self.object_stock = object_stock
+
+ def execute(self,show_plot_iter = False):
+ """
+ Iterate the back-test and compute the median result for every iteration
+
+ Parameters
+ ----------
+ show_plot_iter (boolean): display plots for every iteration
+
+ Returns
+ -------
+ None
+ """
+ results = list()
+ for key in self.arguments_to_test.keys():
+ configuration = self.arguments_to_test.get(key)
+ getattr(self.object_stock, self.method)(**configuration)
+ signal_assess = SignalAnalyserObject(self.object_stock.df, self.object_stock.stock_code, show_plot = show_plot_iter, test_size = self.test_data_size, feature_name = self.feature_name)
+ signal_assess.signal_analyser(days_list = self.days_list)
+ mean_median_return = signal_assess.median_return
+ results.append(mean_median_return)
+
+ df_result = pd.DataFrame({'keys':self.arguments_to_test.keys(),'results':results})
+ if self.plot:
+ plt.plot(df_result['keys'], df_result['results'])
+ plt.scatter(df_result['keys'], df_result['results'])
+ plt.title('simulation between configurations')
+ plt.ylabel('median expected return')
+ plt.show()
+
+ best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
+ self.best_result = best_result
+
+ def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, analyser_object, plot = False, backtest= False, exit_params = {}):
+ '''
+ run the back-test and display analysis messages and plots
+
+ Parameters:
+ test_data_size (int): test data size
+ feature_name (str): name of the feature to assess
+ days_list (list): time scope to assess the returns
+ configuration (dict): parameters of the method to run
+ object_stock (obj): object with data to assess
+ method (str): method to use
+ analyser_object (obj): signal_analyser object
+ plot (boolean): if true, plot results
+ backtest (int): position horizon, forwarded to create_backtest_signal as days_strategy
+ exit_params (dict): parameters of exit returns
+
+ Returns:
+ None
+ '''
+ getattr(object_stock, method)(**configuration)
+ signal_assess = analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot, feature_name = feature_name, test_size = test_data_size)
+ signal_assess.signal_analyser(days_list = days_list)
+ signal_assess.create_backtest_signal(backtest, open_in_list = ['down','up'], **exit_params )
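
A hypothetical driver for the two entry points above (the `stock_obj` instance, its `'produce_rsi_signal'` method name, and the parameter grid are all assumptions; per the code, the object must expose `.df` and `.stock_code`):

```python
grid = {
    'cfg_a': {'window': 14},  # hypothetical parameter sets for the method below
    'cfg_b': {'window': 21},
}

search = IterateSignalAnalyse(test_data_size=250, feature_name='rsi',
                              days_list=[5, 10, 20], arguments_to_test=grid,
                              method='produce_rsi_signal', object_stock=stock_obj,
                              plot=True)
search.execute(show_plot_iter=False)
print(search.best_result)  # key of the best-scoring configuration

# one-shot variant: backtest is forwarded as days_strategy
execute_signal_analyser(250, 'rsi', [5, 10, 20], grid['cfg_b'], 'produce_rsi_signal',
                        stock_obj, SignalAnalyserObject, plot=True, backtest=10,
                        exit_params={'high_exit': 5.0, 'low_exit': -3.0})
```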
File without changes
@@ -0,0 +1,106 @@
+ from plotly.subplots import make_subplots
+ import plotly.graph_objects as go
+ from sklearn.pipeline import Pipeline
+ import mlflow
+ import pandas as pd
+ import numpy as np
+ from sklearn.base import BaseEstimator, ClassifierMixin
+ from mapie.classification import SplitConformalClassifier
+
+ class ConformalStack(mlflow.pyfunc.PythonModel):
+ def __init__(self, model,targets, alphas):
+ self.model = model
+ self.targets = targets
+ self.alphas = alphas
+ def fit(self, data):
+ self.classifiers = dict()
+ for i,target in enumerate(self.targets):
+ st = SingleStack(self.model["model"],i)
+ st.fit()
+ seg_model = Pipeline([
+ ('pipe',self.model['pipe_transform']),
+ ('modelbase',st)
+ ])
+ mapie_class = SplitConformalClassifier(seg_model, prefit=True, random_state=123, conformity_score="lac", confidence_level=1-np.array(self.alphas))
+ mapie_class.conformalize(data, data[self.targets[i]].values)
+ self.classifiers[target] = mapie_class
+ def predict_conformal(self, data):
+ for target in self.targets:
+ prefix = target+"_conf"
+ _, y_pis = self.classifiers[target].predict_set(data)
+ for i,alpha in enumerate(self.alphas):
+ data[f'{prefix}-{alpha}'] = y_pis[:,1,i]
+ data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True,alpha,0)
+ return data
+
+
+ class SingleStack(ClassifierMixin, BaseEstimator):
+ def __init__(self, model, estimator_index):
+ self.model = model
+ self.estimator_index = estimator_index
+
+ def fit(self):
+ self._is_fitted = True
+ self.classes_ = [0,1]
+
+ def predict_proba(self, X):
+ metas_pred = dict()
+ for i,cont in enumerate(self.model.estimators, start=1):
+ _,estimator = cont
+ meta_pred = estimator.predict_proba(X)
+ metas_pred[f"meta{i}0"] = meta_pred[0][:,1]
+ metas_pred[f"meta{i}1"] = meta_pred[1][:,1]
+ self.meta_preds_df__ = pd.DataFrame(metas_pred)
+
+ prediction_vector = list()
+ for i,cont in enumerate(self.model.meta_estimators, start=0):
+ _,estimator = cont
+ metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
+ preds = estimator.predict_proba(self.meta_preds_df__[metacols].values)
+ prediction_vector.append(preds)
+ return prediction_vector[self.estimator_index]
+
+ def predict(self, X):
+ prediction_vector = list()
+ _ = self.predict_proba(X)
+ for i,cont in enumerate(self.model.meta_estimators, start=0):
+ _,estimator = cont
+ metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
+ preds = estimator.predict(self.meta_preds_df__[metacols].values)
+ prediction_vector.append(preds)
+
+ p = np.array(tuple(prediction_vector))
+ return p.reshape((p.shape[1],p.shape[0]))[:,self.estimator_index]
+
+ def __sklearn_is_fitted__(self):
+ return hasattr(self, "_is_fitted") and self._is_fitted
+
+ def edge_conformal_lines(data, alphas,threshold = 0.6, plot = False, look_back = 750, offset = 0.08):
+ ### correct labels ###
+ df = data.sort_values('Date').iloc[-look_back:]
+ fig = make_subplots(specs=[[{"secondary_y": True}]])
+ fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',marker = dict(color = 'grey'),line = dict(color = 'grey'),name='Close price'))
+ fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up,mode='lines',marker = dict(color = 'blue'),showlegend=True,legendgroup='go up', name='go up'),secondary_y=True)
+ fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down,mode='lines',marker = dict(color = 'coral'),showlegend=True,legendgroup='go down',name='go down'),secondary_y=True)
+ for i,alpha in enumerate(alphas, start=1):
+ try:
+ col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
+ df_ = df[df[col_alpha] != 0]
+ fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'blue')
+ ,showlegend=False,legendgroup='go up',name='go up', text=df_[col_alpha],textposition="bottom center")
+ , secondary_y=True)
+ except:
+ pass
+ try:
+ col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
+ df_ = df[df[col_alpha] != 0]
+ fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'coral')
+ ,showlegend=False,legendgroup='go down', name='go down',text=df_[col_alpha].astype(str),textposition="bottom center")
+ , secondary_y=True)
+ except:
+ pass
+ fig.add_shape(type="line", xref="paper", yref="y2",x0=0.02, y0=threshold, x1=0.9, y1=threshold,line=dict(color="red",dash="dash"))
+ fig.update_layout(title_text="sirius - edge probabilities conformal",width=1200,height = 500)
+ if plot:
+ fig.show()
+ return fig
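
Finally, a speculative sketch of how these conformal pieces fit together (the structure of `model` — a dict holding a fitted stacking model under "model" and a transform pipeline under "pipe_transform" — is inferred from ConformalStack.fit; the data frames are placeholders and must carry the `proba_target_up` / `proba_target_down` columns that edge_conformal_lines plots):

```python
targets = ['target_up', 'target_down']
alphas = [0.1, 0.2]

conf = ConformalStack(model, targets, alphas)   # model: dict as described above
conf.fit(calibration_df)                        # one conformalized classifier per target
scored = conf.predict_conformal(new_df.copy())
# scored gains columns like 'target_up_conf-0.1' holding alpha where the positive
# label is in the prediction set, else 0

# plot close price, edge probabilities and conformal markers for the last 750 rows
fig = edge_conformal_lines(scored, alphas, threshold=0.6, plot=False)
fig.show()
```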