virgo-modules 0.0.72__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- virgo_modules/src/aws_utils.py +35 -3
- virgo_modules/src/backtester.py +474 -0
- virgo_modules/src/edge_utils/__init__.py +0 -0
- virgo_modules/src/edge_utils/conformal_utils.py +106 -0
- virgo_modules/src/edge_utils/edge_utils.py +502 -0
- virgo_modules/src/edge_utils/feature_selection.py +66 -0
- virgo_modules/src/edge_utils/shap_utils.py +54 -0
- virgo_modules/src/edge_utils/stack_model.py +94 -0
- virgo_modules/src/hmm_utils.py +494 -0
- virgo_modules/src/market/__init__.py +0 -0
- virgo_modules/src/market/market_tools.py +189 -0
- virgo_modules/src/re_utils.py +628 -85
- virgo_modules/src/ticketer_source.py +1278 -1066
- virgo_modules/src/transformer_utils.py +401 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info}/METADATA +16 -22
- virgo_modules-0.8.4.dist-info/RECORD +22 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info}/WHEEL +1 -1
- virgo_modules/src/edge_utils.py +0 -178
- virgo_modules-0.0.72.dist-info/RECORD +0 -12
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info/licenses}/LICENSE +0 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info}/top_level.txt +0 -0
virgo_modules/src/aws_utils.py
CHANGED
@@ -6,14 +6,36 @@ import pandas as pd
 
 
 def upload_file_to_aws(bucket,key,input_path, aws_credentials):
-
+    '''
+    upload file from a folder to an s3 folder
+
+    Parameters:
+        bucket (str): bucket name
+        key (str): key pattern or folder in s3 e.g. path/to/upload/
+        input_path (str): input path of the file to upload e.g. path/to/upload.txt
+        aws_credentials (dict): aws credentials dictionary
+
+    Returns:
+        None
+    '''
     session = boto3.Session(aws_access_key_id=aws_credentials['AWS_ACCESS_KEY_ID'],aws_secret_access_key=aws_credentials['AWS_SECRET_ACCESS_KEY'])
     bucket = aws_credentials[bucket]
     s3 = session.resource('s3')
     s3.meta.client.upload_file(Filename=input_path , Bucket=bucket, Key=key)
 
 def upload_pandas_to_s3(data_frame,bucket,key, aws_credentials):
+    '''
+    upload dataframe as csv to an s3 folder
 
+    Parameters:
+        data_frame (pd.DataFrame): data
+        bucket (str): bucket name
+        key (str): key pattern or folder in s3 e.g. path/to/upload/
+        aws_credentials (dict): aws credentials dictionary
+
+    Returns:
+        None
+    '''
     csv_buffer = StringIO()
     data_frame.to_csv(csv_buffer)
     csv_buffer.seek(0)
@@ -23,7 +45,17 @@ def upload_pandas_to_s3(data_frame,bucket,key, aws_credentials):
     s3.put_object(Bucket=bucket, Body=csv_buffer.getvalue(), Key= key)
 
 def download_file_to_aws(bucket,key, aws_credentials):
-
+    '''
+    download csv file from s3 folder
+
+    Parameters:
+        bucket (str): bucket name
+        key (str): key pattern or folder in s3 e.g. path/to/download/file.csv
+        aws_credentials (dict): aws credentials dictionary
+
+    Returns:
+        None
+    '''
     s3c = boto3.client(
         's3',
         region_name = aws_credentials['AWS_DEFAULT_REGION'],
@@ -31,5 +63,5 @@ def download_file_to_aws(bucket,key, aws_credentials):
         aws_secret_access_key = aws_credentials['AWS_SECRET_ACCESS_KEY']
     )
    obj = s3c.get_object(Bucket= bucket , Key = key)
-    df = pd.read_csv(BytesIO(obj['Body'].read()), encoding='utf8')
+    df = pd.read_csv(BytesIO(obj['Body'].read()), encoding='utf8', sep = ';')
     return df
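Worth flagging in review: after this change `download_file_to_aws` parses with `sep = ';'`, while `upload_pandas_to_s3` still writes with pandas' default comma separator, so the two helpers no longer round-trip a dataframe unless the file is semicolon-delimited. A minimal usage sketch, assuming placeholder credentials; the import path follows the module path above, and since the bucket handling for `upload_pandas_to_s3` falls outside the visible hunks, the literal bucket names here are illustrative:

```python
# Hypothetical usage sketch; credential values, bucket names and keys are
# illustrative, not part of the package.
import pandas as pd
from virgo_modules.src.aws_utils import upload_file_to_aws, upload_pandas_to_s3, download_file_to_aws

aws_credentials = {
    'AWS_ACCESS_KEY_ID': '<access-key>',
    'AWS_SECRET_ACCESS_KEY': '<secret-key>',
    'AWS_DEFAULT_REGION': 'us-east-1',
    'VIRGO_BUCKET': 'my-virgo-bucket',  # upload_file_to_aws maps its bucket argument through this dict
}

# upload_file_to_aws takes a credentials-dict alias, not the literal bucket name
upload_file_to_aws('VIRGO_BUCKET', 'plots/report.png', 'local/report.png', aws_credentials)

# upload_pandas_to_s3 writes with pandas' default comma separator, while
# download_file_to_aws (as of this version) reads with sep=';', so a file
# must be semicolon-delimited for a clean round trip.
df = pd.DataFrame({'Close': [101.2, 99.8]})
upload_pandas_to_s3(df, 'my-virgo-bucket', 'data/prices.csv', aws_credentials)
df_back = download_file_to_aws('my-virgo-bucket', 'data/prices.csv', aws_credentials)
```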
virgo_modules/src/backtester.py
ADDED
@@ -0,0 +1,474 @@
+import pandas as pd
+import numpy as np
+import json
+
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import seaborn as sns; sns.set()
+
+import warnings
+warnings.filterwarnings('ignore')
+
+from .aws_utils import upload_file_to_aws
+
+def sharpe_ratio(return_series):
+
+    '''
+    calculate sharpe ratio for a given series.
+
+    Parameters:
+        return_series (pd.Series): pandas series of the asset returns
+
+    Returns:
+        sharpe (float): sharpe ratio
+    '''
+
+    N = 255 # Trading days in the year (change to 365 for crypto)
+    rf = 0.005 # Half a percent risk free rate
+    mean = return_series.mean() * N -rf
+    sigma = return_series.std() * np.sqrt(N)
+    sharpe = round(mean / sigma, 3)
+    return sharpe
+
+
+class SignalAnalyserObject:
+    """
+    Class that produces back-test analysis for a given feature
+
+    Attributes
+    ----------
+    symbol_name : str
+        stock or asset to assess
+    feature_name : str
+    test_size: int
+        testing data size
+    show_plot: boolean
+    save_path: str
+        if available, save result locally
+    save_aws: str
+        if available, export result to a remote repo
+    aws_credentials: dict
+    signal_position: int
+        if available, signal position to open a position
+    df: pd.DataFrame
+        transformed data of the selected feature to perform back-test
+    median_return: float
+        median return after end low signals
+
+    Methods
+    -------
+    signal_analyser(days_list=list):
+        given a signal position for either bottom or roof signals, calculate the expected return and distributions for each time horizon in the days list
+    create_backtest_signal(days_strategy=int, high_exit=float, low_exit=float, open_in_list=list):
+        create a back-test analysis on the test data using opening and closing position criteria
+    """
+
+    def __init__(self, data,symbol_name, feature_name, test_size, signal_position = False, correct_signals = False, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
+        """
+        Initialize object
+
+        Parameters
+        ----------
+        data (pd.DataFrame): data
+        symbol_name (str): name of the asset
+        feature_name (str): name of the feature
+        test_size (int): size of the test data
+        signal_position (int): signal position to open the position, False by default
+        correct_signals (int): clean abnormal signals using interpolation
+        show_plot (boolean): if true show plot for every method
+        save_path (str): if set, save results to a local file e.g. r'C:/path/to/the/file/'
+        save_aws (str): if set, export results to a remote repo e.g. 'path/to/file/'
+        aws_credentials (dict): credentials for aws
+        return_fig (boolean): if true, methods will return objects
+
+        Returns
+        -------
+        None
+        """
+        self.ticket_name = symbol_name
+        self.feature_name=feature_name
+        self.test_size=test_size
+        self.show_plot = show_plot
+        self.save_path = save_path
+        self.save_aws = save_aws
+        self.aws_credentials = aws_credentials
+        self.return_fig = return_fig
+        self.signal_position = signal_position
+        ## preprocessing
+        up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
+        features_base = ['Date', up_signal, low_signal, 'Close','Open','High','Low']
+
+        df = data[features_base].sort_values('Date')
+
+        df['signal_type'] = np.where(
+            df[up_signal] == 1,
+            'up',
+            np.where(
+                df[low_signal] == 1,
+                'down',
+                'no signal'
+            )
+        )
+        def correct_sygnals(df,correct_i = 1):
+            ### signal cleaning
+            for i in range(1+correct_i, len(df)-1):
+                start_i, end_i = i-(correct_i+1), i+1
+                dfw = df.iloc[start_i: end_i,]
+                before_type = dfw.iloc[0].signal_type
+                after_type = dfw.iloc[-1].signal_type
+                window_types = dfw.iloc[1:-1].signal_type.unique()
+                n_window_type = len(window_types)
+                if n_window_type == 1:
+                    if (before_type == after_type) and (window_types[0] != after_type):
+                        df.iloc[start_i+1: end_i-1, df.columns.get_loc('signal_type')] = before_type
+            return df.copy()
+
+        if correct_signals:
+            for correct_i in range(1,correct_signals+1):
+                df = correct_sygnals(df,correct_i = correct_i)
+            df[up_signal] = np.where(df['signal_type'] == 'up', 1,0)
+            df[low_signal] = np.where(df['signal_type'] == 'down', 1,0)
+
+        ## indexing chains
+        df['lag_signal_type'] = df['signal_type'].shift(1)
+        df['lag_Date'] = df['Date'].shift(1)
+        df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+        df['break'] = np.where((df['span'] > 3) & (df['lag_signal_type'] == df['signal_type']), 1, 0)
+        df['break'] = np.where((df['lag_signal_type'] != df['signal_type']), 1, df['break'])
+        df['chain_id'] = df.sort_values(['Date']).groupby(['break']).cumcount() + 1
+        df['chain_id'] = np.where(df['break'] == 1, df['chain_id'], np.nan )
+        df['chain_id'] = df['chain_id'].fillna(method = 'ffill')
+
+        df['internal_rn'] = df.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
+        df['inv_internal_rn'] = df.sort_values(['Date'],ascending = False).groupby(['chain_id']).cumcount() + 1
+
+        df['first_in_chain'] = np.where(df['internal_rn'] == 1, True, False)
+        df['last_in_chain'] = np.where(df['inv_internal_rn'] == 1, True, False)
+
+        df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
+        self.df = df.drop(columns = ['span','break','lag_signal_type','lag_Date']).copy()
+
+    def signal_analyser(self, days_list):
+        """
+        Calculate expected returns and signal distributions for each horizon in days_list
+
+        Parameters
+        ----------
+        days_list (list): list of integers to calculate expected returns
+
+        Returns
+        -------
+        if return_fig is true, returns a matplotlib fig
+        """
+        signal_position = self.signal_position
+        df = self.df.iloc[0:-self.test_size,:].copy()
+        returns_list = list()
+
+        for days in days_list:
+            feature_ = f'return_{days}d'
+            df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
+            returns_list.append(feature_)
+
+        df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
+        df['open_short'] = np.where(df.first_in_chain == True, True, np.nan)
+        df.signal_type = df.signal_type.map({'up':'go down', 'down': 'go up'})
+
+        # median return
+        returns_list = [f'return_{days}d' for days in days_list]
+        df_melt = df[df.open_long == True].pivot_table(index=['signal_type'], values=returns_list, aggfunc='median')
+        df_melt['median'] = df_melt[returns_list].median(axis = 1)
+        self.median_return = df_melt.loc['go up', 'median']
+
+        # plotting
+        fig, axs = plt.subplots(1, 4, figsize = (20,5))
+        palette ={"go down": "tomato", "go up": "lightblue"}
+
+        df2 = df[df.signal_type.isin(['go down','go up'])]
+        df2['lag_Date'] = df2['Date'].shift(1)
+        df2['lag_signal_type'] = df2['signal_type'].shift(1)
+        df2 = df2[df2.lag_signal_type != df2.signal_type]
+        df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
+        sns.violinplot(data=df2, y="span",ax = axs[0], color = 'lightblue', linewidth=0.7,inner="quart")
+        sns.stripplot(data=df2, y="span",ax = axs[0], jitter=True, zorder=1)
+        axs[0].set_title('span between last signals')
+
+        df_ = df[df.last_in_chain == True]
+        df_['part'] = '-'
+        sns.violinplot(data=df_, y="internal_rn", x='part', ax = axs[1], hue="signal_type", inner="quart",palette = palette,gap=0.1, split=True, linewidth=0.7)
+        axs[1].set_title('signal duration distribution')
+
+        if signal_position:
+            for feature in returns_list:
+                df[feature] = df[feature].shift(-signal_position)
+
+        df_melt = df[df.open_long == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+        df_melt = df_melt.dropna()
+        sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type",ax = axs[2], split=True, gap=0.1, inner="quart",palette = palette, linewidth=0.8)
+        axs[2].axhline(y=0, color='grey', linestyle='--')
+        axs[2].set_title('E. returns - end of the signal')
+
+        df_melt = df[df.open_short == 1].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
+        df_melt = df_melt.dropna()
+        sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type",ax = axs[3], split=True, gap=0.1, inner="quart",palette = palette, linewidth=0.8)
+        axs[3].axhline(y=0, color='grey', linestyle='--')
+        axs[3].set_title('E. returns - start of the signal')
+
+        if self.show_plot:
+            plt.show()
+
+        if self.save_path:
+            result_plot_name = f'signals_strategy_distribution_{self.feature_name}.png'
+            fig.savefig(self.save_path+result_plot_name)
+            # pickle.dump(axs, open(self.save_path+result_plot_name, 'wb'))
+
+        if self.save_path and self.save_aws:
+            # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_plot_name, input_path = self.save_path+result_plot_name)
+            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+        if not self.show_plot:
+            plt.close()
+
+        del df
+
+        if self.return_fig:
+            return fig
+
+    def create_backtest_signal(self,days_strategy, high_exit = False, low_exit = False, open_in_list = ['down']):
+        """
+        Create a back-test analysis on the test data using opening and closing position criteria
+
+        Parameters
+        ----------
+        days_strategy (int): position horizon
+        high_exit (float): max threshold to close position
+        low_exit (float): min threshold to close position, this parameter has to be positive
+        open_in_list (list): list of strings ("down","up") to assess signals
+        Returns
+        -------
+        if return_fig is true, returns a matplotlib fig and list of dicts containing analysis
+        """
+        asset_1 = 'Close'
+        up_signal, low_signal= f'signal_up_{self.feature_name}', f'signal_low_{self.feature_name}'
+        signal_position = self.signal_position
+        dft = self.df.iloc[-self.test_size:,:].reset_index(drop=True).copy()
+
+        dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
+        dft['bench_prod'] = dft['lrets_bench'].cumsum()
+        dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
+
+        map_ = {'down':'END LOW TREND', 'up': 'BEGINNING HIGH TREND'}
+
+        open_in_list_items = len(open_in_list)
+        fig, axs = plt.subplots(1,open_in_list_items, figsize = (7*open_in_list_items,6))
+        messages = list()
+        for i, open_in in enumerate(open_in_list):
+            axs_ = axs if open_in_list_items == 1 else axs[i]
+            if open_in == 'down':
+                dft['open_long'] = np.where((dft.last_in_chain == True) & (dft.signal_type == 'down'), True, np.nan) # open strat
+            elif open_in == 'up':
+                dft['open_long'] = np.where((dft.first_in_chain == True) & (dft.signal_type == 'up'), True, np.nan) # open strat
+
+            def chain_position(dft):
+                dft['open_long_id'] = np.where(dft['open_long'] == True, dft.chain_id, np.nan)
+                dft['open_long_id'] = dft['open_long_id'].fillna(method = 'ffill')
+                dft['open_long_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).cumcount() + 1
+                return dft
+
+            if signal_position:
+                dft['open_long'] = dft.sort_values(['Date'])['open_long'].shift(signal_position)
+
+            dft = chain_position(dft)
+            dft['flag'] = np.where(dft['open_long_rn'] < days_strategy, 1,0)
+
+            if high_exit and low_exit:
+                dft['open_strat'] = np.where(dft.open_long == True, dft.Open, np.nan) # open strat
+                dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
+                dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
+                dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
+                dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
+                dft['max_step_chain'] = dft.groupby(['open_long_id'])['open_long_rn'].transform('max')
+                dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['open_long_rn'] == days_strategy) | (dft['max_step_chain'] == dft['open_long_rn'])), 1, np.nan)
+                dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
+
+                dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
+                dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1,-1,np.nan))
+                dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
+                dft['exit_order'] = dft.sort_values(['Date']).groupby(['open_long_id','exit']).cumcount() + 1
+                dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
+                dft = dft.drop(columns = ['exit_order'])
+                ## if last signal is near
+                max_id = dft.open_long_id.max()
+                dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['open_long_id']).open_long_rn.transform('max')
+                dft['exit'] = np.where((dft.open_long_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.open_long_rn), 1, dft['exit'])
+
+                dft['exit_step'] = np.where(dft.exit == 1, dft.open_long_rn, np.nan)
+                dft['exit_step'] = dft.sort_values(['Date']).groupby(['open_long_id']).exit_step.transform('max')
+
+                dft['flag'] = np.where(dft.open_long_rn <= dft.exit_step, 1, 0)
+
+            dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
+            dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(),-0.0,dft['lrets_strat'])
+            dft['lrets_prod'] = dft['lrets_strat'].cumsum()
+            dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
+
+            bench_rets = round(dft['bench_prod_exp'].values[-1]*100,1)
+            strat_rets = round(dft['strat_prod_exp'].values[-1]*100,1)
+
+            bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()),1)
+            strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()),1)
+
+            message1 = f'{bench_rets}%'
+            message2 = f'{strat_rets}%'
+
+            messages_ = {
+                'type strategy':map_[open_in],
+                'benchmark return:':message1,
+                'benchmark sharpe ratio:': bench_sr,
+                'strategy return:':message2,
+                'strategy sharpe ratio:': strat_sr,
+            }
+            messages.append(messages_)
+            if self.show_plot:
+                print('----------------------------')
+                print(messages_)
+                print('----------------------------')
+
+
+            axs_.plot(dft.bench_prod_exp.values, label = 'benchmark', color = 'steelblue')
+            axs_.scatter(range(len(dft)),np.where(dft[low_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'red', label = 'signal')
+            axs_.scatter(range(len(dft)),np.where(dft[up_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'green', label = 'signal')
+            axs_.plot(dft.strat_prod_exp.values, label = 'strategy', color = 'darksalmon')
+            axs_.set_xlabel("index")
+            axs_.set_ylabel("cumulative return")
+            axs_.set_title(f'{map_[open_in]} strategy and cumulative returns')
+            axs_.legend()
+
+        if self.show_plot:
+            plt.plot()
+
+        if self.save_path:
+            result_json_name = f'signals_strategy_return_{self.feature_name}.json'
+            result_plot_name = f'signals_strategy_return_{self.feature_name}.png'
+
+            plt.savefig(self.save_path+result_plot_name)
+
+            with open(self.save_path+result_json_name, "w") as outfile:
+                json.dump(messages, outfile)
+
+        if self.save_path and self.save_aws:
+
+            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
+            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
+
+        if not self.show_plot:
+            plt.close()
+
+        del dft
+
+        if self.return_fig:
+            return fig, messages
+
+class IterateSignalAnalyse(SignalAnalyserObject):
+    """
+    Object that iterates the backtest over a given parameter space
+
+    Attributes
+    ----------
+    test_data_size : int
+    feature_name : str
+    days_list: list
+        list of integers that serve as time horizons
+    arguments_to_test : dict
+        parameter space
+    method: str
+        method to use
+    object_stock: obj
+        object containing data and methods
+    plot: boolean
+        show summary plot of median results
+    best_result: float
+        index of the best result, the index corresponds to the parameter space
+
+    Methods
+    -------
+    execute(show_plot_iter=boolean):
+        iterate the backtest, optionally displaying plots for every iteration
+    """
+    def __init__(self, test_data_size, feature_name, days_list, arguments_to_test, method, object_stock, plot = False):
+        """
+        Parameters
+        ----------
+        test_data_size (int): size of the test data
+        feature_name (str): name of the feature
+        days_list (list): list of integers that serve as time horizons
+        arguments_to_test (dict): parameter space
+        method (str): method to use
+        object_stock (obj): object containing data and methods
+        plot (boolean): show summary plot of median results
+
+        Returns
+        -------
+        None
+        """
+        self.test_data_size = test_data_size
+        self.feature_name = feature_name
+        self.days_list = days_list
+        self.arguments_to_test = arguments_to_test
+        self.method = method
+        self.plot = plot
+        self.object_stock = object_stock
+
+    def execute(self,show_plot_iter = False):
+        """
+        Iterate backtest and compute median result for every iteration
+
+        Parameters
+        ----------
+        show_plot_iter (boolean): display plots for every iteration
+
+        Returns
+        -------
+        None
+        """
+        results = list()
+        for key in self.arguments_to_test.keys():
+            configuration = self.arguments_to_test.get(key)
+            getattr(self.object_stock, self.method)(**configuration)
+            signal_assess = SignalAnalyserObject(self.object_stock.df, self.object_stock.stock_code, show_plot = show_plot_iter, test_size = self.test_data_size, feature_name = self.feature_name)
+            signal_assess.signal_analyser(days_list = self.days_list)
+            mean_median_return = signal_assess.median_return
+            results.append(mean_median_return)
+
+        df_result = pd.DataFrame({'keys':self.arguments_to_test.keys(),'results':results})
+        if self.plot:
+            plt.plot(df_result['keys'], df_result['results'])
+            plt.scatter(df_result['keys'], df_result['results'])
+            plt.title('simulation between configurations')
+            plt.ylabel('median expected return')
+            plt.show()
+
+        best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
+        self.best_result = best_result
+
+def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, analyser_object, plot = False, backtest= False, exit_params = {}):
+    '''
+    code snippet that runs a backtest and displays analysis messages and plots
+
+    Parameters:
+        test_data_size (int): test data size
+        feature_name (str): name of the feature to assess
+        days_list (list): time scope to assess the returns
+        configuration (dict): parameters of the method to run
+        object_stock (obj): object with data to assess
+        method (str): method to use
+        analyser_object (obj): signal_analyser object
+        plot (boolean): if true, plot results
+        backtest (boolean): if true, run backtest
+        exit_params (dict): parameters of exit returns
+
+    Returns:
+        None
+    '''
+    getattr(object_stock, method)(**configuration)
+    signal_assess = analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot, feature_name = feature_name, test_size = test_data_size)
+    signal_assess.signal_analyser(days_list = days_list)
+    signal_assess.create_backtest_signal(backtest, open_in_list = ['down','up'], **exit_params )
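For orientation, a minimal sketch of driving the classes above. The toy OHLC frame and the `signal_up_<feature>`/`signal_low_<feature>` columns are fabricated to satisfy the constructor; the feature name `rsi`, the symbol `TOY`, and the horizons are illustrative, and the import path mirrors the module path above:

```python
# Hypothetical driver for the backtester; all data below is fabricated.
import numpy as np
import pandas as pd
from virgo_modules.src.backtester import SignalAnalyserObject

rng = np.random.default_rng(0)
n = 400
data = pd.DataFrame({
    'Date': pd.date_range('2022-01-03', periods=n, freq='B').astype(str),
    'Close': 100 * np.exp(np.cumsum(rng.normal(0, 0.01, n))),
})
data['Open'] = data['Close'].shift(1).fillna(100.0)
data['High'] = data[['Open', 'Close']].max(axis=1) * 1.01
data['Low'] = data[['Open', 'Close']].min(axis=1) * 0.99
data['signal_up_rsi'] = (rng.random(n) > 0.95).astype(int)  # toy 'roof' signal
data['signal_low_rsi'] = ((rng.random(n) > 0.95) & (data['signal_up_rsi'] == 0)).astype(int)  # toy 'bottom' signal

assess = SignalAnalyserObject(data, 'TOY', feature_name='rsi', test_size=100,
                              show_plot=False, return_fig=True)
fig = assess.signal_analyser(days_list=[5, 10, 20])  # expected-return distributions on the train cut
fig2, messages = assess.create_backtest_signal(days_strategy=10, open_in_list=['down', 'up'])
print(assess.median_return, messages)
```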
virgo_modules/src/edge_utils/__init__.py
ADDED
File without changes
virgo_modules/src/edge_utils/conformal_utils.py
ADDED
@@ -0,0 +1,106 @@
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+from sklearn.pipeline import Pipeline
+import mlflow
+import pandas as pd
+import numpy as np
+from sklearn.base import BaseEstimator, ClassifierMixin
+from mapie.classification import SplitConformalClassifier
+
+class ConformalStack(mlflow.pyfunc.PythonModel):
+    def __init__(self, model,targets, alphas):
+        self.model = model
+        self.targets = targets
+        self.alphas = alphas
+    def fit(self, data):
+        self.classifiers = dict()
+        for i,target in enumerate(self.targets):
+            st = SingleStack(self.model["model"],i)
+            st.fit()
+            seg_model = Pipeline([
+                ('pipe',self.model['pipe_transform']),
+                ('modelbase',st)
+            ])
+            mapie_class = SplitConformalClassifier(seg_model, prefit=True, random_state=123, conformity_score="lac", confidence_level=1-np.array(self.alphas))
+            mapie_class.conformalize(data, data[self.targets[i]].values)
+            self.classifiers[target] = mapie_class
+    def predict_conformal(self, data):
+        for target in self.targets:
+            prefix = target+"_conf"
+            _, y_pis = self.classifiers[target].predict_set(data)
+            for i,alpha in enumerate(self.alphas):
+                data[f'{prefix}-{alpha}'] = y_pis[:,1,i]
+                data[f'{prefix}-{alpha}'] = np.where(data[f'{prefix}-{alpha}'] == True,alpha,0)
+        return data
+
+
+class SingleStack(ClassifierMixin, BaseEstimator):
+    def __init__(self, model, estimator_index):
+        self.model = model
+        self.estimator_index = estimator_index
+
+    def fit(self):
+        self._is_fitted = True
+        self.classes_ = [0,1]
+
+    def predict_proba(self, X):
+        metas_pred = dict()
+        for i,cont in enumerate(self.model.estimators, start=1):
+            _,estimator = cont
+            meta_pred = estimator.predict_proba(X)
+            metas_pred[f"meta{i}0"] = meta_pred[0][:,1]
+            metas_pred[f"meta{i}1"] = meta_pred[1][:,1]
+        self.meta_preds_df__ = pd.DataFrame(metas_pred)
+
+        prediction_vector = list()
+        for i,cont in enumerate(self.model.meta_estimators, start=0):
+            _,estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
+            preds = estimator.predict_proba(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+        return prediction_vector[self.estimator_index]
+
+    def predict(self, X):
+        prediction_vector = list()
+        _ = self.predict_proba(X)
+        for i,cont in enumerate(self.model.meta_estimators, start=0):
+            _,estimator = cont
+            metacols = [f"meta{j}{i}" for j in range(1,len(self.model.estimators)+1)]
+            preds = estimator.predict(self.meta_preds_df__[metacols].values)
+            prediction_vector.append(preds)
+
+        p = np.array(tuple(prediction_vector))
+        return p.reshape((p.shape[1],p.shape[0]))[:,self.estimator_index]
+
+    def __sklearn_is_fitted__(self):
+        return hasattr(self, "_is_fitted") and self._is_fitted
+
+def edge_conformal_lines(data, alphas,threshold = 0.6, plot = False, look_back = 750, offset = 0.08):
+    ### correct labels ###
+    df = data.sort_values('Date').iloc[-look_back:]
+    fig = make_subplots(specs=[[{"secondary_y": True}]])
+    fig.add_trace(go.Scatter(x=df.Date, y=df.Close,mode='lines+markers',marker = dict(color = 'grey'),line = dict(color = 'grey'),name='Close price'))
+    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_up,mode='lines',marker = dict(color = 'blue'),showlegend=True,legendgroup='go up', name='go up'),secondary_y=True)
+    fig.add_trace(go.Scatter(x=df.Date, y=df.proba_target_down,mode='lines',marker = dict(color = 'coral'),showlegend=True,legendgroup='go down',name='go down'),secondary_y=True)
+    for i,alpha in enumerate(alphas, start=1):
+        try:
+            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_up' in x][0]
+            df_ = df[df[col_alpha] != 0]
+            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_up + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'blue')
+                ,showlegend=False,legendgroup='go up',name='go up', text=df_[col_alpha],textposition="bottom center")
+                , secondary_y=True)
+        except:
+            pass
+        try:
+            col_alpha = [x for x in df.columns if str(alpha) in x and 'target_down' in x][0]
+            df_ = df[df[col_alpha] != 0]
+            fig.add_trace(go.Scatter(x=df_.Date, y=df_.proba_target_down + (offset*i),mode='markers',marker = dict(opacity=0.7,size=10, color = 'coral')
+                ,showlegend=False,legendgroup='go down', name='go down',text=df_[col_alpha].astype(str),textposition="bottom center")
+                , secondary_y=True)
+        except:
+            pass
+    fig.add_shape(type="line", xref="paper", yref="y2",x0=0.02, y0=threshold, x1=0.9, y1=threshold,line=dict(color="red",dash="dash"))
+    fig.update_layout(title_text="sirius - edge probabilities conformal",width=1200,height = 500)
+    if plot:
+        fig.show()
+    return fig
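To make the calibration/prediction flow concrete, a hedged sketch of how `ConformalStack` might be used. The `model` dict keys (`'model'`, `'pipe_transform'`) and the stacked model's `estimators`/`meta_estimators` attributes mirror what the code above accesses; `stacked_model`, `pipe`, `calib_df`, and `live_df` are assumed inputs that this file does not define:

```python
# Hypothetical calibration/prediction flow; stacked_model, pipe, calib_df and
# live_df are assumed inputs, and the alpha grid is illustrative.
alphas = [0.1, 0.2]
targets = ['target_up', 'target_down']

conformal = ConformalStack(
    model={'model': stacked_model, 'pipe_transform': pipe},  # keys as read by fit()
    targets=targets,
    alphas=alphas,
)
conformal.fit(calib_df)  # conformalizes one SplitConformalClassifier per target

scored = conformal.predict_conformal(live_df.copy())
# predict_conformal adds columns such as 'target_up_conf-0.1', holding alpha
# where the label falls inside the prediction set and 0 otherwise
fig = edge_conformal_lines(scored, alphas, threshold=0.6, plot=False)
```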