virgo-modules 0.1.0__tar.gz → 0.1.2__tar.gz
This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Note: this version of virgo-modules has been flagged as potentially problematic.
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/PKG-INFO +1 -1
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/setup.py +1 -1
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/backtester.py +119 -5
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/ticketer_source.py +0 -448
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/LICENSE +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/README.md +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/setup.cfg +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/__init__.py +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/__init__.py +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/edge_utils.py +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/re_utils.py +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/SOURCES.txt +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
- {virgo_modules-0.1.0 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
setup.py

@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
 
 setup(
     name="virgo_modules",
-    version="0.1.0",
+    version="0.1.2",
    description="data processing and statistical modeling using stock market data",
    package_dir={"": "virgo_app"},
    packages=find_packages(where="virgo_app"),
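
The packaging change is just the version bump. A minimal sketch for checking which version is installed locally, assuming the package was installed from a registry under the distribution name virgo-modules:

    from importlib.metadata import version

    # the distribution name on the registry is "virgo-modules"
    print(version("virgo-modules"))  # expect "0.1.2" after upgrading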
virgo_app/virgo_modules/src/backtester.py

@@ -50,9 +50,11 @@ class SignalAnalyserObject:
     aws_credentials: dict
     signal_position: int
         if available, signal position to open a position
-    df: pd.DataFrame
+    df: pd.DataFrame
         transformed data of the selected feature to perform back-test
-
+    median_return: float
+        median return after end low signals
+
     Methods
     -------
     signal_analyser(days_list=list):
@@ -169,12 +171,19 @@ class SignalAnalyserObject:
 
         df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
         df['open_short'] = np.where(df.first_in_chain == True, True, np.nan)
+        df.signal_type = df.signal_type.map({'up':'go down', 'down': 'go up'})
+
+        # median return
+        returns_list = [f'return_{days}d' for days in days_list]
+        df_melt = df[df.open_long == True].pivot_table(index=['signal_type'], values=returns_list, aggfunc='median')
+        df_melt['median'] = df_melt[returns_list].median(axis = 1)
+        self.median_return = df_melt.loc['go up', 'median']
 
         # plotting
         fig, axs = plt.subplots(1, 4, figsize = (20,5))
         palette ={"go down": "tomato", "go up": "lightblue"}
 
-        df2 = df[df.signal_type.isin(['up','down'])]
+        df2 = df[df.signal_type.isin(['go down','go up'])]
         df2['lag_Date'] = df2['Date'].shift(1)
         df2['lag_signal_type'] = df2['signal_type'].shift(1)
         df2 = df2[df2.lag_signal_type != df2.signal_type]
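
The new median-return block aggregates the per-horizon returns of opened longs with a pivot table, then takes a row-wise median across horizons. A minimal sketch of that aggregation on toy data (the column names follow the diff; the values are made up):

    import pandas as pd

    # toy frame: one row per opened long, per-horizon returns in percent
    df = pd.DataFrame({
        'signal_type': ['go up', 'go up', 'go down'],
        'return_3d':  [1.0, 3.0, -0.5],
        'return_8d':  [2.0, 4.0, -1.0],
    })
    returns_list = ['return_3d', 'return_8d']

    # median of each horizon per signal type, then the median across horizons
    df_melt = df.pivot_table(index=['signal_type'], values=returns_list, aggfunc='median')
    df_melt['median'] = df_melt[returns_list].median(axis=1)
    median_return = df_melt.loc['go up', 'median']  # 2.5 for this toy data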
@@ -183,7 +192,6 @@ class SignalAnalyserObject:
         sns.stripplot(data=df2, y="span",ax = axs[0], jitter=True, zorder=1)
         axs[0].set_title('span between last signals')
 
-        df.signal_type = df.signal_type.map({'up':'go down', 'down': 'go up'})
         df_ = df[df.last_in_chain == True]
         df_['part'] = '-'
         sns.violinplot(data=df_, y="internal_rn", x='part', ax = axs[1], hue="signal_type", inner="quart",palette = palette,gap=0.1, split=True, linewidth=0.7)
@@ -357,4 +365,110 @@ class SignalAnalyserObject:
         del dft
 
         if self.return_fig:
-            return fig, messages
+            return fig, messages
+
+class IterateSignalAnalyse(SignalAnalyserObject):
+    """
+    object that is going to iterate backtest given a parameter space
+
+    Attributes
+    ----------
+    test_data_size : int
+    feature_name : str
+    days_list: list
+        list of integers that serve as time horizons
+    arguments_to_test : dict
+        paramter space
+    method: str
+        method to use
+    object_stock: obj
+        object containing data and methods
+    plot: boolean
+        show summary plot of median results
+    best_result: float
+        index of the best result, the index corresponds to the parameter space
+
+    Methods
+    -------
+    execute(show_plot_iter=boolean):
+        display plots for every iteration
+    """
+    def __init__(self, test_data_size, feature_name, days_list, arguments_to_test, method, object_stock, plot = False):
+        """
+        Parameters
+        ----------
+        test_data_size (int): size of the test data
+        feature_name (str): name of the feature
+        days_list (list): list of integers that serve as time horizons
+        arguments_to_test (dict): paramter space
+        method (str): method to use
+        object_stock (obj): object containing data and methods
+        plot (boolean): show summary plot of median results
+
+        Returns
+        -------
+        None
+        """
+        self.test_data_size = test_data_size
+        self.feature_name = feature_name
+        self.days_list = days_list
+        self.arguments_to_test = arguments_to_test
+        self.method = method
+        self.plot = plot
+        self.object_stock = object_stock
+
+    def execute(self,show_plot_iter = False):
+        """
+        Iterate backtest and compute median result for every iteration
+
+        Parameters
+        ----------
+        show_plot_iter (boolean): display plots for every iteration
+
+        Returns
+        -------
+        None
+        """
+        results = list()
+        for key in self.arguments_to_test.keys():
+            configuration = self.arguments_to_test.get(key)
+            getattr(self.object_stock, self.method)(**configuration)
+            signal_assess = SignalAnalyserObject(self.object_stock.df, self.object_stock.stock_code, show_plot = show_plot_iter, test_size = self.test_data_size, feature_name = self.feature_name)
+            signal_assess.signal_analyser(days_list = self.days_list)
+            mean_median_return = signal_assess.median_return
+            results.append(mean_median_return)
+
+        df_result = pd.DataFrame({'keys':self.arguments_to_test.keys(),'results':results})
+        if self.plot:
+            plt.plot(df_result['keys'], df_result['results'])
+            plt.scatter(df_result['keys'], df_result['results'])
+            plt.title('simulation between configurations')
+            plt.ylabel('median expected return')
+            plt.show()
+
+        best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
+        self.best_result = best_result
+
+def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, analyser_object, plot = False, backtest= False, exit_params = {}):
+    '''
+    code snippet that is going run backtest and display analysis messages and plots
+
+    Parameters:
+    test_data_size (int): test data size
+    feature_name (str): name of the feature to assess
+    days_list (list): tome scope to assess the returns
+    configuration (dict): parameters of the method to run
+    object_stock (obj): object with data to assess
+    method (str): method to use
+    analyser_object (obj): signal_analyser object
+    plot (boolean): if true, plot results
+    backtest (boolean): if true, run backtest
+    exit_params (dict): parameters of exit returns
+
+    Returns:
+    None
+    '''
+    getattr(object_stock, method)(**configuration)
+    signal_assess = analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot, feature_name = feature_name, test_size = test_data_size)
+    signal_assess.signal_analyser(days_list = days_list)
+    signal_assess.create_backtest_signal(backtest, open_in_list = ['down','up'], **exit_params )
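
The new IterateSignalAnalyse class replaces the module-level execute_signal_analyser and iterate_signal_analyser helpers that this release deletes from ticketer_source.py (see the hunks below). A hypothetical usage sketch, assuming stock is an object exposing df, stock_code, and the named indicator method; the method name 'rsi_feature' and the window grid here are made up for illustration:

    # hypothetical parameter space: key -> kwargs for the indicator method
    arguments_to_test = {
        0: {'window': 10},
        1: {'window': 20},
        2: {'window': 30},
    }

    searcher = IterateSignalAnalyse(
        test_data_size = 250,              # held-out rows for the backtest
        feature_name = 'rsi',              # assumes signal_up_rsi / signal_low_rsi columns
        days_list = [3, 8, 10],            # return horizons, as in the docstrings
        arguments_to_test = arguments_to_test,
        method = 'rsi_feature',            # hypothetical method name on the stock object
        object_stock = stock,              # e.g. a stock_eda_panel-like object
        plot = True,
    )
    searcher.execute(show_plot_iter = False)
    print(searcher.best_result)            # key of the best-scoring configuration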
virgo_app/virgo_modules/src/ticketer_source.py

@@ -2790,68 +2790,6 @@ class hmm_feature_selector():
         self.feature_results = feature_results
         self.best_features = pd.DataFrame(self.feature_results).T.sort_values('mean relevance').iloc[-1,:].features
 
-def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False, exit_params = {}):
-    '''
-    code snippet that is going run some objects. The analysis is signal analyse which is backtesting
-
-    Parameters:
-    test_data_size (int): test data size
-    feature_name (str): name of the feature to assess
-    days_list (list): tome scope to assess the returns
-    configuration (dict): parameters of the method to run
-    object_stock (obj): object with data to assess
-    signal_analyser_object (obj): signal_analyser object
-    plot (boolean): if true, plot results
-    backtest (boolean): if true, run backtest
-    exit_params (dict): parameters of exit returns
-
-    Returns:
-    mean_median_return (float): median return of the backtests
-    '''
-    method(**configuration)
-    signal_assess = signal_analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot)
-    signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
-
-    if backtest:
-        print('-----------------------back test ---------------------------')
-        signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params )
-
-    return signal_assess.mean_median_return
-
-def iterate_signal_analyser(test_data_size,feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
-    '''
-    code snippet is going to iterate signal analyser
-
-    Parameters:
-    test_data_size (int): test data size
-    feature_name (str): name of the feature to assess
-    days_list (list): tome scope to assess the returns
-    arguments_to_test: parameters to test
-    method: methods to run
-    object_stock (obj): object with data to assess
-    signal_analyser_object (obj): signal_analyser object
-    plot (boolean): if true, plot results
-
-    Returns:
-    best_result (int): index from the arguments_to_test with the best result
-    '''
-    results = list()
-    for key in arguments_to_test.keys():
-        configuration = arguments_to_test.get(key)
-        mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
-        results.append(mean_median_return)
-
-    df_result = pd.DataFrame({'keys':arguments_to_test.keys(),'results':results})
-    if plot:
-        plt.plot(df_result['keys'], df_result['results'])
-        plt.scatter(df_result['keys'], df_result['results'])
-        plt.title('simulation between configurations')
-        plt.ylabel('median expected return')
-        plt.show()
-
-    best_result = df_result.sort_values('results',ascending = False)['keys'].values[0]
-    return best_result
-
 class analyse_index(stock_eda_panel):
     """
     class that is going to train hmm models to perform feature selection
@@ -3118,389 +3056,3 @@ def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = F
     # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
     upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
     return selection
-
-#### to deprecate
-
-def sharpe_ratio(return_series):
-
-    '''
-    calculate sharpe ratio for given array.
-
-    Parameters:
-    return_series (pd.series): pandas series of the asset returns
-
-    Returns:
-    sharpe (float): sharpe ratio
-    '''
-
-    N = 255 # Trading days in the year (change to 365 for crypto)
-    rf = 0.005 # Half a percent risk free rare
-    mean = return_series.mean() * N -rf
-    sigma = return_series.std() * np.sqrt(N)
-    sharpe = round(mean / sigma, 3)
-    return sharpe
-
-class signal_analyser_object:
-    """
-    class that is going to analyse signals
-
-    Attributes
-    ----------
-    data : pd.DataFrame
-        symbol of the asset
-    ticket_name :str
-        asset symbol
-    show_plot : boolean
-        if true show plot for every method
-    save_path : str
-        if true, save results in file
-    save_aws : str
-        if true, export results to remote repo
-    aws_credentials : dict
-        credentials for aws
-    return_fig : boolean
-        if true, methods will return objects
-    create_backtest_signal(days_strategy=list, test_size=int, feature_name=str, high_exit=float, low_exit=float):
-        perform backtest signal analysis
-
-    Methods
-    -------
-    signal_analyser(test_size=int, feature_name=str, days_list=list, threshold=float,verbose=boolean, signal_position=boolean):
-        perform signal analysis and feature extraction
-
-    """
-
-    def __init__(self, data,symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
-        """
-        Initialize object
-
-        Parameters
-        ----------
-        data (pd.DataFrame): data
-        ticket_name (str): name of the asset
-        show_plot (boolean): if true show plot for every method
-        save_path (str): if true, save results in file e.g r'C:/path/to/the/file/'
-        save_aws (str): if true, export results to remote repo e.g. 'path/to/file/'
-        aws_credentials (dict): credentials for aws
-        return_fig (boolean): if true, methods will return objects
-
-        Returns
-        -------
-        None
-        """
-        self.data = data.copy()
-        self.ticket_name = symbol_name
-        self.show_plot = show_plot
-        self.save_path = save_path
-        self.save_aws = save_aws
-        self.aws_credentials = aws_credentials
-        self.return_fig = return_fig
-
-    def signal_analyser(self, test_size, feature_name, days_list, threshold = 0.05,verbose = False, signal_position = False):
-        """
-        perform signal analysis and feature extraction
-
-        Parameters
-        ----------
-        test_size (int): test data size
-        feature_name (str): name of the feature to assess
-        days_list (list): list of integers [3,8,10] to assess
-        threshold (float): alpha or z threshold
-        verbose (boolean): print metrics
-        signal_position (int): if true, the signal is taken at the given step after the signal end
-
-        Returns
-        -------
-        None
-        """
-        data = self.data
-        self.feature_name = feature_name
-        up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
-        features_base = ['Date', up_signal, low_signal, 'Close']
-
-        df = data[features_base].sort_values('Date').iloc[0:-test_size,:]
-        returns_list = list()
-
-        for days in days_list:
-
-            feature_ = f'return_{days}d'
-            days = days + signal_position if signal_position else days
-            df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
-            returns_list.append(feature_)
-
-        df['signal_type'] = np.where(
-            df[up_signal] == 1,
-            'up',
-            np.where(
-                df[low_signal] == 1,
-                'down',
-                None
-            )
-        )
-        df = df[~df.signal_type.isna()]
-        df['lag_Date'] = df['Date'].shift(1)
-        df['lag_signal_type'] = df['signal_type'].shift(1)
-        df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
-        df['break'] = np.where((df['span'] > 3) & (df['lag_signal_type'] == df['signal_type']), 1, 0)
-        df['break'] = np.where((df['lag_signal_type'] != df['signal_type']), 1, df['break'])
-
-        df['chain_id'] = df.sort_values(['Date']).groupby(['break']).cumcount() + 1
-        df['chain_id'] = np.where(df['break'] == 1, df['chain_id'], np.nan )
-        df['chain_id'] = df['chain_id'].fillna(method = 'ffill')
-
-        df['internal_rn'] = df.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
-        df['inv_internal_rn'] = df.sort_values(['Date'],ascending = False).groupby(['chain_id']).cumcount() + 1
-
-        df['first_in_chain'] = np.where(df['internal_rn'] == 1, True, False)
-        df['last_in_chain'] = np.where(df['inv_internal_rn'] == 1, True, False)
-
-        df = df.drop(columns = ['break','span','lag_Date','inv_internal_rn']).sort_values('Date')
-        self.df_signal = df
-
-        n_signals_up = len(list(df[df.signal_type == 'up'].chain_id.unique()))
-        n_signals_down = len(list(df[df.signal_type == 'down'].chain_id.unique()))
-        p_scores = list()
-        medians_down = list()
-        validations = list()
-
-        if signal_position:
-            df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
-            df['open_long'] = df['open_long'].shift(signal_position)
-        else:
-            df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
-
-        # df_melt = df[df.last_in_chain == True].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
-        df_melt = df[df.open_long == True].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
-        df_melt = df_melt.dropna()
-
-        for evalx in returns_list:
-
-            sample1 = df_melt[(df_melt.time == evalx) & (df_melt.signal_type == 'up')].value.values
-            sample2 = df_melt[(df_melt.time == evalx) & (df_melt.signal_type == 'down')].value.values
-            pvalue = stats.ttest_ind(sample1, sample2).pvalue
-            median_down = np.median(sample2)
-            median_up = np.median(sample1)
-            validations.append(median_up < 0)
-            validations.append(median_down > 0)
-            p_scores.append(pvalue)
-            medians_down.append(median_down)
-        self.df_melt = df_melt
-        null_ho_eval = threshold > np.mean(p_scores)
-        mean_median_return = np.median(medians_down) ## end metric
-        median_signal_type_eval = validations.count(validations[0]) == len(validations)
-
-        if verbose:
-            print('number of signal up:',n_signals_up)
-            print('number of signal down:',n_signals_down)
-            print('reject ho: ', null_ho_eval)
-            print('mean median:', mean_median_return)
-            print('all validations: ', median_signal_type_eval)
-
-        # if median_signal_type_eval == True and null_ho_eval == True:
-        if null_ho_eval == True:
-            if verbose:
-                print('success evals')
-            self.mean_median_return = mean_median_return
-        else:
-            self.mean_median_return = np.nan
-
-        df2 = df.copy()
-        df2 = df2[df2.open_long == True]
-
-
-        df2['lagdate'] = df2.Date.shift(1)
-        df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lagdate'])).dt.days
-
-        fig, axs = plt.subplots(1, 3, figsize = (15,5))
-
-        sns.violinplot(data=df2, y="span",ax = axs[0], color = 'lightblue', linewidth=0.7,inner="quart")
-        sns.stripplot(data=df2, y="span",ax = axs[0], jitter=True, zorder=1)
-        axs[0].set_title('span between last signals')
-        del df2
-        sns.violinplot(data=df[df.last_in_chain == True], y="internal_rn",ax = axs[1], color = 'lightblue', linewidth=0.7,inner="quart")
-        sns.stripplot(data=df[df.last_in_chain == True], y="internal_rn",ax = axs[1], jitter=True, zorder=1)
-        axs[1].set_title('signal duration distribution')
-
-        palette ={"go down": "tomato", "go up": "lightblue"}
-        df_melt.signal_type = df_melt.signal_type.map({'up':'go down', 'down': 'go up'})
-        sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type",ax = axs[2], split=True, gap=0.1, inner="quart",palette = palette, linewidth=0.8)
-        axs[2].axhline(y=0, color='grey', linestyle='--')
-        axs[2].set_title('signal type expected returns distribution at different time lapses')
-
-        if self.show_plot:
-            plt.show()
-
-        if self.save_path:
-            result_plot_name = f'signals_strategy_distribution_{feature_name}.png'
-            fig.savefig(self.save_path+result_plot_name)
-            # pickle.dump(axs, open(self.save_path+result_plot_name, 'wb'))
-
-        if self.save_path and self.save_aws:
-            # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_plot_name, input_path = self.save_path+result_plot_name)
-            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
-        if not self.show_plot:
-            plt.close()
-
-        del df
-
-        if self.return_fig:
-            return fig
-
-    def create_backtest_signal(self,days_strategy, test_size, feature_name, high_exit = False, low_exit = False, signal_position = False):
-        """
-        perform backtest signal analysis
-
-        Parameters
-        ----------
-        days_strategy (list): list of days to assess returns
-        test_size (str): test data size
-        feature_name (str): name of the feature to assess
-        high_exit (float): high exit thrshold return in backtest
-        low_exit (float): loss exit thrshold return in backtest
-        signal_position (int): if true, the signal is taken at the given step after the signal end
-
-        Returns
-        -------
-        fig (obj): plots
-        messages (dict): dictionary with key metrics
-        """
-        asset_1 = 'Close'
-        up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
-        df1 = self.data.iloc[-test_size:,:].copy()
-        df2 = df1.copy()
-        df2['signal_type'] = np.where(
-            df2[up_signal] == 1,
-            'up',
-            np.where(
-                df2[low_signal] == 1,
-                'down',
-                None
-            )
-        )
-        df2 = df2[~df2.signal_type.isna()]
-        df2['lag_Date'] = df2['Date'].shift(1)
-        df2['lag_signal_type'] = df2['signal_type'].shift(1)
-        df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
-        df2['break'] = np.where((df2['span'] > 3) & (df2['lag_signal_type'] == df2['signal_type']), 1, 0)
-        df2['break'] = np.where((df2['lag_signal_type'] != df2['signal_type']), 1, df2['break'])
-
-        df2['chain_id'] = df2.sort_values(['Date']).groupby(['break']).cumcount() + 1
-        df2['chain_id'] = np.where(df2['break'] == 1, df2['chain_id'], np.nan )
-        df2['chain_id'] = df2['chain_id'].fillna(method = 'ffill')
-
-        df2['internal_rn'] = df2.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
-        df2['inv_internal_rn'] = df2.sort_values(['Date'],ascending = False).groupby(['chain_id']).cumcount() + 1
-
-        df2['first_in_chain'] = np.where(df2['internal_rn'] == 1, True, False)
-        df2['last_in_chain'] = np.where(df2['inv_internal_rn'] == 1, True, False)
-
-        df2 = df2.drop(columns = ['break','span','lag_Date','inv_internal_rn']).sort_values('Date')
-
-        df2 = df2[(df2.last_in_chain == True) & (df2.signal_type == 'down')][['last_in_chain']]
-        dft = df1.merge(df2,how = 'left',left_index=True, right_index=True )
-
-        dft['chain_id'] = dft.sort_values(['Date']).groupby(['last_in_chain']).cumcount() + 1
-        dft['chain_id'] = np.where(dft['last_in_chain'] == True, dft['chain_id'], np.nan )
-        dft['chain_id'] = dft['chain_id'].fillna(method = 'ffill')
-
-        dft['internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
-
-        dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
-        dft['bench_prod'] = dft['lrets_bench'].cumsum()
-        dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
-
-        if signal_position:
-            dft['open_long'] = np.where(dft.last_in_chain == True, True, np.nan)
-            dft['open_long'] = dft.groupby(['chain_id'])['open_long'].shift(signal_position)
-            dft['flag'] = np.where(dft['internal_rn'] < days_strategy + signal_position, 1,0)
-            dft['flag'] = dft.groupby(['chain_id'])['flag'].shift(signal_position)
-        else:
-            dft['open_long'] = np.where(dft.last_in_chain == True, True, np.nan)
-            dft['flag'] = np.where(dft['internal_rn'] < days_strategy, 1,0)
-
-        if high_exit and low_exit:
-            dft['open_strat'] = np.where(dft.open_long == True, dft.Open, np.nan)
-            dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
-            dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
-            dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
-            dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
-            dft['max_step_chain'] = dft.groupby(['chain_id'])['internal_rn'].transform('max')
-            dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['internal_rn'] == days_strategy) | (dft['max_step_chain'] == dft['internal_rn'])), 1, np.nan)
-            dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
-
-            dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
-            dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1,-1,np.nan))
-            dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
-            dft['exit_order'] = dft.sort_values(['Date']).groupby(['chain_id','exit']).cumcount() + 1
-            dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
-            dft = dft.drop(columns = ['exit_order'])
-            ## if last signal is near
-            max_id = dft.chain_id.max()
-            dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).internal_rn.transform('max')
-            dft['exit'] = np.where((dft.chain_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.internal_rn), 1, dft['exit'])
-
-            dft['exit_step'] = np.where(dft.exit == 1, dft.internal_rn, np.nan)
-            dft['exit_step'] = dft.sort_values(['Date']).groupby(['chain_id']).exit_step.transform('max')
-
-            if signal_position:
-                dft['flag'] = np.where( (dft.internal_rn >= signal_position + 1) & (dft.internal_rn <= dft.exit_step) , 1,0)
-            else:
-                dft['flag'] = np.where(dft.internal_rn <= dft.exit_step, 1, 0)
-
-        dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
-        dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(),-0.0,dft['lrets_strat'])
-        dft['lrets_prod'] = dft['lrets_strat'].cumsum()
-        dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
-
-        bench_rets = round(dft['bench_prod_exp'].values[-1]*100,1)
-        strat_rets = round(dft['strat_prod_exp'].values[-1]*100,1)
-
-        bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()),1)
-        strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()),1)
-
-        message1 = f'{bench_rets}%'
-        message2 = f'{strat_rets}%'
-
-        messages = {
-            'benchmark return:':message1,
-            'benchmark sharpe ratio:': bench_sr,
-            'strategy return:':message2,
-            'strategy sharpe ratio:': strat_sr,
-        }
-        if self.show_plot:
-            print('----------------------------')
-            print(messages)
-            print('----------------------------')
-
-        fig = plt.figure(1)
-        plt.plot(dft.bench_prod_exp.values, label = 'benchmark', color = 'steelblue')
-        plt.scatter(range(len(dft)),np.where(dft[low_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'red', label = 'signal')
-        plt.plot(dft.strat_prod_exp.values, label = 'strategy', color = 'darksalmon')
-        plt.xlabel("index")
-        plt.ylabel("comulative return")
-        plt.legend()
-        plt.title('strategy and cumulative returns based on signal strategy')
-        if self.show_plot:
-            plt.plot()
-
-        if self.save_path:
-            result_json_name = f'signals_strategy_return_{feature_name}.json'
-            result_plot_name = f'signals_strategy_return_{feature_name}.png'
-
-            plt.savefig(self.save_path+result_plot_name)
-
-            with open(self.save_path+result_json_name, "w") as outfile:
-                json.dump(messages, outfile)
-
-        if self.save_path and self.save_aws:
-
-            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
-            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
-
-        if not self.show_plot:
-            plt.close()
-
-        del df1,df2,dft
-
-        if self.return_fig:
-            return fig, messages
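
Among the deprecated code removed above is the sharpe_ratio helper, which annualizes a daily return series as (mean * N - rf) / (std * sqrt(N)) with N = 255 trading days and a 0.5% risk-free rate. A minimal standalone sketch of that computation on synthetic returns (the data is made up):

    import numpy as np
    import pandas as pd

    # synthetic daily returns: one trading year of small gaussian moves
    returns = pd.Series(np.random.default_rng(0).normal(0.0005, 0.01, 255))

    N, rf = 255, 0.005                      # trading days per year, risk-free rate
    annual_mean = returns.mean() * N - rf   # annualized excess return
    annual_sigma = returns.std() * np.sqrt(N)
    sharpe = round(annual_mean / annual_sigma, 3)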