virgo-modules 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of virgo-modules might be problematic.

Files changed (18)
  1. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/PKG-INFO +1 -1
  2. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/setup.py +1 -1
  3. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/backtester.py +107 -1
  4. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/ticketer_source.py +0 -448
  5. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
  6. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/LICENSE +0 -0
  7. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/README.md +0 -0
  8. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/setup.cfg +0 -0
  9. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/__init__.py +0 -0
  10. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/__init__.py +0 -0
  11. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
  12. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/edge_utils.py +0 -0
  13. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
  14. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules/src/re_utils.py +0 -0
  15. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/SOURCES.txt +0 -0
  16. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
  17. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
  18. {virgo_modules-0.1.1 → virgo_modules-0.1.2}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: virgo_modules
-Version: 0.1.1
+Version: 0.1.2
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
 
 setup(
     name="virgo_modules",
-    version="0.1.1",
+    version="0.1.2",
    description="data processing and statistical modeling using stock market data",
    package_dir={"": "virgo_app"},
    packages=find_packages(where="virgo_app"),
@@ -365,4 +365,110 @@ class SignalAnalyserObject:
         del dft
 
         if self.return_fig:
-            return fig, messages
+            return fig, messages
+
+class IterateSignalAnalyse(SignalAnalyserObject):
+    """
+    object that iterates the backtest over a given parameter space
+
+    Attributes
+    ----------
+    test_data_size : int
+        size of the test data
+    feature_name : str
+        name of the feature
+    days_list : list
+        list of integers that serve as time horizons
+    arguments_to_test : dict
+        parameter space
+    method : str
+        method to use
+    object_stock : obj
+        object containing data and methods
+    plot : boolean
+        show summary plot of median results
+    best_result : float
+        key of the best result; the key corresponds to an entry in the parameter space
+
+    Methods
+    -------
+    execute(show_plot_iter=boolean):
+        run the iteration, optionally displaying plots for every iteration
+    """
+    def __init__(self, test_data_size, feature_name, days_list, arguments_to_test, method, object_stock, plot = False):
+        """
+        Parameters
+        ----------
+        test_data_size (int): size of the test data
+        feature_name (str): name of the feature
+        days_list (list): list of integers that serve as time horizons
+        arguments_to_test (dict): parameter space
+        method (str): method to use
+        object_stock (obj): object containing data and methods
+        plot (boolean): show summary plot of median results
+
+        Returns
+        -------
+        None
+        """
+        self.test_data_size = test_data_size
+        self.feature_name = feature_name
+        self.days_list = days_list
+        self.arguments_to_test = arguments_to_test
+        self.method = method
+        self.plot = plot
+        self.object_stock = object_stock
+
+    def execute(self, show_plot_iter = False):
+        """
+        Iterate the backtest and compute the median result for every iteration
+
+        Parameters
+        ----------
+        show_plot_iter (boolean): display plots for every iteration
+
+        Returns
+        -------
+        None
+        """
+        results = list()
+        for key in self.arguments_to_test.keys():
+            configuration = self.arguments_to_test.get(key)
+            getattr(self.object_stock, self.method)(**configuration)
+            signal_assess = SignalAnalyserObject(self.object_stock.df, self.object_stock.stock_code, show_plot = show_plot_iter, test_size = self.test_data_size, feature_name = self.feature_name)
+            signal_assess.signal_analyser(days_list = self.days_list)
+            mean_median_return = signal_assess.median_return
+            results.append(mean_median_return)
+
+        df_result = pd.DataFrame({'keys': self.arguments_to_test.keys(), 'results': results})
+        if self.plot:
+            plt.plot(df_result['keys'], df_result['results'])
+            plt.scatter(df_result['keys'], df_result['results'])
+            plt.title('simulation between configurations')
+            plt.ylabel('median expected return')
+            plt.show()
+
+        best_result = df_result.sort_values('results', ascending = False)['keys'].values[0]
+        self.best_result = best_result
+
+def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, analyser_object, plot = False, backtest = False, exit_params = {}):
+    '''
+    code snippet that runs a backtest and displays analysis messages and plots
+
+    Parameters:
+        test_data_size (int): test data size
+        feature_name (str): name of the feature to assess
+        days_list (list): time scope to assess the returns
+        configuration (dict): parameters of the method to run
+        object_stock (obj): object with data to assess
+        method (str): method to use
+        analyser_object (obj): signal_analyser object
+        plot (boolean): if true, plot results
+        backtest (boolean): if true, run backtest
+        exit_params (dict): parameters of exit returns
+
+    Returns:
+        None
+    '''
+    getattr(object_stock, method)(**configuration)
+    signal_assess = analyser_object(object_stock.df, object_stock.stock_code, show_plot = plot, feature_name = feature_name, test_size = test_data_size)
+    signal_assess.signal_analyser(days_list = days_list)
+    signal_assess.create_backtest_signal(backtest, open_in_list = ['down','up'], **exit_params)
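
For orientation, here is a minimal, hypothetical sketch of how the new iteration API added above might be driven. The stock object, its generate_signal method, the feature name and the contents of the parameter space are placeholders for illustration, not names confirmed by this diff:

# hypothetical usage sketch -- `stock` stands in for any object exposing
# .df, .stock_code and the signal-generating method named by `method`
space = {
    1: {'window': 10},   # each key maps to one configuration to backtest
    2: {'window': 20},
    3: {'window': 30},
}
iterator = IterateSignalAnalyse(
    test_data_size = 250,
    feature_name = 'rsi',            # assumed feature name
    days_list = [3, 8, 10],          # time horizons in days
    arguments_to_test = space,
    method = 'generate_signal',      # assumed method name on the stock object
    object_stock = stock,
    plot = True,
)
iterator.execute(show_plot_iter = False)
print(iterator.best_result)          # key of the best-scoring configuration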
@@ -2790,68 +2790,6 @@ class hmm_feature_selector():
         self.feature_results = feature_results
         self.best_features = pd.DataFrame(self.feature_results).T.sort_values('mean relevance').iloc[-1,:].features
 
-def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest = False, exit_params = {}):
-    '''
-    code snippet that runs the signal analysis, i.e. the backtest
-
-    Parameters:
-        test_data_size (int): test data size
-        feature_name (str): name of the feature to assess
-        days_list (list): time scope to assess the returns
-        configuration (dict): parameters of the method to run
-        object_stock (obj): object with data to assess
-        signal_analyser_object (obj): signal_analyser object
-        plot (boolean): if true, plot results
-        backtest (boolean): if true, run backtest
-        exit_params (dict): parameters of exit returns
-
-    Returns:
-        mean_median_return (float): median return of the backtests
-    '''
-    method(**configuration)
-    signal_assess = signal_analyser_object(object_stock.df, object_stock.stock_code, show_plot = plot)
-    signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
-
-    if backtest:
-        print('-----------------------back test ---------------------------')
-        signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params)
-
-    return signal_assess.mean_median_return
-
-def iterate_signal_analyser(test_data_size, feature_name, days_list, arguments_to_test, method, object_stock, signal_analyser_object, plot = True):
-    '''
-    code snippet that iterates the signal analyser
-
-    Parameters:
-        test_data_size (int): test data size
-        feature_name (str): name of the feature to assess
-        days_list (list): time scope to assess the returns
-        arguments_to_test (dict): parameters to test
-        method (obj): method to run
-        object_stock (obj): object with data to assess
-        signal_analyser_object (obj): signal_analyser object
-        plot (boolean): if true, plot results
-
-    Returns:
-        best_result (int): key from the arguments_to_test with the best result
-    '''
-    results = list()
-    for key in arguments_to_test.keys():
-        configuration = arguments_to_test.get(key)
-        mean_median_return = execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object)
-        results.append(mean_median_return)
-
-    df_result = pd.DataFrame({'keys': arguments_to_test.keys(), 'results': results})
-    if plot:
-        plt.plot(df_result['keys'], df_result['results'])
-        plt.scatter(df_result['keys'], df_result['results'])
-        plt.title('simulation between configurations')
-        plt.ylabel('median expected return')
-        plt.show()
-
-    best_result = df_result.sort_values('results', ascending = False)['keys'].values[0]
-    return best_result
-
 class analyse_index(stock_eda_panel):
     """
     class that is going to train hmm models to perform feature selection
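
Call sites that relied on the helpers removed above would now go through the backtester module instead. A rough before/after sketch under the same placeholder names as earlier; note that the old helper received a bound callable as method, while the new class receives the method name and resolves it with getattr:

# before (ticketer_source, removed in 0.1.2):
#     best = iterate_signal_analyser(test_data_size, feature_name, days_list,
#                                    arguments_to_test, stock.generate_signal,
#                                    stock, signal_analyser_object)
# after (backtester, added in 0.1.2):
iterator = IterateSignalAnalyse(test_data_size, feature_name, days_list,
                                arguments_to_test, 'generate_signal', stock)
iterator.execute()
best = iterator.best_result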
@@ -3118,389 +3056,3 @@ def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = F
         # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
         upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
     return selection
-
-#### to deprecate
-
-def sharpe_ratio(return_series):
-    '''
-    calculate the sharpe ratio for a given return series.
-
-    Parameters:
-        return_series (pd.Series): pandas series of the asset returns
-
-    Returns:
-        sharpe (float): sharpe ratio
-    '''
-    N = 255  # trading days in the year (change to 365 for crypto)
-    rf = 0.005  # half a percent risk-free rate
-    mean = return_series.mean() * N - rf
-    sigma = return_series.std() * np.sqrt(N)
-    sharpe = round(mean / sigma, 3)
-    return sharpe
-
-class signal_analyser_object:
-    """
-    class that analyses signals
-
-    Attributes
-    ----------
-    data : pd.DataFrame
-        data of the asset
-    ticket_name : str
-        asset symbol
-    show_plot : boolean
-        if true, show a plot for every method
-    save_path : str
-        if set, save results to a file
-    save_aws : str
-        if set, export results to the remote repo
-    aws_credentials : dict
-        credentials for aws
-    return_fig : boolean
-        if true, methods will return objects
-
-    Methods
-    -------
-    signal_analyser(test_size=int, feature_name=str, days_list=list, threshold=float, verbose=boolean, signal_position=boolean):
-        perform signal analysis and feature extraction
-    create_backtest_signal(days_strategy=list, test_size=int, feature_name=str, high_exit=float, low_exit=float):
-        perform backtest signal analysis
-    """
-
-    def __init__(self, data, symbol_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
-        """
-        Initialize the object
-
-        Parameters
-        ----------
-        data (pd.DataFrame): data
-        symbol_name (str): name of the asset
-        show_plot (boolean): if true, show a plot for every method
-        save_path (str): if set, save results to a file e.g. r'C:/path/to/the/file/'
-        save_aws (str): if set, export results to the remote repo e.g. 'path/to/file/'
-        aws_credentials (dict): credentials for aws
-        return_fig (boolean): if true, methods will return objects
-
-        Returns
-        -------
-        None
-        """
-        self.data = data.copy()
-        self.ticket_name = symbol_name
-        self.show_plot = show_plot
-        self.save_path = save_path
-        self.save_aws = save_aws
-        self.aws_credentials = aws_credentials
-        self.return_fig = return_fig
-
-    def signal_analyser(self, test_size, feature_name, days_list, threshold = 0.05, verbose = False, signal_position = False):
-        """
-        perform signal analysis and feature extraction
-
-        Parameters
-        ----------
-        test_size (int): test data size
-        feature_name (str): name of the feature to assess
-        days_list (list): list of integers, e.g. [3, 8, 10], to assess
-        threshold (float): alpha or z threshold
-        verbose (boolean): print metrics
-        signal_position (int): if set, the signal is taken at the given step after the signal end
-
-        Returns
-        -------
-        None
-        """
-        data = self.data
-        self.feature_name = feature_name
-        up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
-        features_base = ['Date', up_signal, low_signal, 'Close']
-
-        df = data[features_base].sort_values('Date').iloc[0:-test_size,:]
-        returns_list = list()
-
-        for days in days_list:
-            feature_ = f'return_{days}d'
-            days = days + signal_position if signal_position else days
-            df[feature_] = (df['Close'].shift(-days)/df['Close']-1)*100
-            returns_list.append(feature_)
-
-        df['signal_type'] = np.where(
-            df[up_signal] == 1,
-            'up',
-            np.where(
-                df[low_signal] == 1,
-                'down',
-                None
-            )
-        )
-        df = df[~df.signal_type.isna()]
-        df['lag_Date'] = df['Date'].shift(1)
-        df['lag_signal_type'] = df['signal_type'].shift(1)
-        df['span'] = (pd.to_datetime(df['Date']) - pd.to_datetime(df['lag_Date'])).dt.days - 1
-        df['break'] = np.where((df['span'] > 3) & (df['lag_signal_type'] == df['signal_type']), 1, 0)
-        df['break'] = np.where((df['lag_signal_type'] != df['signal_type']), 1, df['break'])
-
-        df['chain_id'] = df.sort_values(['Date']).groupby(['break']).cumcount() + 1
-        df['chain_id'] = np.where(df['break'] == 1, df['chain_id'], np.nan)
-        df['chain_id'] = df['chain_id'].fillna(method = 'ffill')
-
-        df['internal_rn'] = df.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
-        df['inv_internal_rn'] = df.sort_values(['Date'], ascending = False).groupby(['chain_id']).cumcount() + 1
-
-        df['first_in_chain'] = np.where(df['internal_rn'] == 1, True, False)
-        df['last_in_chain'] = np.where(df['inv_internal_rn'] == 1, True, False)
-
-        df = df.drop(columns = ['break','span','lag_Date','inv_internal_rn']).sort_values('Date')
-        self.df_signal = df
-
-        n_signals_up = len(list(df[df.signal_type == 'up'].chain_id.unique()))
-        n_signals_down = len(list(df[df.signal_type == 'down'].chain_id.unique()))
-        p_scores = list()
-        medians_down = list()
-        validations = list()
-
-        if signal_position:
-            df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
-            df['open_long'] = df['open_long'].shift(signal_position)
-        else:
-            df['open_long'] = np.where(df.last_in_chain == True, True, np.nan)
-
-        # df_melt = df[df.last_in_chain == True].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
-        df_melt = df[df.open_long == True].melt(id_vars=['signal_type'], value_vars=returns_list, var_name='time', value_name='value')
-        df_melt = df_melt.dropna()
-
-        for evalx in returns_list:
-            sample1 = df_melt[(df_melt.time == evalx) & (df_melt.signal_type == 'up')].value.values
-            sample2 = df_melt[(df_melt.time == evalx) & (df_melt.signal_type == 'down')].value.values
-            pvalue = stats.ttest_ind(sample1, sample2).pvalue
-            median_down = np.median(sample2)
-            median_up = np.median(sample1)
-            validations.append(median_up < 0)
-            validations.append(median_down > 0)
-            p_scores.append(pvalue)
-            medians_down.append(median_down)
-        self.df_melt = df_melt
-        null_ho_eval = threshold > np.mean(p_scores)
-        mean_median_return = np.median(medians_down)  ## end metric
-        median_signal_type_eval = validations.count(validations[0]) == len(validations)
-
-        if verbose:
-            print('number of signal up:', n_signals_up)
-            print('number of signal down:', n_signals_down)
-            print('reject ho: ', null_ho_eval)
-            print('mean median:', mean_median_return)
-            print('all validations: ', median_signal_type_eval)
-
-        # if median_signal_type_eval == True and null_ho_eval == True:
-        if null_ho_eval == True:
-            if verbose:
-                print('success evals')
-            self.mean_median_return = mean_median_return
-        else:
-            self.mean_median_return = np.nan
-
-        df2 = df.copy()
-        df2 = df2[df2.open_long == True]
-
-        df2['lagdate'] = df2.Date.shift(1)
-        df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lagdate'])).dt.days
-
-        fig, axs = plt.subplots(1, 3, figsize = (15,5))
-
-        sns.violinplot(data=df2, y="span", ax = axs[0], color = 'lightblue', linewidth=0.7, inner="quart")
-        sns.stripplot(data=df2, y="span", ax = axs[0], jitter=True, zorder=1)
-        axs[0].set_title('span between last signals')
-        del df2
-        sns.violinplot(data=df[df.last_in_chain == True], y="internal_rn", ax = axs[1], color = 'lightblue', linewidth=0.7, inner="quart")
-        sns.stripplot(data=df[df.last_in_chain == True], y="internal_rn", ax = axs[1], jitter=True, zorder=1)
-        axs[1].set_title('signal duration distribution')
-
-        palette = {"go down": "tomato", "go up": "lightblue"}
-        df_melt.signal_type = df_melt.signal_type.map({'up': 'go down', 'down': 'go up'})
-        sns.violinplot(data=df_melt, x="time", y="value", hue="signal_type", ax = axs[2], split=True, gap=0.1, inner="quart", palette = palette, linewidth=0.8)
-        axs[2].axhline(y=0, color='grey', linestyle='--')
-        axs[2].set_title('signal type expected returns distribution at different time lapses')
-
-        if self.show_plot:
-            plt.show()
-
-        if self.save_path:
-            result_plot_name = f'signals_strategy_distribution_{feature_name}.png'
-            fig.savefig(self.save_path+result_plot_name)
-            # pickle.dump(axs, open(self.save_path+result_plot_name, 'wb'))
-
-        if self.save_path and self.save_aws:
-            # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_plot_name, input_path = self.save_path+result_plot_name)
-            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
-        if not self.show_plot:
-            plt.close()
-
-        del df
-
-        if self.return_fig:
-            return fig
-
-    def create_backtest_signal(self, days_strategy, test_size, feature_name, high_exit = False, low_exit = False, signal_position = False):
-        """
-        perform backtest signal analysis
-
-        Parameters
-        ----------
-        days_strategy (list): list of days to assess returns
-        test_size (int): test data size
-        feature_name (str): name of the feature to assess
-        high_exit (float): high exit threshold return in backtest
-        low_exit (float): loss exit threshold return in backtest
-        signal_position (int): if set, the signal is taken at the given step after the signal end
-
-        Returns
-        -------
-        fig (obj): plots
-        messages (dict): dictionary with key metrics
-        """
-        asset_1 = 'Close'
-        up_signal, low_signal = f'signal_up_{feature_name}', f'signal_low_{feature_name}'
-        df1 = self.data.iloc[-test_size:,:].copy()
-        df2 = df1.copy()
-        df2['signal_type'] = np.where(
-            df2[up_signal] == 1,
-            'up',
-            np.where(
-                df2[low_signal] == 1,
-                'down',
-                None
-            )
-        )
-        df2 = df2[~df2.signal_type.isna()]
-        df2['lag_Date'] = df2['Date'].shift(1)
-        df2['lag_signal_type'] = df2['signal_type'].shift(1)
-        df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
-        df2['break'] = np.where((df2['span'] > 3) & (df2['lag_signal_type'] == df2['signal_type']), 1, 0)
-        df2['break'] = np.where((df2['lag_signal_type'] != df2['signal_type']), 1, df2['break'])
-
-        df2['chain_id'] = df2.sort_values(['Date']).groupby(['break']).cumcount() + 1
-        df2['chain_id'] = np.where(df2['break'] == 1, df2['chain_id'], np.nan)
-        df2['chain_id'] = df2['chain_id'].fillna(method = 'ffill')
-
-        df2['internal_rn'] = df2.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
-        df2['inv_internal_rn'] = df2.sort_values(['Date'], ascending = False).groupby(['chain_id']).cumcount() + 1
-
-        df2['first_in_chain'] = np.where(df2['internal_rn'] == 1, True, False)
-        df2['last_in_chain'] = np.where(df2['inv_internal_rn'] == 1, True, False)
-
-        df2 = df2.drop(columns = ['break','span','lag_Date','inv_internal_rn']).sort_values('Date')
-
-        df2 = df2[(df2.last_in_chain == True) & (df2.signal_type == 'down')][['last_in_chain']]
-        dft = df1.merge(df2, how = 'left', left_index=True, right_index=True)
-
-        dft['chain_id'] = dft.sort_values(['Date']).groupby(['last_in_chain']).cumcount() + 1
-        dft['chain_id'] = np.where(dft['last_in_chain'] == True, dft['chain_id'], np.nan)
-        dft['chain_id'] = dft['chain_id'].fillna(method = 'ffill')
-
-        dft['internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
-
-        dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
-        dft['bench_prod'] = dft['lrets_bench'].cumsum()
-        dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
-
-        if signal_position:
-            dft['open_long'] = np.where(dft.last_in_chain == True, True, np.nan)
-            dft['open_long'] = dft.groupby(['chain_id'])['open_long'].shift(signal_position)
-            dft['flag'] = np.where(dft['internal_rn'] < days_strategy + signal_position, 1, 0)
-            dft['flag'] = dft.groupby(['chain_id'])['flag'].shift(signal_position)
-        else:
-            dft['open_long'] = np.where(dft.last_in_chain == True, True, np.nan)
-            dft['flag'] = np.where(dft['internal_rn'] < days_strategy, 1, 0)
-
-        if high_exit and low_exit:
-            dft['open_strat'] = np.where(dft.open_long == True, dft.Open, np.nan)
-            dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
-            dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
-            dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
-            dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
-            dft['max_step_chain'] = dft.groupby(['chain_id'])['internal_rn'].transform('max')
-            dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['internal_rn'] == days_strategy) | (dft['max_step_chain'] == dft['internal_rn'])), 1, np.nan)
-            dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
-
-            dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
-            dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1, -1, np.nan))
-            dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
-            dft['exit_order'] = dft.sort_values(['Date']).groupby(['chain_id','exit']).cumcount() + 1
-            dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
-            dft = dft.drop(columns = ['exit_order'])
-            ## if the last signal is near
-            max_id = dft.chain_id.max()
-            dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).internal_rn.transform('max')
-            dft['exit'] = np.where((dft.chain_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.internal_rn), 1, dft['exit'])
-
-            dft['exit_step'] = np.where(dft.exit == 1, dft.internal_rn, np.nan)
-            dft['exit_step'] = dft.sort_values(['Date']).groupby(['chain_id']).exit_step.transform('max')
-
-            if signal_position:
-                dft['flag'] = np.where((dft.internal_rn >= signal_position + 1) & (dft.internal_rn <= dft.exit_step), 1, 0)
-            else:
-                dft['flag'] = np.where(dft.internal_rn <= dft.exit_step, 1, 0)
-
-        dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
-        dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(), -0.0, dft['lrets_strat'])
-        dft['lrets_prod'] = dft['lrets_strat'].cumsum()
-        dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
-
-        bench_rets = round(dft['bench_prod_exp'].values[-1]*100, 1)
-        strat_rets = round(dft['strat_prod_exp'].values[-1]*100, 1)
-
-        bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()), 1)
-        strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()), 1)
-
-        message1 = f'{bench_rets}%'
-        message2 = f'{strat_rets}%'
-
-        messages = {
-            'benchmark return:': message1,
-            'benchmark sharpe ratio:': bench_sr,
-            'strategy return:': message2,
-            'strategy sharpe ratio:': strat_sr,
-        }
-        if self.show_plot:
-            print('----------------------------')
-            print(messages)
-            print('----------------------------')
-
-        fig = plt.figure(1)
-        plt.plot(dft.bench_prod_exp.values, label = 'benchmark', color = 'steelblue')
-        plt.scatter(range(len(dft)), np.where(dft[low_signal] == 1, dft.bench_prod_exp.values, np.nan), color = 'red', label = 'signal')
-        plt.plot(dft.strat_prod_exp.values, label = 'strategy', color = 'darksalmon')
-        plt.xlabel("index")
-        plt.ylabel("cumulative return")
-        plt.legend()
-        plt.title('strategy and cumulative returns based on signal strategy')
-        if self.show_plot:
-            plt.show()
-
-        if self.save_path:
-            result_json_name = f'signals_strategy_return_{feature_name}.json'
-            result_plot_name = f'signals_strategy_return_{feature_name}.png'
-
-            plt.savefig(self.save_path+result_plot_name)
-
-            with open(self.save_path+result_json_name, "w") as outfile:
-                json.dump(messages, outfile)
-
-        if self.save_path and self.save_aws:
-            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
-            upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
-
-        if not self.show_plot:
-            plt.close()
-
-        del df1, df2, dft
-
-        if self.return_fig:
-            return fig, messages
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: virgo-modules
-Version: 0.1.1
+Version: 0.1.2
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire