virgo-modules 0.0.74__py3-none-any.whl → 0.0.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of virgo-modules might be problematic.

@@ -31,6 +31,18 @@ from pykalman import KalmanFilter
 from .aws_utils import upload_file_to_aws
 
 def calculate_cointegration(series_1, series_2):
+    '''
+    calculate the cointegration score of two time series.
+
+    Parameters:
+        series_1 (pd.Series): pandas series of the asset returns
+        series_2 (pd.Series): pandas series of the asset returns
+
+    Returns:
+        coint_flag (int): cointegration flag, 1 or 0; 1 if the p value is below 0.05 and coint_t is below the critical value
+        hedge_value (float): hedge value
+    '''
+
     coint_flag = 0
     coint_res = coint(series_1, series_2)
     coint_t = coint_res[0]
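
Aside for reviewers: a minimal sketch of how such a cointegration check can be implemented with statsmodels and an OLS hedge ratio. The 0.05 cutoff, the choice of the 5% critical value and the OLS-based hedge ratio are assumptions inferred from the docstring above, not necessarily the package's exact logic.

import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint

def cointegration_sketch(series_1: pd.Series, series_2: pd.Series):
    # Engle-Granger test: t-statistic, p-value and the 1%/5%/10% critical values
    coint_t, p_value, crit_values = coint(series_1, series_2)
    # flag the pair as cointegrated when both the p-value and the t-statistic pass (assumed rule)
    coint_flag = int(p_value < 0.05 and coint_t < crit_values[1])
    # hedge ratio from an OLS fit of series_1 on series_2 (a common choice, assumed here)
    hedge_value = float(sm.OLS(series_1, series_2).fit().params.iloc[0])
    return coint_flag, hedge_value
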
@@ -44,8 +56,43 @@ def calculate_cointegration(series_1, series_2):
     return coint_flag, hedge_value
 
 class pair_finder():
+    """
+    class that assesses two assets to evaluate whether they are cointegrated
+
+    Attributes
+    ----------
+    df : pd.DataFrame
+        dataframe of the merged assets with the spread score
+    asset_1 : str
+        asset to assess
+    asset_2 : str
+        secondary asset to assess
+
+    Methods
+    -------
+    produce_zscore(window=int, z_threshold=float, verbose=boolean):
+        produce the z score of the spread and derive signals using window functions
+    plot_scores():
+        display plots of the time series, the signals and other plots for the pair signal strategy
+    evaluate_signal(days_list=list(), test_size=int, signal_position=int, threshold=float, verbose=boolean, plot=boolean):
+        evaluate the signal strategy using future returns
+    create_backtest_signal(days_strategy=int, test_size=int):
+        create a backtest of the strategy and get some plot analysis
+    """
     def __init__(self, raw_data , asset_1 ,asset_2):
-
+        """
+        Initialize the object, selecting just the two assets and computing the spread between them
+
+        Parameters
+        ----------
+        raw_data (pd.DataFrame): dataframe of all assets
+        asset_1 (str): asset to assess
+        asset_2 (str): secondary asset to assess
+
+        Returns
+        -------
+        None
+        """
         df = raw_data[[asset_1, asset_2]]
         coint_flag, hedge_ratio = calculate_cointegration(df[asset_1], df[asset_2])
         spread = df[asset_1] - (hedge_ratio * df[asset_2])
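
Aside for reviewers: a hedged usage sketch of the pair_finder API documented in this hunk. The asset names, parameter values and synthetic data are illustrative, and the exact layout the class expects for raw_data (date index versus a Date column) should be checked against the package itself.

import numpy as np
import pandas as pd

# synthetic prices for two hypothetical, loosely related assets
dates = pd.date_range('2022-01-03', periods=500, freq='D')
base = 100 + np.cumsum(np.random.normal(0, 1, 500))
raw_data = pd.DataFrame({'AAA': base, 'BBB': 0.8 * base + np.random.normal(0, 0.5, 500)}, index=dates)

finder = pair_finder(raw_data, 'AAA', 'BBB')
finder.produce_zscore(window=30, z_threshold=1.5, verbose=True)   # rolling z score and signals
finder.plot_scores()                                              # spread and signal plots
finder.evaluate_signal(days_list=[5, 10, 20], test_size=100)      # future-return evaluation
finder.create_backtest_signal(days_strategy=10, test_size=100)    # simple backtest
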
@@ -55,6 +102,19 @@ class pair_finder():
         self.asset_2 = asset_2
 
     def produce_zscore(self, window, z_threshold, verbose = False):
+        """
+        produce the z score of the spread and derive signals using window functions
+
+        Parameters
+        ----------
+        window (int): window size
+        z_threshold (float): alpha and z threshold for the normalized feature
+        verbose (boolean): if true, print analysis
+
+        Returns
+        -------
+        None
+        """
         self.z_threshold = z_threshold
         spread_series = pd.Series(self.df.spread)
         mean = spread_series.rolling(center = False, window = window).mean()
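
Aside for reviewers: a minimal sketch of the rolling z-score computation this method describes, following the rolling-mean line above. The standard-deviation step and the threshold-based signal columns are assumptions, not necessarily the package's exact implementation.

import pandas as pd

def rolling_zscore_sketch(spread: pd.Series, window: int, z_threshold: float) -> pd.DataFrame:
    # rolling mean and standard deviation of the spread
    mean = spread.rolling(center=False, window=window).mean()
    std = spread.rolling(center=False, window=window).std()
    zscore = (spread - mean) / std
    # signals fire when the z score leaves the threshold band
    return pd.DataFrame({
        'zscore': zscore,
        'up_pair_signal': (zscore > z_threshold).astype(int),
        'low_pair_signal': (zscore < -z_threshold).astype(int),
    })
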
@@ -74,7 +134,17 @@ class pair_finder():
         self.df['low_pair_signal'] = low_signal
 
     def plot_scores(self):
-
+        """
+        display plots of the time series, the signals and other plots for the pair signal strategy
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        None
+        """
         plt.axhline(y=0.0, color='grey', linestyle='--')
         plt.figure(1, figsize = (10, 4))
         plt.plot(self.df.spread.values)
@@ -104,7 +174,22 @@ class pair_finder():
         fig.show()
 
     def evaluate_signal(self, days_list,test_size, signal_position = False,threshold = 0.05,verbose = False, plot = False):
-
+        """
+        evaluate the signal strategy using future returns
+
+        Parameters
+        ----------
+        days_list (list): list of future-return horizons in days
+        test_size (int): test data size; the remaining data is taken as training data
+        signal_position (int): position of the signal used to open a position
+        threshold (float): alpha or z threshold of the normalized feature
+        verbose (boolean): if true, print results
+        plot (boolean): if true, display plots
+
+        Returns
+        -------
+        None
+        """
         df = self.df.sort_values('Date').iloc[0:-test_size,:].copy()
         returns_list = list()
 
@@ -206,6 +291,18 @@ class pair_finder():
         del df
 
     def create_backtest_signal(self,days_strategy, test_size):
+        """
+        create a backtest of the strategy and get some plot analysis
+
+        Parameters
+        ----------
+        days_strategy (int): future-return horizon in days for the strategy
+        test_size (int): test data size; the remaining data is taken as training data
+
+        Returns
+        -------
+        None
+        """
         asset_1 = self.asset_1
         df1 = self.df.iloc[-test_size:,:].copy()
         df2 = df1.copy()
@@ -273,7 +370,18 @@ class pair_finder():
         del df1,df2,dft
 
 def produce_big_dataset(data_frames, stocks_codes_, feature_list, limit = 500):
-
+    '''
+    combine multiple assets using a common schema
+
+    Parameters:
+        data_frames (dict): dataframes of the assets, keyed by asset code
+        stocks_codes_ (list): assets to select
+        feature_list (list): feature list
+        limit (int): number of observations per asset
+
+    Returns:
+        dataframe (pd.DataFrame): base dataframe with the extra data
+    '''
     feature_list_ = list()
     columns_vector = list(data_frames[stocks_codes_[-1]].columns )
     for feat in feature_list:
@@ -301,7 +409,19 @@ def produce_big_dataset(data_frames, stocks_codes_, feature_list, limit = 500):
     return dataframe
 
 def ranking(data, weighted_features, top = 5, window = 5):
-
+    '''
+    Create a ranking of assets given the current signals and a weighted average importance
+
+    Parameters:
+        data (pd.DataFrame): base data
+        weighted_features (dict): configuration dictionary of features and their weights
+        top (int): number of top assets to return
+        window (int): number of days to assess
+
+    Returns:
+        top_up (list): top assets by upper (roof) signal
+        top_low (list): top assets by lower (bottom) signal
+    '''
     features = weighted_features.keys()
     up_columns = ['signal_up_' + x for x in features]
     low_columns = ['signal_low_' + x for x in features]
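
Aside for reviewers: a sketch of the kind of weighted signal ranking this docstring describes. The signal_up_/signal_low_ column naming follows the code visible in this hunk, while the Ticket/Date grouping and the mean-based aggregation are assumptions about how the weights could be applied.

import pandas as pd

def ranking_sketch(data: pd.DataFrame, weighted_features: dict, top: int = 5, window: int = 5):
    features = list(weighted_features.keys())
    weights = pd.Series(weighted_features)
    up_columns = ['signal_up_' + x for x in features]
    low_columns = ['signal_low_' + x for x in features]

    # keep the last `window` days per asset and weight each feature's signal column
    recent = data.sort_values('Date').groupby('Ticket').tail(window)
    up_score = sum(recent.groupby('Ticket')[c].mean() * weights[f] for c, f in zip(up_columns, features))
    low_score = sum(recent.groupby('Ticket')[c].mean() * weights[f] for c, f in zip(low_columns, features))

    top_up = up_score.sort_values(ascending=False).head(top).index.tolist()
    top_low = low_score.sort_values(ascending=False).head(top).index.tolist()
    return top_up, top_low
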
@@ -336,16 +456,23 @@ def ranking(data, weighted_features, top = 5, window = 5):
     return top_up, top_low
 
 def produce_dashboard(data, columns , ticket_list, show_plot = True, nrows = 150,save_name = False, save_path = False, save_aws = False, aws_credential = False):
-    """
-    data: pandas df
-    columns: list
-    ticket_list: list asset list
-    nrows: int
-    show_plot: bool
-    save_path: str local path for saving e.g r'C:/path/to/the/file/'
-    save_aws: str remote key in s3 bucket path e.g. 'path/to/file/'
-    aws_credentials: dict
-    """
+    '''
+    produce a dashboard using the signals and a list of assets
+
+    Parameters:
+        data (pd.DataFrame): base data
+        columns (list): list of features or signals
+        ticket_list (list): list of assets
+        show_plot (boolean): if true, display the plot
+        nrows (int): number of days back to display
+        save_name (str): name of the resulting dashboard file
+        save_path (str): local path for saving e.g r'C:/path/to/the/file/'
+        save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+        aws_credential (dict): aws credentials
+
+    Returns:
+        None
+    '''
     top = len(ticket_list)
     columns = ['history'] + columns
     subtitles = list()
@@ -395,7 +522,17 @@ def produce_dashboard(data, columns , ticket_list, show_plot = True, nrows = 150
 
 
 def rank_by_return(data, lag_days, top_n = 5):
-
+    '''
+    produce a ranking of assets by returns
+
+    Parameters:
+        data (pd.DataFrame): base data
+        lag_days (int): number of days to consider
+        top_n (int): number of top assets to return
+
+    Returns:
+        result (list): the top n assets by return
+    '''
     data = data.sort_values(['Ticket','Date'], ascending=[False,False]).reset_index(drop = True)
     data['first'] = data.sort_values(['Date'], ascending=[False]).groupby(['Ticket']).cumcount() + 1
     data = data[data['first'] <= lag_days]
@@ -416,18 +553,19 @@ def rank_by_return(data, lag_days, top_n = 5):
     return result
 
 def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_available: object = False, data_window:str = '5y') -> object:
-    """
-    this functions runs the stock_eda_panel
-    it is shared between train model and predictions
-    arguments:
-    hmm_available: if the hmm is available, in prediction is required
-    ticker_name: name of the asset
-    ticket_settings: dictionary with all the parameters to compute features
-    n_days: to set an arbitrary data size
-
-    returns: stock eda panel
-    """
-
+    '''
+    this function runs the stock_eda_panel. It is shared between model training and prediction
+
+    Parameters:
+        ticker_name (str): name of the asset
+        ticket_settings (dict): dictionary with all the parameters to compute features
+        n_days (int): to set an arbitrary data size
+        hmm_available (obj): hmm model, if available; required for prediction
+        data_window (str): window for the data extraction
+
+    Returns:
+        object_stock (obj): the resulting stock_eda_panel object
+    '''
     object_stock = stock_eda_panel(ticker_name , n_days, data_window)
     object_stock.get_data()
 
@@ -524,6 +662,14 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
 trends = {'adjusted' : 0.001, 'smooth' : 0.0001}
 
 def apply_KF(self, trends):
+    '''
+    create Kalman filter features and attach them to the stock_eda_panel object
+
+    Parameters:
+        trends (dict): configurations of the Kalman filter
+    Returns:
+        None
+    '''
     for ttrend in trends:
         tcov = trends.get(ttrend)
         kf = KalmanFilter(transition_matrices = [1],
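
Aside for reviewers: a minimal sketch of a one-dimensional local-level Kalman filter with pykalman, in the spirit of the constructor call started above. The observation settings and the use of each trends value as the transition covariance are assumptions.

import pandas as pd
from pykalman import KalmanFilter

def kalman_trend_sketch(close: pd.Series, transition_cov: float) -> pd.Series:
    # local-level model: the hidden state is the smoothed price level
    kf = KalmanFilter(transition_matrices=[1],
                      observation_matrices=[1],
                      initial_state_mean=close.iloc[0],
                      initial_state_covariance=1,
                      observation_covariance=1,
                      transition_covariance=transition_cov)
    state_means, _ = kf.filter(close.values)
    return pd.Series(state_means.flatten(), index=close.index)

# e.g. one smoothed series per entry of trends = {'adjusted': 0.001, 'smooth': 0.0001}
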
@@ -538,7 +684,16 @@ def apply_KF(self, trends):
 stock_eda_panel.apply_KF = apply_KF
 
 def call_ml_objects(stock_code, client, call_models = False):
-
+    '''
+    call artifacts from mlflow
+
+    Parameters:
+        stock_code (str): asset name
+        client (obj): mlflow client
+        call_models (boolean): if true, also call the ml model artifacts
+    Returns:
+        objects (dict): dictionary containing ml artifacts, data, configs and models
+    '''
     objects = dict()
 
     registered_model_name = f'{stock_code}_models'
@@ -584,17 +739,57 @@ def call_ml_objects(stock_code, client, call_models = False):
     return objects
 
 class produce_plotly_plots:
+    """
+    class that helps to produce different dashboards
+
+    Attributes
+    ----------
+    ticket_name : str
+        asset name
+    data_frame (pd.DataFrame): asset data
+    settings : dict
+        asset configurations
+    show_plot : boolean
+        if true, display plots
+    save_path : str
+        local path for saving e.g r'C:/path/to/the/file/'
+    save_aws : str
+        remote key in s3 bucket path e.g. 'path/to/file/'
+    aws_credentials : dict
+        aws credentials
+    return_figs : boolean
+        if true, methods will return objects
+
+    Methods
+    -------
+    plot_asset_signals(feature_list=list, spread_column=list, date_intervals=list):
+        Display signals and hmm states over closing prices and feature time series
+    explore_states_ts():
+        display scaled time series of every hmm state
+    plot_hmm_analysis(settings=dict, t_matrix=txt, model=obj):
+        display plots that analyse hmm states
+    produce_forecasting_plot(predictions=pd.DataFrame):
+        display forecasting plots
+    """
     def __init__(self,ticket_name, data_frame,settings, save_path = False, save_aws = False, show_plot= True, aws_credentials = False, return_figs = False):
         """
-        ticket_name: str asset name
-        data_frame: pandas df
-        settings: dict
-        show_plot: bool
-        save_path: str local path for saving e.g r'C:/path/to/the/file/'
-        save_aws: str remote key in s3 bucket path e.g. 'path/to/file/'
-        aws_credentials: dict
+        Initialize object
+
+        Parameters
+        ----------
+        ticket_name (str): asset name
+        data_frame (pd.DataFrame): asset data
+        settings (dict): asset configurations
+        show_plot (boolean): if true, display plots
+        save_path (str): local path for saving e.g r'C:/path/to/the/file/'
+        save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+        aws_credentials (dict): aws credentials
+        return_figs (boolean): if true, methods will return objects
+
+        Returns
+        -------
+        None
         """
-
         self.ticket_name = ticket_name
         self.data_frame = data_frame
         self.settings = settings
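
Aside for reviewers: a hedged usage sketch tying the class docstring and the constructor together. The settings structure is a guess based on keys visible elsewhere in this diff (for example settings['settings']['hmm']['n_clusters'] and the ma1 lookup under spread_column), and asset_df stands in for a dataframe produced by stock_eda_panel.

plots = produce_plotly_plots(
    ticket_name='AAA',
    data_frame=asset_df,   # placeholder: feature dataframe from stock_eda_panel
    settings={'settings': {'hmm': {'n_clusters': 4}, 'spread': {'ma1': 20, 'ma2': 50}}},
    show_plot=True,
    return_figs=True,
)
fig_signals = plots.plot_asset_signals(feature_list=['zscore'], spread_column='spread')
fig_states = plots.explore_states_ts()
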
@@ -605,7 +800,19 @@ class produce_plotly_plots:
         self.return_figs = return_figs
 
     def plot_asset_signals(self, feature_list,spread_column, date_intervals = False):
-
+        """
+        Display signals and hmm states over closing prices and feature time series
+
+        Parameters
+        ----------
+        feature_list (list): signal list
+        spread_column (list): moving average list
+        date_intervals (list): list of tuples of dates, e.g. [('2022-01-01','2023-01-01'),('2022-01-01','2023-01-01')]
+
+        Returns
+        -------
+        fig (obj): plotly dashboard
+        """
         result_json_name = 'panel_signals.json'
         df = self.data_frame
         ma1 = self.settings['settings'][spread_column]['ma1']
@@ -695,6 +902,17 @@ class produce_plotly_plots:
         return fig
 
     def explore_states_ts(self):
+        """
+        display scaled time series of every hmm state
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        fig (obj): plotly dashboard
+        """
         result_json_name = 'ts_hmm.json'
         df = self.data_frame
         hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
@@ -743,6 +961,20 @@ class produce_plotly_plots:
         return fig
 
     def plot_hmm_analysis(self,settings, t_matrix, model = False):
+        """
+        display plots that analyse hmm states
+
+        Parameters
+        ----------
+        settings (dict): asset configurations
+        t_matrix (txt): asset state transition matrix
+        model (obj): hmm model
+
+        Returns
+        -------
+        fig (obj): plotly dashboard
+        messages (dict): hmm model metrics
+        """
         result_json_name = 'hmm_analysis.json'
         df = self.data_frame
         hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
@@ -864,6 +1096,17 @@ class produce_plotly_plots:
         if self.return_figs:
             return fig, messages
     def produce_forecasting_plot(self,predictions):
+        """
+        display forecasting plots
+
+        Parameters
+        ----------
+        predictions (pd.DataFrame): asset predictions
+
+        Returns
+        -------
+        None
+        """
         result_json_name = 'forecast_plot.json'
         hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
         model_type = self.settings.get('model_type',False)
@@ -936,7 +1179,18 @@ class produce_plotly_plots:
         upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
 
 def plot_hmm_analysis_logger(data_frame,test_data_size, save_path = False, show_plot = True):
-
+    '''
+    display train and test box plots of the hmm state returns
+
+    Parameters:
+        data_frame (pd.DataFrame): asset data
+        test_data_size (int): test data size, the remaining is training data
+        save_path (str): path/to/save/
+        show_plot (boolean): if true, display plot
+
+    Returns:
+        None
+    '''
     df = data_frame
     df_ = df[['Date','hmm_feature','Close',"chain_return"]].sort_values('Date')
     fig, axs = plt.subplots(1,2,figsize=(10,4))
@@ -950,7 +1204,18 @@ def plot_hmm_analysis_logger(data_frame,test_data_size, save_path = False, show_
     plt.close()
 
 def plot_hmm_tsanalysis_logger(data_frame, test_data_size,save_path = False, show_plot = True):
-
+    '''
+    display a time series analysis of the hmm states
+
+    Parameters:
+        data_frame (pd.DataFrame): asset data
+        test_data_size (int): test data size, the remaining is training data
+        save_path (str): path/to/save/
+        show_plot (boolean): if true, display plot
+
+    Returns:
+        None
+    '''
     df = data_frame
     df_ = df[['Date','hmm_feature','Close',"chain_return"]].sort_values('Date')
     states = list(df_['hmm_feature'].unique())
@@ -977,7 +1242,20 @@ def plot_hmm_tsanalysis_logger(data_frame, test_data_size,save_path = False, sho
     plt.close()
 
 def extract_data_traintest(object_stock,features_to_search,configs, target_configs, window_analysis = False, drop_nan= True):
-
+    '''
+    run object_stock (a stock_eda_panel) to compute its features
+
+    Parameters:
+        object_stock (object): stock_eda_panel object
+        features_to_search (list): list of features
+        configs (dict): asset configurations
+        target_configs (dict): target configurations
+        window_analysis (int): take a sample of the data of this size
+        drop_nan (boolean): remove nans from the data
+
+    Returns:
+        object_stock (obj): object_stock with features and signals
+    '''
     object_stock.get_data()
     object_stock.volatility_analysis(**configs['volatility']['config_params'], plot = False, save_features = False)
     target_params_up = target_configs['params_up']
@@ -1003,7 +1281,19 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
     return object_stock
 
 def produce_simple_ts_from_model(stock_code, configs, n_days = 2000 , window_scope = '5y'):
-
+    '''
+    display a dashboard analysis of a given asset
+
+    Parameters:
+        stock_code (str): asset name
+        configs (dict): asset configurations
+        n_days (int): data size
+        window_scope (str): window data size
+
+    Returns:
+        fig (obj): plotly dashboard
+        df (pd.DataFrame): resulting asset dataset
+    '''
     ## getting data
     volat_args = {'lags': 3, 'trad_days': 15, 'window_log_return': 10}
 
@@ -1062,17 +1352,21 @@ def produce_simple_ts_from_model(stock_code, configs, n_days = 2000 , window_sco
     return fig, df
 
 def save_edge_model(data, save_path = False, save_aws = False, show_result = False, aws_credentials = False):
-    """
-    data: pandas df
-    model_name: str
-    ticket_name: str name of the asset
-    save_path: str local path for saving e.g r'C:/path/to/the/file/'
-    save_aws: str remote key in s3 bucket path e.g. 'path/to/file/'
-    show_results: bool
-    aws_credentials: dict
-
-    return a print of the dictionary
-    """
+    '''
+    get latest edge execution and edge probability
+
+    Parameters:
+        data (pd.DataFrame): asset data
+        model_name (str): model name
+        ticket_name (str): name of the asset
+        save_path (str): local path for saving e.g r'C:/path/to/the/file/'
+        save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
+        show_results (bool): if true, display results
+        aws_credentials (dict): aws credentials
+
+    Returns:
+        None
+    '''
     today = datetime.datetime.today().strftime('%Y-%m-%d')
 
     curent_edge = (
@@ -1096,7 +1390,19 @@ def save_edge_model(data, save_path = False, save_aws = False, show_result = Fal
     print(curent_edge)
 
 def create_feature_edge(model, data,feature_name, threshold, target_variables):
-
+    '''
+    compute the edge feature from the model probabilities and a threshold
+
+    Parameters:
+        model (obj): edge model artifact
+        data (pd.DataFrame): asset data
+        feature_name (str): edge feature name
+        threshold (float): edge threshold
+        target_variables (list): names of the target columns
+
+    Returns:
+        result_df (pd.DataFrame): result dataframe with edges
+    '''
     label_prediction = ['proba_'+x for x in target_variables]
     predictions = model.predict_proba(data)
     predictions = pd.DataFrame(predictions, columns = label_prediction, index = data.index)
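
Aside for reviewers: following the predict_proba lines above, a minimal sketch of how the documented edge feature could be derived from the class probabilities and the threshold. The join and the any-class-above-threshold rule are assumptions, not necessarily the package's logic.

import pandas as pd

def create_feature_edge_sketch(model, data: pd.DataFrame, feature_name: str,
                               threshold: float, target_variables: list) -> pd.DataFrame:
    label_prediction = ['proba_' + x for x in target_variables]
    probabilities = pd.DataFrame(model.predict_proba(data),
                                 columns=label_prediction, index=data.index)
    result_df = data.join(probabilities)
    # flag an edge whenever any class probability clears the threshold (assumed rule)
    result_df[feature_name] = (probabilities.max(axis=1) > threshold).astype(int)
    return result_df
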