virgo-modules 0.0.74__py3-none-any.whl → 0.0.76__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- virgo_modules/src/aws_utils.py +34 -2
- virgo_modules/src/edge_utils.py +200 -5
- virgo_modules/src/re_utils.py +360 -54
- virgo_modules/src/ticketer_source.py +1419 -257
- {virgo_modules-0.0.74.dist-info → virgo_modules-0.0.76.dist-info}/METADATA +18 -20
- virgo_modules-0.0.76.dist-info/RECORD +12 -0
- {virgo_modules-0.0.74.dist-info → virgo_modules-0.0.76.dist-info}/WHEEL +1 -1
- virgo_modules-0.0.74.dist-info/RECORD +0 -12
- {virgo_modules-0.0.74.dist-info → virgo_modules-0.0.76.dist-info}/LICENSE +0 -0
- {virgo_modules-0.0.74.dist-info → virgo_modules-0.0.76.dist-info}/top_level.txt +0 -0
virgo_modules/src/re_utils.py
CHANGED
|
@@ -31,6 +31,18 @@ from pykalman import KalmanFilter
|
|
|
31
31
|
from .aws_utils import upload_file_to_aws
|
|
32
32
|
|
|
33
33
|
def calculate_cointegration(series_1, series_2):
|
|
34
|
+
'''
|
|
35
|
+
calculate cointegration score of two time series.
|
|
36
|
+
|
|
37
|
+
Parameters:
|
|
38
|
+
series_1 (pd.series): pandas series of the asset returns
|
|
39
|
+
series_2 (pd.series): pandas series of the asset returns
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
coint_flag (int): cointegration flag, 1 or 0. 1 if p value and coint_t lower than 0.05 and critical value
|
|
43
|
+
hedge_value (float): hedge value
|
|
44
|
+
'''
|
|
45
|
+
|
|
34
46
|
coint_flag = 0
|
|
35
47
|
coint_res = coint(series_1, series_2)
|
|
36
48
|
coint_t = coint_res[0]
|
|
@@ -44,8 +56,43 @@ def calculate_cointegration(series_1, series_2):
|
|
|
44
56
|
return coint_flag, hedge_value
|
|
45
57
|
|
|
46
58
|
class pair_finder():
|
|
59
|
+
"""
|
|
60
|
+
class that assesses two assets to evaluate whether both are cointegrated
|
|
61
|
+
|
|
62
|
+
Attributes
|
|
63
|
+
----------
|
|
64
|
+
df : pd.DataFrame
|
|
65
|
+
dataframe of merged assets with spread score
|
|
66
|
+
asset_1 : str
|
|
67
|
+
asset to assess
|
|
68
|
+
asset_2 : str
|
|
69
|
+
secondary asset to assess
|
|
70
|
+
|
|
71
|
+
Methods
|
|
72
|
+
-------
|
|
73
|
+
produce_zscore(window=int, z_threshold=float, verbose=boolean):
|
|
74
|
+
producing z score from the spread. Also getting signals using window functions
|
|
75
|
+
plot_scores():
|
|
76
|
+
display plot of the time series and signals and other plot for pair signal strategy
|
|
77
|
+
evaluate_signal(days_list=list(),test_size=int, signal_position=int,threshold=float,verbose=boolean, plot=boolean):
|
|
78
|
+
evaluate the signal strategy using future returns
|
|
79
|
+
create_backtest_signal(days_strategy=int, test_size=int):
|
|
80
|
+
create back test of the strategy and get some plot analysis
|
|
81
|
+
"""
|
|
47
82
|
def __init__(self, raw_data , asset_1 ,asset_2):
|
|
48
|
-
|
|
83
|
+
"""
|
|
84
|
+
Initialize object, selecting just the two assets and getting the spread between both assets
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
raw_data (pd.DataFrame): dataframe of all assets
|
|
89
|
+
asset_1 (str): asset to assess
|
|
90
|
+
asset_2 (str): secondary asset to assess
|
|
91
|
+
|
|
92
|
+
Returns
|
|
93
|
+
-------
|
|
94
|
+
None
|
|
95
|
+
"""
|
|
49
96
|
df = raw_data[[asset_1, asset_2]]
|
|
50
97
|
coint_flag, hedge_ratio = calculate_cointegration(df[asset_1], df[asset_2])
|
|
51
98
|
spread = df[asset_1] - (hedge_ratio * df[asset_2])
|
|
@@ -55,6 +102,19 @@ class pair_finder():
|
|
|
55
102
|
self.asset_2 = asset_2
|
|
56
103
|
|
|
57
104
|
def produce_zscore(self, window, z_threshold, verbose = False):
|
|
105
|
+
"""
|
|
106
|
+
producing z score from the spread. Also getting signals using window functions
|
|
107
|
+
|
|
108
|
+
Parameters
|
|
109
|
+
----------
|
|
110
|
+
window (int): window size
|
|
111
|
+
z_threshold (float): alpha and z threshold for the normalized feature
|
|
112
|
+
verbose (boolean): to print analysis
|
|
113
|
+
|
|
114
|
+
Returns
|
|
115
|
+
-------
|
|
116
|
+
None
|
|
117
|
+
"""
|
|
58
118
|
self.z_threshold = z_threshold
|
|
59
119
|
spread_series = pd.Series(self.df.spread)
|
|
60
120
|
mean = spread_series.rolling(center = False, window = window).mean()
|
|
@@ -74,7 +134,17 @@ class pair_finder():
|
|
|
74
134
|
self.df['low_pair_signal'] = low_signal
|
|
75
135
|
|
|
76
136
|
def plot_scores(self):
|
|
77
|
-
|
|
137
|
+
"""
|
|
138
|
+
display plot of the time series and signals and other plot for pair signal strategy
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
None
|
|
143
|
+
|
|
144
|
+
Returns
|
|
145
|
+
-------
|
|
146
|
+
None
|
|
147
|
+
"""
|
|
78
148
|
plt.axhline(y=0.0, color='grey', linestyle='--')
|
|
79
149
|
plt.figure(1, figsize = (10, 4))
|
|
80
150
|
plt.plot(self.df.spread.values)
|
|
@@ -104,7 +174,22 @@ class pair_finder():
|
|
|
104
174
|
fig.show()
|
|
105
175
|
|
|
106
176
|
def evaluate_signal(self, days_list,test_size, signal_position = False,threshold = 0.05,verbose = False, plot = False):
|
|
107
|
-
|
|
177
|
+
"""
|
|
178
|
+
evaluate the signal strategy using future returns
|
|
179
|
+
|
|
180
|
+
Parameters
|
|
181
|
+
----------
|
|
182
|
+
days_list (list): list of days future returns
|
|
183
|
+
test_size (int): test data size, the remaining is taken as training data
|
|
184
|
+
signal_position (int): position of the signal to open position
|
|
185
|
+
threshold (float): alpha or z threshold of the normalized feature
|
|
186
|
+
verbose (boolean): if True, print results
|
|
187
|
+
plot (boolean): if true, display plots
|
|
188
|
+
|
|
189
|
+
Returns
|
|
190
|
+
-------
|
|
191
|
+
None
|
|
192
|
+
"""
|
|
108
193
|
df = self.df.sort_values('Date').iloc[0:-test_size,:].copy()
|
|
109
194
|
returns_list = list()
|
|
110
195
|
|
|
@@ -206,6 +291,18 @@ class pair_finder():
|
|
|
206
291
|
del df
|
|
207
292
|
|
|
208
293
|
def create_backtest_signal(self,days_strategy, test_size):
|
|
294
|
+
"""
|
|
295
|
+
create back test of the strategy and get some plot analysis
|
|
296
|
+
|
|
297
|
+
Parameters
|
|
298
|
+
----------
|
|
299
|
+
days_strategy (int): list of days future returns
|
|
300
|
+
test_size (int): test data size, the remaining is taken as training data
|
|
301
|
+
|
|
302
|
+
Returns
|
|
303
|
+
-------
|
|
304
|
+
None
|
|
305
|
+
"""
|
|
209
306
|
asset_1 = self.asset_1
|
|
210
307
|
df1 = self.df.iloc[-test_size:,:].copy()
|
|
211
308
|
df2 = df1.copy()
|
|
@@ -273,7 +370,18 @@ class pair_finder():
|
|
|
273
370
|
del df1,df2,dft
|
|
274
371
|
|
|
275
372
|
def produce_big_dataset(data_frames, stocks_codes_, feature_list, limit = 500):
|
|
276
|
-
|
|
373
|
+
'''
|
|
374
|
+
combine multiple asset, taking a common schema
|
|
375
|
+
|
|
376
|
+
Parameters:
|
|
377
|
+
data_frames (pd.DataFrame): Base dataframe
|
|
378
|
+
stocks_codes_ (list): assets to select
|
|
379
|
+
feature_list (list): feature list
|
|
380
|
+
limit (int): number of observation per asset
|
|
381
|
+
|
|
382
|
+
Returns:
|
|
383
|
+
dataframe (pd.DataFrame): Base dataframe with extra data
|
|
384
|
+
'''
|
|
277
385
|
feature_list_ = list()
|
|
278
386
|
columns_vector = list(data_frames[stocks_codes_[-1]].columns )
|
|
279
387
|
for feat in feature_list:
|
|
@@ -301,7 +409,19 @@ def produce_big_dataset(data_frames, stocks_codes_, feature_list, limit = 500):
|
|
|
301
409
|
return dataframe
|
|
302
410
|
|
|
303
411
|
def ranking(data, weighted_features, top = 5, window = 5):
|
|
304
|
-
|
|
412
|
+
'''
|
|
413
|
+
Create a ranking of assets given current signals and weighted average importance
|
|
414
|
+
|
|
415
|
+
Parameters:
|
|
416
|
+
data (pd.Dataframe): base data
|
|
417
|
+
weighted_features (dict): configuration dictionary
|
|
418
|
+
top (int): top n to get result
|
|
419
|
+
window (int): number of days to assess
|
|
420
|
+
|
|
421
|
+
Returns:
|
|
422
|
+
top_up (list): top roof signal asset
|
|
423
|
+
top_low (list): top bottom signal asset
|
|
424
|
+
'''
|
|
305
425
|
features = weighted_features.keys()
|
|
306
426
|
up_columns = ['signal_up_' + x for x in features]
|
|
307
427
|
low_columns = ['signal_low_' + x for x in features]
|
|
@@ -336,16 +456,23 @@ def ranking(data, weighted_features, top = 5, window = 5):
|
|
|
336
456
|
return top_up, top_low
|
|
337
457
|
|
|
338
458
|
def produce_dashboard(data, columns , ticket_list, show_plot = True, nrows = 150,save_name = False, save_path = False, save_aws = False, aws_credential = False):
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
459
|
+
'''
|
|
460
|
+
produce dashboard using signals and list of assets
|
|
461
|
+
|
|
462
|
+
Parameters:
|
|
463
|
+
data (pd.Dataframe): base data
|
|
464
|
+
columns (list): list of features or signals
|
|
465
|
+
ticket_list (list): list of assets
|
|
466
|
+
show_plot (boolean): if true, display plot
|
|
467
|
+
nrows (int): number of days back to display
|
|
468
|
+
save_name (str): dashboard name of the resulting file
|
|
469
|
+
save_path (str): local path for saving e.g r'C:/path/to/the/file/'
|
|
470
|
+
save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
|
|
471
|
+
aws_credential (dict): aws credentials
|
|
472
|
+
|
|
473
|
+
Returns:
|
|
474
|
+
None
|
|
475
|
+
'''
|
|
349
476
|
top = len(ticket_list)
|
|
350
477
|
columns = ['history'] + columns
|
|
351
478
|
subtitles = list()
|
|
@@ -395,7 +522,17 @@ def produce_dashboard(data, columns , ticket_list, show_plot = True, nrows = 150
|
|
|
395
522
|
|
|
396
523
|
|
|
397
524
|
def rank_by_return(data, lag_days, top_n = 5):
|
|
398
|
-
|
|
525
|
+
'''
|
|
526
|
+
produce ranking by returns
|
|
527
|
+
|
|
528
|
+
Parameters:
|
|
529
|
+
data (pd.Dataframe): base data
|
|
530
|
+
lag_days (int): number of days to consider
|
|
531
|
+
top_n (int): top n results assets
|
|
532
|
+
|
|
533
|
+
Returns:
|
|
534
|
+
result (list): resulting assets top n most important
|
|
535
|
+
'''
|
|
399
536
|
data = data.sort_values(['Ticket','Date'], ascending=[False,False]).reset_index(drop = True)
|
|
400
537
|
data['first'] = data.sort_values(['Date'], ascending=[False]).groupby(['Ticket']).cumcount() + 1
|
|
401
538
|
data = data[data['first'] <= lag_days]
|
|
@@ -416,18 +553,19 @@ def rank_by_return(data, lag_days, top_n = 5):
|
|
|
416
553
|
return result
|
|
417
554
|
|
|
418
555
|
def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_available: object = False, data_window:str = '5y') -> object:
|
|
419
|
-
|
|
420
|
-
this functions runs the stock_eda_panel
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
556
|
+
'''
|
|
557
|
+
this function runs the stock_eda_panel. It is shared between train model and predictions
|
|
558
|
+
|
|
559
|
+
Parameters:
|
|
560
|
+
ticker_name (str): name of the asset
|
|
561
|
+
ticket_settings (dict): dictionary with all the parameters to compute features
|
|
562
|
+
n_days (int): to set an arbitrary data size
|
|
563
|
+
hmm_available (obj): if the hmm is available, in prediction is required
|
|
564
|
+
data_window (str): window for the data extraction
|
|
565
|
+
|
|
566
|
+
Returns:
|
|
567
|
+
object_stock (obj): resulting object_stock object
|
|
568
|
+
'''
|
|
431
569
|
object_stock = stock_eda_panel(ticker_name , n_days, data_window)
|
|
432
570
|
object_stock.get_data()
|
|
433
571
|
|
|
@@ -524,6 +662,14 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
|
|
|
524
662
|
trends = {'adjusted' : 0.001, 'smooth' : 0.0001}
|
|
525
663
|
|
|
526
664
|
def apply_KF(self, trends):
|
|
665
|
+
'''
|
|
666
|
+
create kalman filter feature and attach it to the stock_eda_panel object
|
|
667
|
+
|
|
668
|
+
Parameters:
|
|
669
|
+
trends (dict): configurations of the kalman filter
|
|
670
|
+
Returns:
|
|
671
|
+
none
|
|
672
|
+
'''
|
|
527
673
|
for ttrend in trends:
|
|
528
674
|
tcov = trends.get(ttrend)
|
|
529
675
|
kf = KalmanFilter(transition_matrices = [1],
|
|
@@ -538,7 +684,16 @@ def apply_KF(self, trends):
|
|
|
538
684
|
stock_eda_panel.apply_KF = apply_KF
|
|
539
685
|
|
|
540
686
|
def call_ml_objects(stock_code, client, call_models = False):
|
|
541
|
-
|
|
687
|
+
'''
|
|
688
|
+
call artifacts from mlflow
|
|
689
|
+
|
|
690
|
+
Parameters:
|
|
691
|
+
stock_code (str): asset name
|
|
692
|
+
client (obj): mlflow client
|
|
693
|
+
call_models (boolean): if true, call ml artifacts
|
|
694
|
+
Returns:
|
|
695
|
+
objects (dict): that contains ml artifacts, data , configs and models
|
|
696
|
+
'''
|
|
542
697
|
objects = dict()
|
|
543
698
|
|
|
544
699
|
registered_model_name = f'{stock_code}_models'
|
|
@@ -584,17 +739,57 @@ def call_ml_objects(stock_code, client, call_models = False):
|
|
|
584
739
|
return objects
|
|
585
740
|
|
|
586
741
|
class produce_plotly_plots:
|
|
742
|
+
"""
|
|
743
|
+
class that helps to produce different dashboards
|
|
744
|
+
|
|
745
|
+
Attributes
|
|
746
|
+
----------
|
|
747
|
+
ticket_name : str
|
|
748
|
+
asset name
|
|
749
|
+
data_frame (pd.DataFrame): asset data
|
|
750
|
+
settings : dict
|
|
751
|
+
asset configurations
|
|
752
|
+
show_plot : boolean
|
|
753
|
+
if true, display plots
|
|
754
|
+
save_path : str
|
|
755
|
+
local path for saving e.g r'C:/path/to/the/file/'
|
|
756
|
+
save_aws : str
|
|
757
|
+
remote key in s3 bucket path e.g. 'path/to/file/'
|
|
758
|
+
aws_credentials : dict
|
|
759
|
+
aws credentials
|
|
760
|
+
return_figs : boolean
|
|
761
|
+
if true, methods will return objects
|
|
762
|
+
|
|
763
|
+
Methods
|
|
764
|
+
-------
|
|
765
|
+
plot_asset_signals(feature_list=list, spread_column=list, date_intervals=list):
|
|
766
|
+
Display signals and hmm states over closing prices and feature time series
|
|
767
|
+
explore_states_ts():
|
|
768
|
+
display scaled time series of every hmm state
|
|
769
|
+
plot_hmm_analysis(settings=dict, t_matrix=txt, model=obj):
|
|
770
|
+
display plots that analyse hmm states
|
|
771
|
+
produce_forecasting_plot(predictions=pd.DataFrame):
|
|
772
|
+
display forecasting plots
|
|
773
|
+
"""
|
|
587
774
|
def __init__(self,ticket_name, data_frame,settings, save_path = False, save_aws = False, show_plot= True, aws_credentials = False, return_figs = False):
|
|
588
775
|
"""
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
776
|
+
Initialize object
|
|
777
|
+
|
|
778
|
+
Parameters
|
|
779
|
+
----------
|
|
780
|
+
ticket_name (str): asset name
|
|
781
|
+
data_frame (pd.DataFrame): asset data
|
|
782
|
+
settings (dict): asset configurations
|
|
783
|
+
show_plot (boolean): if true, display plots
|
|
784
|
+
save_path (str): local path for saving e.g r'C:/path/to/the/file/'
|
|
785
|
+
save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
|
|
786
|
+
aws_credentials (dict): aws credentials
|
|
787
|
+
return_figs (boolean): if true, methods will return objects
|
|
788
|
+
|
|
789
|
+
Returns
|
|
790
|
+
-------
|
|
791
|
+
None
|
|
596
792
|
"""
|
|
597
|
-
|
|
598
793
|
self.ticket_name = ticket_name
|
|
599
794
|
self.data_frame = data_frame
|
|
600
795
|
self.settings = settings
|
|
@@ -605,7 +800,19 @@ class produce_plotly_plots:
|
|
|
605
800
|
self.return_figs = return_figs
|
|
606
801
|
|
|
607
802
|
def plot_asset_signals(self, feature_list,spread_column, date_intervals = False):
|
|
608
|
-
|
|
803
|
+
"""
|
|
804
|
+
Display signals and hmm states over closing prices and feature time series
|
|
805
|
+
|
|
806
|
+
Parameters
|
|
807
|
+
----------
|
|
808
|
+
feature_list (list): signal list
|
|
809
|
+
spread_column (list): moving average list
|
|
810
|
+
date_intervals (list): list of tuples of dates, e.g [('2022-01-01','2023-01-01'),('2022-01-01','2023-01-01')]
|
|
811
|
+
|
|
812
|
+
Returns
|
|
813
|
+
-------
|
|
814
|
+
fig (obj): plotly dashboard
|
|
815
|
+
"""
|
|
609
816
|
result_json_name = 'panel_signals.json'
|
|
610
817
|
df = self.data_frame
|
|
611
818
|
ma1 = self.settings['settings'][spread_column]['ma1']
|
|
@@ -695,6 +902,17 @@ class produce_plotly_plots:
|
|
|
695
902
|
return fig
|
|
696
903
|
|
|
697
904
|
def explore_states_ts(self):
|
|
905
|
+
"""
|
|
906
|
+
display scaled time series of every hmm state
|
|
907
|
+
|
|
908
|
+
Parameters
|
|
909
|
+
----------
|
|
910
|
+
None
|
|
911
|
+
|
|
912
|
+
Returns
|
|
913
|
+
-------
|
|
914
|
+
fig (obj): plotly dashboard
|
|
915
|
+
"""
|
|
698
916
|
result_json_name = 'ts_hmm.json'
|
|
699
917
|
df = self.data_frame
|
|
700
918
|
hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
|
|
@@ -743,6 +961,20 @@ class produce_plotly_plots:
|
|
|
743
961
|
return fig
|
|
744
962
|
|
|
745
963
|
def plot_hmm_analysis(self,settings, t_matrix, model = False):
|
|
964
|
+
"""
|
|
965
|
+
display plots that analyse hmm states
|
|
966
|
+
|
|
967
|
+
Parameters
|
|
968
|
+
----------
|
|
969
|
+
settings (dict): asset configurations
|
|
970
|
+
t_matrix (txt): asset state transition matrix
|
|
971
|
+
model (obj): hmm model
|
|
972
|
+
|
|
973
|
+
Returns
|
|
974
|
+
-------
|
|
975
|
+
fig (obj): plotly dashboard
|
|
976
|
+
messages (dict): hmm model metrics
|
|
977
|
+
"""
|
|
746
978
|
result_json_name = 'hmm_analysis.json'
|
|
747
979
|
df = self.data_frame
|
|
748
980
|
hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
|
|
@@ -864,6 +1096,17 @@ class produce_plotly_plots:
|
|
|
864
1096
|
if self.return_figs:
|
|
865
1097
|
return fig, messages
|
|
866
1098
|
def produce_forecasting_plot(self,predictions):
|
|
1099
|
+
"""
|
|
1100
|
+
display forecasting plots
|
|
1101
|
+
|
|
1102
|
+
Parameters
|
|
1103
|
+
----------
|
|
1104
|
+
predictions (pd.DataFrame): asset predictions
|
|
1105
|
+
|
|
1106
|
+
Returns
|
|
1107
|
+
-------
|
|
1108
|
+
None
|
|
1109
|
+
"""
|
|
867
1110
|
result_json_name = 'forecast_plot.json'
|
|
868
1111
|
hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
|
|
869
1112
|
model_type = self.settings.get('model_type',False)
|
|
@@ -936,7 +1179,18 @@ class produce_plotly_plots:
|
|
|
936
1179
|
upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
|
|
937
1180
|
|
|
938
1181
|
def plot_hmm_analysis_logger(data_frame,test_data_size, save_path = False, show_plot = True):
|
|
939
|
-
|
|
1182
|
+
'''
|
|
1183
|
+
display box plots train and test of hmm state returns
|
|
1184
|
+
|
|
1185
|
+
Parameters:
|
|
1186
|
+
data_frame (pd.DataFrame): asset data
|
|
1187
|
+
test_data_size (int): test data size, the remaining is training data
|
|
1188
|
+
save_path (str): path/to/save/
|
|
1189
|
+
show_plot (boolean): if true, display plot
|
|
1190
|
+
|
|
1191
|
+
Returns:
|
|
1192
|
+
None
|
|
1193
|
+
'''
|
|
940
1194
|
df = data_frame
|
|
941
1195
|
df_ = df[['Date','hmm_feature','Close',"chain_return"]].sort_values('Date')
|
|
942
1196
|
fig, axs = plt.subplots(1,2,figsize=(10,4))
|
|
@@ -950,7 +1204,18 @@ def plot_hmm_analysis_logger(data_frame,test_data_size, save_path = False, show_
|
|
|
950
1204
|
plt.close()
|
|
951
1205
|
|
|
952
1206
|
def plot_hmm_tsanalysis_logger(data_frame, test_data_size,save_path = False, show_plot = True):
|
|
953
|
-
|
|
1207
|
+
'''
|
|
1208
|
+
display time series hmm state analysis
|
|
1209
|
+
|
|
1210
|
+
Parameters:
|
|
1211
|
+
data_frame (pd.DataFrame): asset data
|
|
1212
|
+
test_data_size (int): test data size, the remaining is training data
|
|
1213
|
+
save_path (str): path/to/save/
|
|
1214
|
+
show_plot (boolean): if true, display plot
|
|
1215
|
+
|
|
1216
|
+
Returns:
|
|
1217
|
+
None
|
|
1218
|
+
'''
|
|
954
1219
|
df = data_frame
|
|
955
1220
|
df_ = df[['Date','hmm_feature','Close',"chain_return"]].sort_values('Date')
|
|
956
1221
|
states = list(df_['hmm_feature'].unique())
|
|
@@ -977,7 +1242,20 @@ def plot_hmm_tsanalysis_logger(data_frame, test_data_size,save_path = False, sho
|
|
|
977
1242
|
plt.close()
|
|
978
1243
|
|
|
979
1244
|
def extract_data_traintest(object_stock,features_to_search,configs, target_configs, window_analysis = False, drop_nan= True):
|
|
980
|
-
|
|
1245
|
+
'''
|
|
1246
|
+
code snippet that execute object_stock or stock_eda_panel to get features
|
|
1247
|
+
|
|
1248
|
+
Parameters:
|
|
1249
|
+
object_stock (object): stock_eda_panel object
|
|
1250
|
+
features_to_search (list): list of features
|
|
1251
|
+
configs (dict): asset configurations
|
|
1252
|
+
target_configs (dict): target configurations
|
|
1253
|
+
window_analysis (int): take a sample size data
|
|
1254
|
+
drop_nan (boolean): remove nans from the data
|
|
1255
|
+
|
|
1256
|
+
Returns:
|
|
1257
|
+
object_stock (obj): object_stock with features and signals
|
|
1258
|
+
'''
|
|
981
1259
|
object_stock.get_data()
|
|
982
1260
|
object_stock.volatility_analysis(**configs['volatility']['config_params'], plot = False, save_features = False)
|
|
983
1261
|
target_params_up = target_configs['params_up']
|
|
@@ -1003,7 +1281,19 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
|
|
|
1003
1281
|
return object_stock
|
|
1004
1282
|
|
|
1005
1283
|
def produce_simple_ts_from_model(stock_code, configs, n_days = 2000 , window_scope = '5y'):
|
|
1006
|
-
|
|
1284
|
+
'''
|
|
1285
|
+
display dashboard analysis of a given asset
|
|
1286
|
+
|
|
1287
|
+
Parameters:
|
|
1288
|
+
stock_code (str): asset name
|
|
1289
|
+
configs (dict): asset configurations
|
|
1290
|
+
n_days (int): data size
|
|
1291
|
+
window_scope (str): window data size
|
|
1292
|
+
|
|
1293
|
+
Returns:
|
|
1294
|
+
fig (obj): plotly dashboard
|
|
1295
|
+
df (pd.DataFrame): result asset dataset
|
|
1296
|
+
'''
|
|
1007
1297
|
## getting data
|
|
1008
1298
|
volat_args = {'lags': 3, 'trad_days': 15, 'window_log_return': 10}
|
|
1009
1299
|
|
|
@@ -1062,17 +1352,21 @@ def produce_simple_ts_from_model(stock_code, configs, n_days = 2000 , window_sco
|
|
|
1062
1352
|
return fig, df
|
|
1063
1353
|
|
|
1064
1354
|
def save_edge_model(data, save_path = False, save_aws = False, show_result = False, aws_credentials = False):
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1355
|
+
'''
|
|
1356
|
+
get latest edge execution and edge probability
|
|
1357
|
+
|
|
1358
|
+
Parameters:
|
|
1359
|
+
data (pd.DataFrame): asset data
|
|
1360
|
+
model_name (str): model name
|
|
1361
|
+
ticket_name (str): name of the asset
|
|
1362
|
+
save_path (str): local path for saving e.g r'C:/path/to/the/file/'
|
|
1363
|
+
save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
|
|
1364
|
+
show_result (bool): if true, display results
|
|
1365
|
+
aws_credentials (dict): aws credentials
|
|
1366
|
+
|
|
1367
|
+
Returns:
|
|
1368
|
+
None
|
|
1369
|
+
'''
|
|
1076
1370
|
today = datetime.datetime.today().strftime('%Y-%m-%d')
|
|
1077
1371
|
|
|
1078
1372
|
curent_edge = (
|
|
@@ -1096,7 +1390,19 @@ def save_edge_model(data, save_path = False, save_aws = False, show_result = Fal
|
|
|
1096
1390
|
print(curent_edge)
|
|
1097
1391
|
|
|
1098
1392
|
def create_feature_edge(model, data,feature_name, threshold, target_variables):
|
|
1099
|
-
|
|
1393
|
+
'''
|
|
1394
|
+
get latest edge execution and edge probability
|
|
1395
|
+
|
|
1396
|
+
Parameters:
|
|
1397
|
+
model (obj): edge model artifact
|
|
1398
|
+
data (pd.DataFrame): asset data
|
|
1399
|
+
feature_name (str): edge feature name
|
|
1400
|
+
threshold (float): edge threshold
|
|
1401
|
+
target_variables (list): names of the target columns
|
|
1402
|
+
|
|
1403
|
+
Returns:
|
|
1404
|
+
result_df (pd.DataFrame): result dataframe with edges
|
|
1405
|
+
'''
|
|
1100
1406
|
label_prediction = ['proba_'+x for x in target_variables]
|
|
1101
1407
|
predictions = model.predict_proba(data)
|
|
1102
1408
|
predictions = pd.DataFrame(predictions, columns = label_prediction, index = data.index)
|