virgo-modules 0.0.72__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- virgo_modules/src/aws_utils.py +35 -3
- virgo_modules/src/backtester.py +474 -0
- virgo_modules/src/edge_utils/__init__.py +0 -0
- virgo_modules/src/edge_utils/conformal_utils.py +106 -0
- virgo_modules/src/edge_utils/edge_utils.py +502 -0
- virgo_modules/src/edge_utils/feature_selection.py +66 -0
- virgo_modules/src/edge_utils/shap_utils.py +54 -0
- virgo_modules/src/edge_utils/stack_model.py +94 -0
- virgo_modules/src/hmm_utils.py +494 -0
- virgo_modules/src/market/__init__.py +0 -0
- virgo_modules/src/market/market_tools.py +189 -0
- virgo_modules/src/re_utils.py +628 -85
- virgo_modules/src/ticketer_source.py +1278 -1066
- virgo_modules/src/transformer_utils.py +401 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info}/METADATA +16 -22
- virgo_modules-0.8.4.dist-info/RECORD +22 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info}/WHEEL +1 -1
- virgo_modules/src/edge_utils.py +0 -178
- virgo_modules-0.0.72.dist-info/RECORD +0 -12
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info/licenses}/LICENSE +0 -0
- {virgo_modules-0.0.72.dist-info → virgo_modules-0.8.4.dist-info}/top_level.txt +0 -0
virgo_modules/src/re_utils.py
CHANGED
|
@@ -31,6 +31,18 @@ from pykalman import KalmanFilter
|
|
|
31
31
|
from .aws_utils import upload_file_to_aws
|
|
32
32
|
|
|
33
33
|
def calculate_cointegration(series_1, series_2):
|
|
34
|
+
'''
|
|
35
|
+
calculate cointegration score of two time series.
|
|
36
|
+
|
|
37
|
+
Parameters:
|
|
38
|
+
series_1 (pd.series): pandas series of the asset returns
|
|
39
|
+
series_2 (pd.series): pandas series of the asset returns
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
coint_flag (int): cointegration flag, 1 or 0. 1 if p value and coint_t lower than 0.05 and critical value
|
|
43
|
+
hedge_value (float): hedge value
|
|
44
|
+
'''
|
|
45
|
+
|
|
34
46
|
coint_flag = 0
|
|
35
47
|
coint_res = coint(series_1, series_2)
|
|
36
48
|
coint_t = coint_res[0]
|
|
@@ -44,8 +56,43 @@ def calculate_cointegration(series_1, series_2):
|
|
|
44
56
|
return coint_flag, hedge_value
|
|
45
57
|
|
|
46
58
|
class pair_finder():
|
|
59
|
+
"""
|
|
60
|
+
class that assesses two assets to evaluate whether they are cointegrated
|
|
61
|
+
|
|
62
|
+
Attributes
|
|
63
|
+
----------
|
|
64
|
+
df : pd.DataFrame
|
|
65
|
+
dataframe of merged assets with spread score
|
|
66
|
+
asset_1 : str
|
|
67
|
+
asset to assess
|
|
68
|
+
asset_2 : str
|
|
69
|
+
secondary asset to assess
|
|
70
|
+
|
|
71
|
+
Methods
|
|
72
|
+
-------
|
|
73
|
+
produce_zscore(window=int, z_threshold=float, verbose=boolean):
|
|
74
|
+
producing z score from the spread. Also getting signals using window functions
|
|
75
|
+
plot_scores():
|
|
76
|
+
display plot of the time series and signals and other plot for pair signal strategy
|
|
77
|
+
evaluate_signal(days_list=list(),test_size=int, signal_position=int,threshold=float,verbose=boolean, plot=boolean):
|
|
78
|
+
evaluate the signal strategy using future returns
|
|
79
|
+
create_backtest_signal(days_strategy=int, test_size=int):
|
|
80
|
+
create a backtest of the strategy and get some plot analysis
|
|
81
|
+
"""
|
|
47
82
|
def __init__(self, raw_data , asset_1 ,asset_2):
|
|
48
|
-
|
|
83
|
+
"""
|
|
84
|
+
Initialize object, selecting just the two assets and getting the spread between both assets
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
raw_data (pd.DataFrame): dataframe of all assets
|
|
89
|
+
asset_1 (str): asset to assess
|
|
90
|
+
asset_2 (str): secondary asset to assess
|
|
91
|
+
|
|
92
|
+
Returns
|
|
93
|
+
-------
|
|
94
|
+
None
|
|
95
|
+
"""
|
|
49
96
|
df = raw_data[[asset_1, asset_2]]
|
|
50
97
|
coint_flag, hedge_ratio = calculate_cointegration(df[asset_1], df[asset_2])
|
|
51
98
|
spread = df[asset_1] - (hedge_ratio * df[asset_2])
|
|
@@ -55,6 +102,19 @@ class pair_finder():
|
|
|
55
102
|
self.asset_2 = asset_2
|
|
56
103
|
|
|
57
104
|
def produce_zscore(self, window, z_threshold, verbose = False):
|
|
105
|
+
"""
|
|
106
|
+
producing z score from the spread. Also getting signals using window functions
|
|
107
|
+
|
|
108
|
+
Parameters
|
|
109
|
+
----------
|
|
110
|
+
window (int): window size
|
|
111
|
+
z_threshold (float): alpha and z threshold for the normalized feature
|
|
112
|
+
verbose (boolean): to print analysis
|
|
113
|
+
|
|
114
|
+
Returns
|
|
115
|
+
-------
|
|
116
|
+
None
|
|
117
|
+
"""
|
|
58
118
|
self.z_threshold = z_threshold
|
|
59
119
|
spread_series = pd.Series(self.df.spread)
|
|
60
120
|
mean = spread_series.rolling(center = False, window = window).mean()
|
|
@@ -74,7 +134,17 @@ class pair_finder():
|
|
|
74
134
|
self.df['low_pair_signal'] = low_signal
|
|
75
135
|
|
|
76
136
|
def plot_scores(self):
|
|
77
|
-
|
|
137
|
+
"""
|
|
138
|
+
display plot of the time series and signals and other plot for pair signal strategy
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
None
|
|
143
|
+
|
|
144
|
+
Returns
|
|
145
|
+
-------
|
|
146
|
+
None
|
|
147
|
+
"""
|
|
78
148
|
plt.axhline(y=0.0, color='grey', linestyle='--')
|
|
79
149
|
plt.figure(1, figsize = (10, 4))
|
|
80
150
|
plt.plot(self.df.spread.values)
|
|
@@ -104,7 +174,22 @@ class pair_finder():
|
|
|
104
174
|
fig.show()
|
|
105
175
|
|
|
106
176
|
def evaluate_signal(self, days_list,test_size, signal_position = False,threshold = 0.05,verbose = False, plot = False):
|
|
107
|
-
|
|
177
|
+
"""
|
|
178
|
+
evaluate the signal strategy using future returns
|
|
179
|
+
|
|
180
|
+
Parameters
|
|
181
|
+
----------
|
|
182
|
+
days_list (list): list of days future returns
|
|
183
|
+
test_size (int): test data size, the remaining is taken as training data
|
|
184
|
+
signal_position (int): position of the signal to open position
|
|
185
|
+
threshold (float): alpha or z threshold of the normalized feature
|
|
186
|
+
verbose (boolean): if True, print results
|
|
187
|
+
plot (boolean): if true, display plots
|
|
188
|
+
|
|
189
|
+
Returns
|
|
190
|
+
-------
|
|
191
|
+
None
|
|
192
|
+
"""
|
|
108
193
|
df = self.df.sort_values('Date').iloc[0:-test_size,:].copy()
|
|
109
194
|
returns_list = list()
|
|
110
195
|
|
|
@@ -206,6 +291,18 @@ class pair_finder():
|
|
|
206
291
|
del df
|
|
207
292
|
|
|
208
293
|
def create_backtest_signal(self,days_strategy, test_size):
|
|
294
|
+
"""
|
|
295
|
+
create a backtest of the strategy and get some plot analysis
|
|
296
|
+
|
|
297
|
+
Parameters
|
|
298
|
+
----------
|
|
299
|
+
days_strategy (int): list of days future returns
|
|
300
|
+
test_size (int): test data size, the remaining is taken as training data
|
|
301
|
+
|
|
302
|
+
Returns
|
|
303
|
+
-------
|
|
304
|
+
None
|
|
305
|
+
"""
|
|
209
306
|
asset_1 = self.asset_1
|
|
210
307
|
df1 = self.df.iloc[-test_size:,:].copy()
|
|
211
308
|
df2 = df1.copy()
|
|
@@ -273,7 +370,18 @@ class pair_finder():
|
|
|
273
370
|
del df1,df2,dft
|
|
274
371
|
|
|
275
372
|
def produce_big_dataset(data_frames, stocks_codes_, feature_list, limit = 500):
|
|
276
|
-
|
|
373
|
+
'''
|
|
374
|
+
combine multiple asset, taking a common schema
|
|
375
|
+
|
|
376
|
+
Parameters:
|
|
377
|
+
data_frames (pd.DataFrame): Base dataframe
|
|
378
|
+
stocks_codes_ (list): assets to select
|
|
379
|
+
feature_list (list): feature list
|
|
380
|
+
limit (int): number of observation per asset
|
|
381
|
+
|
|
382
|
+
Returns:
|
|
383
|
+
dataframe (pd.DataFrame): Base dataframe with extra data
|
|
384
|
+
'''
|
|
277
385
|
feature_list_ = list()
|
|
278
386
|
columns_vector = list(data_frames[stocks_codes_[-1]].columns )
|
|
279
387
|
for feat in feature_list:
|
|
@@ -301,7 +409,19 @@ def produce_big_dataset(data_frames, stocks_codes_, feature_list, limit = 500):
|
|
|
301
409
|
return dataframe
|
|
302
410
|
|
|
303
411
|
def ranking(data, weighted_features, top = 5, window = 5):
|
|
304
|
-
|
|
412
|
+
'''
|
|
413
|
+
Create a ranking of assets given current signals and weighted average importance
|
|
414
|
+
|
|
415
|
+
Parameters:
|
|
416
|
+
data (pd.Dataframe): base data
|
|
417
|
+
weighted_features (dict): configuration dictionary
|
|
418
|
+
top (int): top n to get result
|
|
419
|
+
window (int): number of days to assess
|
|
420
|
+
|
|
421
|
+
Returns:
|
|
422
|
+
top_up (list): top roof signal asset
|
|
423
|
+
top_low (list): top bottom signal asset
|
|
424
|
+
'''
|
|
305
425
|
features = weighted_features.keys()
|
|
306
426
|
up_columns = ['signal_up_' + x for x in features]
|
|
307
427
|
low_columns = ['signal_low_' + x for x in features]
|
|
@@ -333,19 +453,80 @@ def ranking(data, weighted_features, top = 5, window = 5):
|
|
|
333
453
|
top_up = list(df.sort_values('up_signas', ascending = False).index)[:top]
|
|
334
454
|
top_low = list(df.sort_values('low_signas', ascending = False).index)[:top]
|
|
335
455
|
|
|
336
|
-
return top_up, top_low
|
|
456
|
+
return top_up, top_low, df
|
|
457
|
+
|
|
458
|
+
def ranking_first(data, weighted_features, top = 5, window = 5):
    '''
    Create a ranking of assets that rewards signals firing on the most recent observation
    and penalizes signals that were already active on earlier days of the window.

    For every asset the last `window` rows (ordered by Date) are inspected, newest first.
    Per signal column the score is +1000 if the signal equals 1 on the newest row,
    -200 if it equals 1 on the previous row and -50 for every older row where it equals 1.

    Parameters:
        data (pd.Dataframe): base data; it is read through the columns 'Ticket', 'Date'
            and 'signal_up_<feature>' / 'signal_low_<feature>' for every feature
        weighted_features (dict): configuration dictionary; only its keys (feature names) are used
        top (int): top n to get result
        window (int): number of days to assess

    Returns:
        top_up (list): top roof signal asset
        top_low (list): top bottom signal asset
        df (pd.DataFrame): one row per asset with the score of every signal column plus
            the totals 'up_signas' and 'low_signas'
    '''
    features = list(weighted_features.keys())
    up_columns = ['signal_up_' + x for x in features]
    low_columns = ['signal_low_' + x for x in features]

    def compute_score(df, col, window):
        # df is expected newest-first. The loop is capped at the rows that exist so that
        # an asset with less history than `window` no longer raises IndexError.
        score = 0
        for i in range(min(window, len(df))):
            if df.iloc[i][col] != 1:
                continue
            if i == 0:
                score += 1000
            elif i == 1:
                score -= 200
            else:
                score -= 50
        return score

    result = dict()
    for ticket in data.Ticket.unique():
        # keep the latest `window` rows, then order them newest-first once
        # (instead of re-sorting for every signal column)
        recent = data[data.Ticket == ticket].sort_values('Date').iloc[-window:]
        recent = recent.sort_values('Date', ascending = False)
        result[ticket] = {
            col: compute_score(recent, col, window)
            for col in low_columns + up_columns
        }

    df = pd.DataFrame(result).T
    df['up_signas'] = df[up_columns].sum(axis=1)
    df['low_signas'] = df[low_columns].sum(axis=1)

    top_up = list(df.sort_values('up_signas', ascending = False).index)[:top]
    top_low = list(df.sort_values('low_signas', ascending = False).index)[:top]
    return top_up, top_low, df
|
|
337
511
|
|
|
338
512
|
def produce_dashboard(data, columns , ticket_list, show_plot = True, nrows = 150,save_name = False, save_path = False, save_aws = False, aws_credential = False):
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
513
|
+
'''
|
|
514
|
+
produce dashboard using signals and list of assets
|
|
515
|
+
|
|
516
|
+
Parameters:
|
|
517
|
+
data (pd.Dataframe): base data
|
|
518
|
+
columns (list): list of features or signals
|
|
519
|
+
ticket_list (list): list of assets
|
|
520
|
+
show_plot (boolean): if true, display plot
|
|
521
|
+
nrows (int): number of days back to display
|
|
522
|
+
save_name (str): dashboard name of the resulting file
|
|
523
|
+
save_path (str): local path for saving e.g r'C:/path/to/the/file/'
|
|
524
|
+
save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
|
|
525
|
+
aws_credential (dict): aws credentials
|
|
526
|
+
|
|
527
|
+
Returns:
|
|
528
|
+
None
|
|
529
|
+
'''
|
|
349
530
|
top = len(ticket_list)
|
|
350
531
|
columns = ['history'] + columns
|
|
351
532
|
subtitles = list()
|
|
@@ -393,9 +574,66 @@ def produce_dashboard(data, columns , ticket_list, show_plot = True, nrows = 150
|
|
|
393
574
|
# upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'multi_dashboards/'+save_name+'.json',input_path = save_path+save_name+'.json')
|
|
394
575
|
upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + save_name + '.json', input_path = save_path + save_name + '.json', aws_credentials = aws_credential)
|
|
395
576
|
|
|
577
|
+
def produce_edges_dashboard(dataframe, ticket_list, save_name, show_plot = False, save_path = False, save_aws = False, aws_credentials = False):
    '''
    build a plotly grid with two panels per asset: closing price and 'proba_target_up'

    Two assets are placed on every row of a four column grid; for each asset the left
    panel shows 'Close' and the right panel shows 'proba_target_up', both against 'Date'.

    Parameters:
        dataframe (pd.Dataframe): base data, filtered per asset through its 'asset' column
        ticket_list (list): list of assets
        save_name (str): file name used when writing the figure json
        show_plot (boolean): if true, display plot
        save_path (str): local path for saving e.g r'C:/path/to/the/file/'
        save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
        aws_credentials (dict): aws credentials

    Returns:
        None
    '''
    n_assets = len(ticket_list)
    total_rows = math.ceil(n_assets/2)

    # every asset contributes two titles: price panel and signal panel
    panel_titles = [title for name in ticket_list for title in (name, name + ' signal')]

    fig = make_subplots(rows=total_rows, cols=4, vertical_spacing = 0.01, horizontal_spacing = 0.03, shared_xaxes=True, subplot_titles = panel_titles)

    for position, ticket in enumerate(ticket_list):
        price_col = (position % 2) * 2 + 1   # 1 for even positions, 3 for odd ones
        grid_row = position // 2 + 1          # two assets share one row
        first_asset = position == 0           # legend entries are emitted once only

        asset_df = dataframe[dataframe.asset == ticket]
        price_trace = go.Scatter(x=asset_df['Date'], y=asset_df['Close'], legendgroup="Close", showlegend = first_asset, mode='lines', name = 'Close', marker_color = 'blue')
        proba_trace = go.Scatter(x=asset_df['Date'], y=asset_df['proba_target_up'], legendgroup="proba", showlegend = first_asset, mode='lines', name = 'proba_target_up', marker_color = 'orange')
        fig.add_trace(price_trace, col = price_col, row = grid_row)
        fig.add_trace(proba_trace, col = price_col + 1, row = grid_row)

    fig.update_layout(height=total_rows*300, width=1500, title_text = f'dashboard top {n_assets} tickets')

    if save_path:
        fig.write_json(save_path+save_name)
    if show_plot:
        fig.show()
    if save_path and save_aws:
        # the upload reads the json written above, hence it requires save_path as well
        upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + save_name, input_path = save_path + save_name, aws_credentials = aws_credentials)
|
|
396
624
|
|
|
397
625
|
def rank_by_return(data, lag_days, top_n = 5):
|
|
398
|
-
|
|
626
|
+
'''
|
|
627
|
+
produce ranking by returns
|
|
628
|
+
|
|
629
|
+
Parameters:
|
|
630
|
+
data (pd.Dataframe): base data
|
|
631
|
+
lag_days (int): number of days to consider
|
|
632
|
+
top_n (int): top n results assets
|
|
633
|
+
|
|
634
|
+
Returns:
|
|
635
|
+
result (list): resulting assets top n most important
|
|
636
|
+
'''
|
|
399
637
|
data = data.sort_values(['Ticket','Date'], ascending=[False,False]).reset_index(drop = True)
|
|
400
638
|
data['first'] = data.sort_values(['Date'], ascending=[False]).groupby(['Ticket']).cumcount() + 1
|
|
401
639
|
data = data[data['first'] <= lag_days]
|
|
@@ -416,18 +654,19 @@ def rank_by_return(data, lag_days, top_n = 5):
|
|
|
416
654
|
return result
|
|
417
655
|
|
|
418
656
|
def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_available: object = False, data_window:str = '5y') -> object:
|
|
419
|
-
|
|
420
|
-
this functions runs the stock_eda_panel
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
657
|
+
'''
|
|
658
|
+
this function runs the stock_eda_panel. It is shared between model training and predictions
|
|
659
|
+
|
|
660
|
+
Parameters:
|
|
661
|
+
ticker_name (str): name of the asset
|
|
662
|
+
ticket_settings (dict): dictionary with all the parameters to compute features
|
|
663
|
+
n_days (int): to set an arbitrary data size
|
|
664
|
+
hmm_available (obj): if the hmm is available, in prediction is required
|
|
665
|
+
data_window (str): window for the data extraction
|
|
666
|
+
|
|
667
|
+
Returns:
|
|
668
|
+
object_stock (obj): resulting object_stock object
|
|
669
|
+
'''
|
|
431
670
|
object_stock = stock_eda_panel(ticker_name , n_days, data_window)
|
|
432
671
|
object_stock.get_data()
|
|
433
672
|
|
|
@@ -461,7 +700,11 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
|
|
|
461
700
|
'stochastic_feature':'stochastic_feature',
|
|
462
701
|
'william_feature':'william_feature',
|
|
463
702
|
'vortex_feature':'vortex_feature',
|
|
464
|
-
'pair_index_feature':'pair_index_feature' # this has a diff structure!
|
|
703
|
+
'pair_index_feature':'pair_index_feature', # this has a diff structure!
|
|
704
|
+
'min_distance_pricefeature':'minmax_pricefeature',
|
|
705
|
+
'min_relprice_pricefeature':'minmax_pricefeature',
|
|
706
|
+
'max_distance_pricefeature':'minmax_pricefeature',
|
|
707
|
+
'max_relprice_pricefeature':'minmax_pricefeature',
|
|
465
708
|
}
|
|
466
709
|
exceptions = ['pair_feature','pair_index_feature']
|
|
467
710
|
### standar feature
|
|
@@ -506,6 +749,7 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
|
|
|
506
749
|
if len(discrete_features) > 0:
|
|
507
750
|
for feature_name in discrete_features:
|
|
508
751
|
object_stock.produce_order_features(feature_name)
|
|
752
|
+
object_stock.get_order_feature_nosignal(feature_name)
|
|
509
753
|
|
|
510
754
|
if hmm_available:
|
|
511
755
|
object_stock.cluster_hmm_analysis( n_clusters = None,
|
|
@@ -517,13 +761,24 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
|
|
|
517
761
|
object_stock.cluster_hmm_analysis( n_clusters = ticket_settings['settings']['hmm']['n_clusters'],
|
|
518
762
|
features_hmm = ticket_settings['settings']['hmm']['features_hmm'],
|
|
519
763
|
test_data_size = ticket_settings['settings']['hmm']['test_data_size'],
|
|
520
|
-
seed = ticket_settings['settings']['hmm']['seed']
|
|
764
|
+
seed = ticket_settings['settings']['hmm']['seed'],
|
|
765
|
+
corr_threshold = ticket_settings['settings']['hmm'].get('corr_threshold',0.75),
|
|
766
|
+
lag_returns_state = ticket_settings['settings']['hmm'].get('lag_returns_state',7),
|
|
767
|
+
)
|
|
521
768
|
|
|
522
769
|
return object_stock
|
|
523
770
|
|
|
524
771
|
trends = {'adjusted' : 0.001, 'smooth' : 0.0001}
|
|
525
772
|
|
|
526
773
|
def apply_KF(self, trends):
|
|
774
|
+
'''
|
|
775
|
+
create kalman filter feature and attach it to the stock_eda_panel object
|
|
776
|
+
|
|
777
|
+
Parameters:
|
|
778
|
+
trends (dict): configurations of the kalman filter
|
|
779
|
+
Returns:
|
|
780
|
+
none
|
|
781
|
+
'''
|
|
527
782
|
for ttrend in trends:
|
|
528
783
|
tcov = trends.get(ttrend)
|
|
529
784
|
kf = KalmanFilter(transition_matrices = [1],
|
|
@@ -537,11 +792,24 @@ def apply_KF(self, trends):
|
|
|
537
792
|
|
|
538
793
|
stock_eda_panel.apply_KF = apply_KF
|
|
539
794
|
|
|
540
|
-
def call_ml_objects(stock_code, client, call_models = False):
|
|
541
|
-
|
|
795
|
+
def call_ml_objects(stock_code, client, call_models = False, clean_name=False):
|
|
796
|
+
'''
|
|
797
|
+
call artifacts from mlflow
|
|
798
|
+
|
|
799
|
+
Parameters:
|
|
800
|
+
stock_code (str): asset name
|
|
801
|
+
client (obj): mlflow client
|
|
802
|
+
call_models (boolean): if true, call ml artifacts
|
|
803
|
+
Returns:
|
|
804
|
+
objects (dict): that contains ml artifacts, data , configs and models
|
|
805
|
+
'''
|
|
542
806
|
objects = dict()
|
|
543
807
|
|
|
544
|
-
|
|
808
|
+
if clean_name:
|
|
809
|
+
renamed_stock_code = stock_code.replace("^","__",).replace(".","__").replace("=","__").replace("-","__")
|
|
810
|
+
registered_model_name = f'{renamed_stock_code}_models'
|
|
811
|
+
else:
|
|
812
|
+
registered_model_name = f'{stock_code}_models'
|
|
545
813
|
latest_version_info = client.get_latest_versions(registered_model_name, stages=["Production"])
|
|
546
814
|
latest_production_version = latest_version_info[0].version
|
|
547
815
|
run_id_prod_model = latest_version_info[0].run_id
|
|
@@ -552,18 +820,27 @@ def call_ml_objects(stock_code, client, call_models = False):
|
|
|
552
820
|
)
|
|
553
821
|
|
|
554
822
|
## calling models
|
|
555
|
-
|
|
823
|
+
if clean_name:
|
|
824
|
+
path_hmm = f"runs:/{run_id_prod_model}/{renamed_stock_code}-hmm-model"
|
|
825
|
+
else:
|
|
826
|
+
path_hmm = f"runs:/{run_id_prod_model}/{stock_code}-hmm-model"
|
|
827
|
+
|
|
556
828
|
hmm_model = mlflow.pyfunc.load_model(
|
|
557
|
-
|
|
558
|
-
|
|
829
|
+
path_hmm,
|
|
830
|
+
suppress_warnings = True
|
|
559
831
|
)
|
|
560
832
|
objects['called_hmm_models'] = hmm_model
|
|
561
833
|
|
|
562
834
|
if call_models:
|
|
563
835
|
|
|
836
|
+
if clean_name:
|
|
837
|
+
path_model = f"runs:/{run_id_prod_model}/{renamed_stock_code}-forecasting-model"
|
|
838
|
+
else:
|
|
839
|
+
path_model = f"runs:/{run_id_prod_model}/{stock_code}-forecasting-model"
|
|
840
|
+
|
|
564
841
|
forecasting_model = mlflow.pyfunc.load_model(
|
|
565
|
-
|
|
566
|
-
|
|
842
|
+
path_model,
|
|
843
|
+
suppress_warnings = True
|
|
567
844
|
)
|
|
568
845
|
objects['called_forecasting_model'] = forecasting_model
|
|
569
846
|
|
|
@@ -584,17 +861,57 @@ def call_ml_objects(stock_code, client, call_models = False):
|
|
|
584
861
|
return objects
|
|
585
862
|
|
|
586
863
|
class produce_plotly_plots:
|
|
864
|
+
"""
|
|
865
|
+
class that helps to produce different dashboards
|
|
866
|
+
|
|
867
|
+
Attributes
|
|
868
|
+
----------
|
|
869
|
+
ticket_name : str
|
|
870
|
+
asset name
|
|
871
|
+
data_frame (pd.DataFrame): asset data
|
|
872
|
+
settings : dict
|
|
873
|
+
asset configurations
|
|
874
|
+
show_plot : boolean
|
|
875
|
+
if true, display plots
|
|
876
|
+
save_path : str
|
|
877
|
+
local path for saving e.g r'C:/path/to/the/file/'
|
|
878
|
+
save_aws : str
|
|
879
|
+
remote key in s3 bucket path e.g. 'path/to/file/'
|
|
880
|
+
aws_credentials : dict
|
|
881
|
+
aws credentials
|
|
882
|
+
return_figs : boolean
|
|
883
|
+
if true, methods will return objects
|
|
884
|
+
|
|
885
|
+
Methods
|
|
886
|
+
-------
|
|
887
|
+
plot_asset_signals(feature_list=list, spread_column=list, date_intervals=list):
|
|
888
|
+
Display signals and hmm states over closing prices and feature time series
|
|
889
|
+
explore_states_ts():
|
|
890
|
+
display scaled time series of every hmm state
|
|
891
|
+
plot_hmm_analysis(settings=dict, t_matrix=txt, model=obj):
|
|
892
|
+
display plots that analyse hmm states
|
|
893
|
+
produce_forecasting_plot(predictions=pd.DataFrame):
|
|
894
|
+
display forecasting plots
|
|
895
|
+
"""
|
|
587
896
|
def __init__(self,ticket_name, data_frame,settings, save_path = False, save_aws = False, show_plot= True, aws_credentials = False, return_figs = False):
|
|
588
897
|
"""
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
898
|
+
Initialize object
|
|
899
|
+
|
|
900
|
+
Parameters
|
|
901
|
+
----------
|
|
902
|
+
ticket_name (str): asset name
|
|
903
|
+
data_frame (pd.DataFrame): asset data
|
|
904
|
+
settings (dict): asset configurations
|
|
905
|
+
show_plot (boolean): if true, display plots
|
|
906
|
+
save_path (str): local path for saving e.g r'C:/path/to/the/file/'
|
|
907
|
+
save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
|
|
908
|
+
aws_credentials (dict): aws credentials
|
|
909
|
+
return_figs (boolean): if true, methods will return objects
|
|
910
|
+
|
|
911
|
+
Returns
|
|
912
|
+
-------
|
|
913
|
+
None
|
|
596
914
|
"""
|
|
597
|
-
|
|
598
915
|
self.ticket_name = ticket_name
|
|
599
916
|
self.data_frame = data_frame
|
|
600
917
|
self.settings = settings
|
|
@@ -604,13 +921,44 @@ class produce_plotly_plots:
|
|
|
604
921
|
self.aws_credentials = aws_credentials
|
|
605
922
|
self.return_figs = return_figs
|
|
606
923
|
|
|
607
|
-
def plot_asset_signals(self, feature_list,spread_column, date_intervals = False):
|
|
608
|
-
|
|
924
|
+
def plot_asset_signals(self, feature_list,spread_column, date_intervals = False, look_back = 800):
|
|
925
|
+
"""
|
|
926
|
+
Display signals and hmm states over closing prices and feature time series
|
|
927
|
+
|
|
928
|
+
Parameters
|
|
929
|
+
----------
|
|
930
|
+
feature_list (list): signal list
|
|
931
|
+
spread_column (list): moving average list
|
|
932
|
+
date_intervals (list): list of tuples of dates, e.g [('2022-01-01','2023-01-01'),('2022-01-01','2023-01-01')]
|
|
933
|
+
|
|
934
|
+
Returns
|
|
935
|
+
-------
|
|
936
|
+
fig (obj): plotly dashboard
|
|
937
|
+
"""
|
|
609
938
|
result_json_name = 'panel_signals.json'
|
|
610
939
|
df = self.data_frame
|
|
940
|
+
if look_back:
|
|
941
|
+
df = df.iloc[-look_back:,:]
|
|
611
942
|
ma1 = self.settings['settings'][spread_column]['ma1']
|
|
612
943
|
ma2 = self.settings['settings'][spread_column]['ma2']
|
|
613
944
|
hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
|
|
945
|
+
|
|
946
|
+
def return_FeatureSingal_lists(feature, feature_2):
|
|
947
|
+
signal_up_list = [f'signal_up_{feature}', f'signal_up_{feature_2}']
|
|
948
|
+
signal_low_list = [f'signal_low_{feature}', f'signal_low_{feature_2}']
|
|
949
|
+
norm_list = [f'norm_{feature}', f'z_{feature}', feature]
|
|
950
|
+
return norm_list, signal_up_list, signal_low_list
|
|
951
|
+
|
|
952
|
+
# feature_list corrector
|
|
953
|
+
new_feature_list = list()
|
|
954
|
+
for feature in feature_list:
|
|
955
|
+
norm_list, _ , _ = return_FeatureSingal_lists(feature, '')
|
|
956
|
+
for norm_feat in norm_list:
|
|
957
|
+
if norm_feat in df.columns:
|
|
958
|
+
new_feature_list.append(feature)
|
|
959
|
+
break
|
|
960
|
+
|
|
961
|
+
feature_list = new_feature_list
|
|
614
962
|
feature_rows = len(feature_list)
|
|
615
963
|
|
|
616
964
|
rows_subplot = feature_rows + 1
|
|
@@ -627,9 +975,8 @@ class produce_plotly_plots:
|
|
|
627
975
|
### signal plots
|
|
628
976
|
for row_i, feature in enumerate(feature_list,start=1):
|
|
629
977
|
feature_2 = 'nan'
|
|
630
|
-
signal_up_list =
|
|
631
|
-
|
|
632
|
-
norm_list = [f'norm_{feature}', f'z_{feature}', feature]
|
|
978
|
+
norm_list, signal_up_list, signal_low_list = return_FeatureSingal_lists(feature, feature_2)
|
|
979
|
+
|
|
633
980
|
# signal
|
|
634
981
|
for norm_feat in norm_list:
|
|
635
982
|
if norm_feat in df.columns:
|
|
@@ -647,7 +994,7 @@ class produce_plotly_plots:
|
|
|
647
994
|
for signal_low in signal_low_list:
|
|
648
995
|
if signal_low in df.columns:
|
|
649
996
|
fig.add_trace(go.Scatter(x=df['Date'], y=np.where(df[signal_low] == 1, df[norm_feat], np.nan),showlegend= False, mode='markers', marker_color = 'red'),col = 1, row = row_i)
|
|
650
|
-
|
|
997
|
+
fig.add_hline(y=0, line_width=2, line_dash="dash", line_color="grey",col = 1, row = row_i)
|
|
651
998
|
fig.update_layout(height=height_plot, width=1600, title_text = f'asset plot and signals: {self.ticket_name}')
|
|
652
999
|
|
|
653
1000
|
## state plot with close prices
|
|
@@ -679,6 +1026,17 @@ class produce_plotly_plots:
|
|
|
679
1026
|
return fig
|
|
680
1027
|
|
|
681
1028
|
def explore_states_ts(self):
|
|
1029
|
+
"""
|
|
1030
|
+
display scaled time series of every hmm state
|
|
1031
|
+
|
|
1032
|
+
Parameters
|
|
1033
|
+
----------
|
|
1034
|
+
None
|
|
1035
|
+
|
|
1036
|
+
Returns
|
|
1037
|
+
-------
|
|
1038
|
+
fig (obj): plotly dashboard
|
|
1039
|
+
"""
|
|
682
1040
|
result_json_name = 'ts_hmm.json'
|
|
683
1041
|
df = self.data_frame
|
|
684
1042
|
hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
|
|
@@ -693,7 +1051,6 @@ class produce_plotly_plots:
|
|
|
693
1051
|
if len(states_subtitles)%2 == 1:
|
|
694
1052
|
states_subtitles = states_subtitles + [None]
|
|
695
1053
|
|
|
696
|
-
|
|
697
1054
|
fig = make_subplots(
|
|
698
1055
|
rows= rows_subplot, cols=2,
|
|
699
1056
|
specs = [[{"type": "scatter"},{"type": "scatter"}]]*state_rows,
|
|
@@ -727,6 +1084,20 @@ class produce_plotly_plots:
|
|
|
727
1084
|
return fig
|
|
728
1085
|
|
|
729
1086
|
def plot_hmm_analysis(self,settings, t_matrix, model = False):
|
|
1087
|
+
"""
|
|
1088
|
+
display plots that analyse hmm states
|
|
1089
|
+
|
|
1090
|
+
Parameters
|
|
1091
|
+
----------
|
|
1092
|
+
settings (dict): asset configurations
|
|
1093
|
+
t_matrix (txt): asset state transition matrix
|
|
1094
|
+
model(obj): hmm model
|
|
1095
|
+
|
|
1096
|
+
Returns
|
|
1097
|
+
-------
|
|
1098
|
+
fig (obj): plotly dashboard
|
|
1099
|
+
messages (dict): hmm model metrics
|
|
1100
|
+
"""
|
|
730
1101
|
result_json_name = 'hmm_analysis.json'
|
|
731
1102
|
df = self.data_frame
|
|
732
1103
|
hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
|
|
@@ -737,7 +1108,7 @@ class produce_plotly_plots:
|
|
|
737
1108
|
states = list(df.hmm_feature.unique())
|
|
738
1109
|
states.sort()
|
|
739
1110
|
### expand hmm analysis
|
|
740
|
-
hmm_titles = ['
|
|
1111
|
+
hmm_titles = ['state return (base first observation)','Transition matrix heatmap','length chains dist']
|
|
741
1112
|
|
|
742
1113
|
fig = make_subplots(
|
|
743
1114
|
rows= rows_subplot, cols=2,
|
|
@@ -758,10 +1129,16 @@ class produce_plotly_plots:
|
|
|
758
1129
|
df_ = df[['Date','hmm_feature','Close',"chain_return"]].sort_values('Date')
|
|
759
1130
|
df_['Daily_Returns'] = df['Close'].pct_change(7)
|
|
760
1131
|
|
|
1132
|
+
df_agg_returns = df_.groupby('hmm_feature', as_index = False).agg(median =('Daily_Returns','median')).copy()
|
|
1133
|
+
current_state = df_.iloc[-1,:].hmm_feature
|
|
1134
|
+
medain_state_return = df_agg_returns[ df_agg_returns.hmm_feature == current_state]['median'].values[0]
|
|
1135
|
+
type_state = 'low state' if medain_state_return < 0 else 'high state'
|
|
1136
|
+
|
|
761
1137
|
for state in states:
|
|
762
1138
|
dfi = df_[df_.hmm_feature == state]
|
|
763
1139
|
fig.add_trace(go.Box(y = dfi.chain_return, name=str(state),showlegend=False, marker_color = color_map[state] ),row=1, col=1)
|
|
764
|
-
|
|
1140
|
+
fig.add_hline(y=0, line_width=2, line_dash="dash", line_color="grey",row=1, col=1)
|
|
1141
|
+
|
|
765
1142
|
## lengths chains by state dist
|
|
766
1143
|
if 'hmm_chain_order' in df.columns:
|
|
767
1144
|
df_agg = df.groupby(['hmm_feature','chain_id'],as_index = False).agg(length_by_chain = ('hmm_chain_order','max'))
|
|
@@ -802,20 +1179,20 @@ class produce_plotly_plots:
|
|
|
802
1179
|
fig.add_trace(go.Box(x = dfi.importance, name=str(feature),showlegend=False ),row=2, col=2)
|
|
803
1180
|
fig.update_yaxes(visible=False, title="feature",row=2, col=2)
|
|
804
1181
|
|
|
805
|
-
|
|
806
1182
|
fig.update_layout(height=height_plot, width=1600, title_text = f'State model analysis: {self.ticket_name}', coloraxis=dict(colorbar_len=0.50))
|
|
807
1183
|
|
|
808
1184
|
date_execution = datetime.datetime.today().strftime('%Y-%m-%d')
|
|
809
1185
|
current_step = df.iloc[-1,:].hmm_chain_order
|
|
810
1186
|
current_state = df.iloc[-1,:].hmm_feature
|
|
811
|
-
message1 =
|
|
812
|
-
message2 =
|
|
1187
|
+
message1 = str(current_state)
|
|
1188
|
+
message2 = str(current_step)
|
|
813
1189
|
message3 = str(date_execution)
|
|
814
1190
|
|
|
815
1191
|
messages = {
|
|
816
1192
|
'current state':message1,
|
|
817
1193
|
'current step in state': message2,
|
|
818
1194
|
'execution date':message3,
|
|
1195
|
+
'type state':type_state,
|
|
819
1196
|
}
|
|
820
1197
|
|
|
821
1198
|
if self.show_plot:
|
|
@@ -847,7 +1224,27 @@ class produce_plotly_plots:
|
|
|
847
1224
|
|
|
848
1225
|
if self.return_figs:
|
|
849
1226
|
return fig, messages
|
|
850
|
-
|
|
1227
|
+
|
|
1228
|
+
def produce_forecasting_plot(self,predictions, window=30):
|
|
1229
|
+
"""
|
|
1230
|
+
display forecasting plots
|
|
1231
|
+
|
|
1232
|
+
Parameters
|
|
1233
|
+
----------
|
|
1234
|
+
predictions (pd.DataFrame): asset predictions
|
|
1235
|
+
window (int): historical data to display
|
|
1236
|
+
|
|
1237
|
+
Returns
|
|
1238
|
+
-------
|
|
1239
|
+
None
|
|
1240
|
+
"""
|
|
1241
|
+
def qs(x):
|
|
1242
|
+
return x.quantile(0.05)
|
|
1243
|
+
def qm(x):
|
|
1244
|
+
return x.quantile(0.50)
|
|
1245
|
+
def ql(x):
|
|
1246
|
+
return x.quantile(0.95)
|
|
1247
|
+
|
|
851
1248
|
result_json_name = 'forecast_plot.json'
|
|
852
1249
|
hmm_n_clust = self.settings['settings']['hmm']['n_clusters']
|
|
853
1250
|
model_type = self.settings.get('model_type',False)
|
|
@@ -863,8 +1260,6 @@ class produce_plotly_plots:
|
|
|
863
1260
|
[{"type": "scatter"}, {"type": "scatter"}]],
|
|
864
1261
|
subplot_titles = [f'asset returns {lags} lags', 'closing prices', 'hidden states']
|
|
865
1262
|
)
|
|
866
|
-
|
|
867
|
-
|
|
868
1263
|
predictions = predictions[predictions.StockCode == self.ticket_name]
|
|
869
1264
|
if len(predictions) > 1:
|
|
870
1265
|
|
|
@@ -880,12 +1275,18 @@ class produce_plotly_plots:
|
|
|
880
1275
|
last_exe_prediction_date = predictions.ExecutionDate.unique()
|
|
881
1276
|
last_date = max(last_exe_prediction_date)
|
|
882
1277
|
|
|
883
|
-
history =
|
|
1278
|
+
history = self.data_frame.sort_values('Date').iloc[-window:,:]
|
|
884
1279
|
cut_date = history.loc[history.iloc[-1:,:].index[0]:,'Date'].item()
|
|
885
|
-
|
|
886
1280
|
prediction = predictions[predictions.Type == 'Prediction']
|
|
887
1281
|
|
|
888
1282
|
## log returns
|
|
1283
|
+
def add_intervals(data, feature, i, w=3):
    """
    draw rolling quantile bands of a feature on the subplot at row 1, column i

    Parameters:
        data (pd.DataFrame): rows holding a 'Date' column and the feature column
        feature (str): name of the column to summarise
        i (int): subplot column (first row of fig) that receives the traces
        w (int): rolling window size in rows. The argument used to be ignored
            and the window was hard-coded to 3, so 3 is kept as the default

    Returns:
        None
    """
    ordered = data.sort_values('Date')[['Date', feature]]
    # qs / qm / ql are the 5%, 50% and 95% quantile helpers of the enclosing scope.
    # The lowest band is drawn plain, each following band fills down to the previous trace.
    for quantile_fn, fill in ((qs, None), (qm, 'tonexty'), (ql, 'tonexty')):
        band = (
            ordered
            .rolling(w, min_periods=1, on='Date')
            .apply(quantile_fn)
            .groupby('Date', as_index=False)[feature]
            .max()
        )
        # only pass `fill` when it is needed so the first trace is built exactly as before
        extra = {} if fill is None else {'fill': fill}
        fig.add_trace(
            go.Scatter(
                x=band.Date,
                y=band[feature],
                mode='lines',
                marker_color='#D0D0D0',
                showlegend=False,
                opacity=0.05,
                **extra
            ),
            row=1, col=i
        )
|
|
889
1290
|
|
|
890
1291
|
fig.add_trace(go.Scatter(x=history.Date, y=history.log_return, mode='lines',marker_color ='blue',showlegend=False),row=1, col=1)
|
|
891
1292
|
|
|
@@ -896,9 +1297,10 @@ class produce_plotly_plots:
|
|
|
896
1297
|
df = prediction[prediction.ExecutionDate == last_date]
|
|
897
1298
|
fig.add_trace(go.Scatter(x=df.Date, y=df.log_return, mode='lines',marker_color ='#ff7f0e',showlegend=False),row=1, col=1)
|
|
898
1299
|
fig.add_trace(go.Scatter(x=df.Date, y=df.log_return, mode='markers',marker_color ='#ff7f0e',showlegend=False),row=1, col=1)
|
|
1300
|
+
fig.add_hline(y=0, line_width=2, line_dash="dash", line_color="grey",col = 1, row = 1)
|
|
1301
|
+
add_intervals(data=prediction,feature='log_return',i=1)
|
|
899
1302
|
|
|
900
1303
|
## closing prices
|
|
901
|
-
|
|
902
1304
|
fig.add_trace(go.Scatter(x=history.Date, y=history.Close, mode='lines',marker_color ='blue',showlegend=False),row=1, col=2)
|
|
903
1305
|
for i,datex in enumerate([x for x in last_exe_prediction_date if x != last_date]):
|
|
904
1306
|
df = prediction[prediction.ExecutionDate == datex]
|
|
@@ -908,6 +1310,7 @@ class produce_plotly_plots:
|
|
|
908
1310
|
fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='lines',marker_color ='#ff7f0e',showlegend=False),row=1, col=2)
|
|
909
1311
|
fig.add_trace(go.Scatter(x=df.Date, y=df.Close, mode='markers',marker_color ='#ff7f0e',showlegend=False),row=1, col=2)
|
|
910
1312
|
fig.update_layout(height=height_plot, width=1600, title_text = f'forecasts: {self.ticket_name}')
|
|
1313
|
+
add_intervals(data=prediction,feature='Close',i=2)
|
|
911
1314
|
else:
|
|
912
1315
|
print('no forecasting history')
|
|
913
1316
|
|
|
@@ -918,9 +1321,22 @@ class produce_plotly_plots:
|
|
|
918
1321
|
if self.save_path and self.save_aws:
|
|
919
1322
|
# upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_json_name ,input_path = self.save_path+result_json_name)
|
|
920
1323
|
upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
|
|
921
|
-
|
|
1324
|
+
if self.return_figs:
|
|
1325
|
+
return fig
|
|
1326
|
+
|
|
922
1327
|
def plot_hmm_analysis_logger(data_frame,test_data_size, save_path = False, show_plot = True):
|
|
923
|
-
|
|
1328
|
+
'''
|
|
1329
|
+
display box plots train and test of hmm state returns
|
|
1330
|
+
|
|
1331
|
+
Parameters:
|
|
1332
|
+
data_frame (pd.DataFrame): asset data
|
|
1333
|
+
test_data_size (int): test data size, the remaining is training data
|
|
1334
|
+
save_path (str): path/to/save/
|
|
1335
|
+
show_plot (boolean): if true, display plot
|
|
1336
|
+
|
|
1337
|
+
Returns:
|
|
1338
|
+
None
|
|
1339
|
+
'''
|
|
924
1340
|
df = data_frame
|
|
925
1341
|
df_ = df[['Date','hmm_feature','Close',"chain_return"]].sort_values('Date')
|
|
926
1342
|
fig, axs = plt.subplots(1,2,figsize=(10,4))
|
|
@@ -934,7 +1350,18 @@ def plot_hmm_analysis_logger(data_frame,test_data_size, save_path = False, show_
|
|
|
934
1350
|
plt.close()
|
|
935
1351
|
|
|
936
1352
|
def plot_hmm_tsanalysis_logger(data_frame, test_data_size,save_path = False, show_plot = True):
|
|
937
|
-
|
|
1353
|
+
'''
|
|
1354
|
+
display time series hmm state analisys
|
|
1355
|
+
|
|
1356
|
+
Parameters:
|
|
1357
|
+
data_frame (pd.DataFrame): asset data
|
|
1358
|
+
test_data_size (int): test data size, the remaining is training data
|
|
1359
|
+
save_path (str): path/to/save/
|
|
1360
|
+
show_plot (boolean): if true, display plot
|
|
1361
|
+
|
|
1362
|
+
Returns:
|
|
1363
|
+
None
|
|
1364
|
+
'''
|
|
938
1365
|
df = data_frame
|
|
939
1366
|
df_ = df[['Date','hmm_feature','Close',"chain_return"]].sort_values('Date')
|
|
940
1367
|
states = list(df_['hmm_feature'].unique())
|
|
@@ -961,7 +1388,20 @@ def plot_hmm_tsanalysis_logger(data_frame, test_data_size,save_path = False, sho
|
|
|
961
1388
|
plt.close()
|
|
962
1389
|
|
|
963
1390
|
def extract_data_traintest(object_stock,features_to_search,configs, target_configs, window_analysis = False, drop_nan= True):
|
|
964
|
-
|
|
1391
|
+
'''
|
|
1392
|
+
code snippet that execute object_stock or stock_eda_panel to get features
|
|
1393
|
+
|
|
1394
|
+
Parameters:
|
|
1395
|
+
object_stock (object): stock_eda_panel object
|
|
1396
|
+
features_to_search (list): list of features
|
|
1397
|
+
configs (dict): asset configurations
|
|
1398
|
+
target_configs (dict): target configurations
|
|
1399
|
+
window_analysis (int): take a sample size data
|
|
1400
|
+
drop_nan (boolean): remove nans from the data
|
|
1401
|
+
|
|
1402
|
+
Returns:
|
|
1403
|
+
object_stock (obj): object_stock with features and signals
|
|
1404
|
+
'''
|
|
965
1405
|
object_stock.get_data()
|
|
966
1406
|
object_stock.volatility_analysis(**configs['volatility']['config_params'], plot = False, save_features = False)
|
|
967
1407
|
target_params_up = target_configs['params_up']
|
|
@@ -972,7 +1412,26 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
|
|
|
972
1412
|
arguments_to_use = configs[feature_name]['config_params']
|
|
973
1413
|
method_to_use = configs[feature_name]['method']
|
|
974
1414
|
getattr(object_stock, method_to_use)(**arguments_to_use, plot = False, save_features = False)
|
|
975
|
-
|
|
1415
|
+
if method_to_use not in ['minmax_pricefeature']:
|
|
1416
|
+
object_stock.produce_order_features(feature_name)
|
|
1417
|
+
object_stock.get_order_feature_nosignal(feature_name)
|
|
1418
|
+
last_signal_featlist = configs.get('custom_transformations',{}).get('compute_last_signal', False)
|
|
1419
|
+
if last_signal_featlist:
|
|
1420
|
+
last_signal_featlist = last_signal_featlist
|
|
1421
|
+
last_signal_featlist = last_signal_featlist.split('//')
|
|
1422
|
+
if feature_name in last_signal_featlist:
|
|
1423
|
+
object_stock.compute_last_signal(feature_name, False)
|
|
1424
|
+
volatility_features = configs.get('custom_transformations',{}).get('volatility_features', False)
|
|
1425
|
+
if volatility_features:
|
|
1426
|
+
for al in volatility_features:
|
|
1427
|
+
object_stock.lag_log_return(lags = al, feature="Close", feature_name=f"asset_{al}_logreturn")
|
|
1428
|
+
object_stock.produce_log_volatility(trad_days=al,feature=f"asset_{al}_logreturn",feature_name=f"asset_{al}_volatility")
|
|
1429
|
+
market_interaction_features = configs.get('custom_transformations',{}).get('market_interaction_features', False)
|
|
1430
|
+
if market_interaction_features:
|
|
1431
|
+
for stage in market_interaction_features.keys():
|
|
1432
|
+
method_to_use = market_interaction_features.get(stage).get("method")
|
|
1433
|
+
arguments_to_use = market_interaction_features.get(stage).get("parameters")
|
|
1434
|
+
getattr(object_stock, method_to_use)(**arguments_to_use)
|
|
976
1435
|
# geting targets
|
|
977
1436
|
object_stock.get_categorical_targets(**target_params_up)
|
|
978
1437
|
object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})
|
|
@@ -987,7 +1446,19 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
|
|
|
987
1446
|
return object_stock
|
|
988
1447
|
|
|
989
1448
|
def produce_simple_ts_from_model(stock_code, configs, n_days = 2000 , window_scope = '5y'):
|
|
990
|
-
|
|
1449
|
+
'''
|
|
1450
|
+
display dashboard analysis of a given asset
|
|
1451
|
+
|
|
1452
|
+
Parameters:
|
|
1453
|
+
stock_code (str): asset name
|
|
1454
|
+
configs (dict): asset configurations
|
|
1455
|
+
n_days (int): data size
|
|
1456
|
+
window_scope (str): window data size
|
|
1457
|
+
|
|
1458
|
+
Returns:
|
|
1459
|
+
fig (obj): plotly dashboard
|
|
1460
|
+
df (pd.DataFrame): result asset dataset
|
|
1461
|
+
'''
|
|
991
1462
|
## getting data
|
|
992
1463
|
volat_args = {'lags': 3, 'trad_days': 15, 'window_log_return': 10}
|
|
993
1464
|
|
|
@@ -1038,7 +1509,7 @@ def produce_simple_ts_from_model(stock_code, configs, n_days = 2000 , window_sco
|
|
|
1038
1509
|
for signal_low in signal_low_list:
|
|
1039
1510
|
if signal_low in df.columns:
|
|
1040
1511
|
fig.add_trace(go.Scatter(x=df['Date'], y=np.where(df[signal_low] == 1, df[norm_feat], np.nan),showlegend= False, mode='markers', marker_color = 'red'),col = 1, row = row_i)
|
|
1041
|
-
|
|
1512
|
+
fig.add_hline(y=0, line_width=2, line_dash="dash", line_color="grey",col = 1, row = row_i)
|
|
1042
1513
|
fig.update_layout(height=height_plot, width=1600, title_text = f'asset plot and signals: {stock_code}')
|
|
1043
1514
|
|
|
1044
1515
|
del object_stock
|
|
@@ -1046,17 +1517,21 @@ def produce_simple_ts_from_model(stock_code, configs, n_days = 2000 , window_sco
|
|
|
1046
1517
|
return fig, df
|
|
1047
1518
|
|
|
1048
1519
|
def save_edge_model(data, save_path = False, save_aws = False, show_result = False, aws_credentials = False):
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1520
|
+
'''
|
|
1521
|
+
get latest edge execution and edge probability
|
|
1522
|
+
|
|
1523
|
+
Parameters:
|
|
1524
|
+
data (pd.DataFrame): asset data
|
|
1525
|
+
model_name (str): model name
|
|
1526
|
+
ticket_name (str): name of the asset
|
|
1527
|
+
save_path (str): local path for saving e.g r'C:/path/to/the/file/'
|
|
1528
|
+
save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
|
|
1529
|
+
show_results (bool): if true, display results
|
|
1530
|
+
aws_credentials (dict): aws credentials
|
|
1531
|
+
|
|
1532
|
+
Returns:
|
|
1533
|
+
None
|
|
1534
|
+
'''
|
|
1060
1535
|
today = datetime.datetime.today().strftime('%Y-%m-%d')
|
|
1061
1536
|
|
|
1062
1537
|
curent_edge = (
|
|
@@ -1079,10 +1554,25 @@ def save_edge_model(data, save_path = False, save_aws = False, show_result = Fal
|
|
|
1079
1554
|
if show_result:
|
|
1080
1555
|
print(curent_edge)
|
|
1081
1556
|
|
|
1557
|
+
## this function is going to be split and deprecated
|
|
1082
1558
|
def create_feature_edge(model, data,feature_name, threshold, target_variables):
|
|
1083
|
-
|
|
1559
|
+
'''
|
|
1560
|
+
get latest edge execution and edge probability
|
|
1561
|
+
|
|
1562
|
+
Parameters:
|
|
1563
|
+
model (obj): edge model artifact
|
|
1564
|
+
data (pd.DataFrame): asset data
|
|
1565
|
+
feature_name (str): edge feature name
|
|
1566
|
+
threshold (float): edge threshold
|
|
1567
|
+
target_variables (list): names of the target columns
|
|
1568
|
+
|
|
1569
|
+
Returns:
|
|
1570
|
+
result_df (pd.DataFrame): result dataframe with edges
|
|
1571
|
+
'''
|
|
1084
1572
|
label_prediction = ['proba_'+x for x in target_variables]
|
|
1085
1573
|
predictions = model.predict_proba(data)
|
|
1574
|
+
if isinstance(predictions, list):
|
|
1575
|
+
predictions = np.array([ x[:,1].T for x in predictions]).T
|
|
1086
1576
|
predictions = pd.DataFrame(predictions, columns = label_prediction, index = data.index)
|
|
1087
1577
|
|
|
1088
1578
|
result_df = pd.concat([data, predictions], axis=1)
|
|
@@ -1095,4 +1585,57 @@ def create_feature_edge(model, data,feature_name, threshold, target_variables):
|
|
|
1095
1585
|
result_df[f'signal_{type_use}_{feature_name}'] = np.where(result_df[pred_col] >= threshold,1,0)
|
|
1096
1586
|
result_df[f'acc_{type_use}_{feature_name}'] = np.where(result_df[f'signal_{type_use}_{feature_name}'] == result_df[pred_col.replace('proba_','')],1,0)
|
|
1097
1587
|
|
|
1098
|
-
return result_df
|
|
1588
|
+
return result_df
|
|
1589
|
+
|
|
1590
|
+
def produce_probas(model, data, target_variables):
    """
    produce probabilities given a model

    Parameters:
        model (obj): edge model artifact, must expose predict_proba
        data (pd.DataFrame): asset data, must contain 'Date' and the target columns
        target_variables (list): names of the target columns (any sequence of str is accepted)

    Returns:
        result_df (pd.DataFrame): 'Date', the target columns and one 'proba_' column per target
        label_prediction (list): list of resulting label columns
    """
    # coerce once so tuples / pandas Index objects survive the list concatenation below
    target_variables = list(target_variables)
    label_prediction = ['proba_' + x for x in target_variables]

    predictions = model.predict_proba(data)
    if isinstance(predictions, list):
        # one (n_samples, n_classes) array per target: keep column 1 of each
        # (presumably the positive-class probability - confirm against the model)
        predictions = np.array([x[:, 1].T for x in predictions]).T
    elif (
        isinstance(predictions, np.ndarray)
        and len(label_prediction) == 1
        and predictions.ndim == 2
        and predictions.shape[1] == 2
    ):
        # single target returning one two-column array: take column 1, mirroring the
        # list branch, instead of failing on the column-count mismatch
        predictions = predictions[:, 1:2]

    predictions = pd.DataFrame(predictions, columns=label_prediction, index=data.index)
    result_df = pd.concat([data, predictions], axis=1)
    result_df = result_df[['Date'] + target_variables + label_prediction]

    return result_df, label_prediction
|
|
1612
|
+
|
|
1613
|
+
def produce_signals(result_df, feature_name, threshold, label_prediction):
    """
    produce signals from probabilities

    Adds, for every probability column, a binary signal column and a column
    flagging whether the signal equals the matching target column. The columns
    are written into result_df itself, which is also returned.

    Parameters:
        result_df (pd.DataFrame): asset data with probabilities and target columns
        feature_name (str): edge feature name, used as suffix of the new columns
        threshold (float): edge threshold, signal is 1 when probability >= threshold
        label_prediction (list): list of probability columns ('proba_' + target name)

    Returns:
        result_df (pd.DataFrame): result dataframe with edges and signals
    """
    prefix = 'proba_'
    for pred_col in label_prediction:
        # naming used by this module: a 'down' probability feeds the 'up' signal,
        # every other probability feeds the 'low' signal
        type_use = 'up' if 'down' in pred_col else 'low'
        signal_col = f'signal_{type_use}_{feature_name}'
        acc_col = f'acc_{type_use}_{feature_name}'

        # strip only the leading prefix: str.replace would also remove any
        # 'proba_' that is part of the target name itself
        if pred_col.startswith(prefix):
            target_col = pred_col[len(prefix):]
        else:
            target_col = pred_col.replace(prefix, '')

        result_df[signal_col] = np.where(result_df[pred_col] >= threshold, 1, 0)
        result_df[acc_col] = np.where(result_df[signal_col] == result_df[target_col], 1, 0)

    return result_df
|
|
1635
|
+
|
|
1636
|
+
def clean_cols(data, patterns):
    """
    drop every column whose label contains any of the given patterns

    Parameters:
        data (pd.DataFrame): input data, it is not modified
        patterns (list): substrings to look for in the column labels, a single
            string is treated as one pattern instead of a sequence of characters

    Returns:
        data (pd.DataFrame): copy of data without the matching columns
    """
    if isinstance(patterns, str):
        patterns = [patterns]
    else:
        patterns = list(patterns)

    # labels that do not support `in` (e.g. integers) can never match a pattern,
    # so they are kept instead of raising TypeError
    drop_cols = [
        col for col in data.columns
        if hasattr(col, '__contains__') and any(pattern in col for pattern in patterns)
    ]
    return data.drop(columns=drop_cols)
|