virgo-modules 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- virgo_modules/src/edge_utils.py +1 -2
- virgo_modules/src/hmm_utils.py +54 -2
- virgo_modules/src/re_utils.py +4 -1
- virgo_modules/src/ticketer_source.py +12 -837
- virgo_modules/src/transformer_utils.py +250 -0
- {virgo_modules-0.2.1.dist-info → virgo_modules-0.2.3.dist-info}/METADATA +1 -1
- virgo_modules-0.2.3.dist-info/RECORD +15 -0
- virgo_modules-0.2.1.dist-info/RECORD +0 -14
- {virgo_modules-0.2.1.dist-info → virgo_modules-0.2.3.dist-info}/LICENSE +0 -0
- {virgo_modules-0.2.1.dist-info → virgo_modules-0.2.3.dist-info}/WHEEL +0 -0
- {virgo_modules-0.2.1.dist-info → virgo_modules-0.2.3.dist-info}/top_level.txt +0 -0
virgo_modules/src/edge_utils.py
CHANGED
|
@@ -6,11 +6,10 @@ from sklearn.pipeline import Pipeline
|
|
|
6
6
|
|
|
7
7
|
from feature_engine.selection import DropFeatures, DropCorrelatedFeatures
|
|
8
8
|
from feature_engine.imputation import MeanMedianImputer
|
|
9
|
-
from virgo_modules.src.ticketer_source import FeatureSelector
|
|
10
9
|
from feature_engine.discretisation import EqualWidthDiscretiser
|
|
11
10
|
from feature_engine.datetime import DatetimeFeatures
|
|
12
11
|
|
|
13
|
-
from .
|
|
12
|
+
from .transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector
|
|
14
13
|
|
|
15
14
|
class produce_model_wrapper:
|
|
16
15
|
"""
|
virgo_modules/src/hmm_utils.py
CHANGED
|
@@ -2,7 +2,7 @@ from hmmlearn.hmm import GaussianHMM
|
|
|
2
2
|
|
|
3
3
|
from sklearn.pipeline import Pipeline
|
|
4
4
|
from feature_engine.imputation import MeanMedianImputer
|
|
5
|
-
from virgo_modules.src.
|
|
5
|
+
from virgo_modules.src.transformer_utils import FeatureSelector
|
|
6
6
|
from feature_engine.selection import DropCorrelatedFeatures
|
|
7
7
|
from sklearn.preprocessing import RobustScaler
|
|
8
8
|
|
|
@@ -14,7 +14,59 @@ import matplotlib.pyplot as plt
|
|
|
14
14
|
import matplotlib.gridspec as gridspec
|
|
15
15
|
import seaborn as sns; sns.set()
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
def states_relevance_score(data, default_benchmark_sd = 0.00003, t_threshold = 2):
    '''
    calculate relevance score and summary report for hmm model

    Parameters:
        data (pd.DataFrame): one row per observation; the columns read here are
            'hmm_feature' (state label), 'chain_id', 'hmm_chain_order',
            'Date' and 'chain_return'
        default_benchmark_sd (float): default value to bias SD for t calculation
        t_threshold (float): alpha or z threshold for the normalized score

    Returns:
        mean_relevance (float): mean across states of the absolute t score
            (column 'abs_t_accpted'); note it is not the mean of 'relevance'
        cluster_returns (pd.DataFrame): summary report of the analysis, indexed by 'hmm_feature'
        number_relevant_states (int): number of states whose absolute t score exceeds t_threshold
    '''
    ## lengths
    # largest chain order inside every (state, chain) pair, then the median of those per state
    chain_sizes = data.groupby(['hmm_feature', 'chain_id'], as_index = False).agg(
        chain_length = ('hmm_chain_order', 'max'),
    )
    cluster_lengths = chain_sizes.groupby('hmm_feature').agg(
        cluster_length_median = ('chain_length', 'median'),
    )

    ## means
    # distinctly named helpers so both quartiles can be aggregated over the same column
    def lower_quartile(x):
        return x.quantile(0.25)

    def upper_quartile(x):
        return x.quantile(0.75)

    cluster_returns = data.groupby('hmm_feature').agg(
        n_uniques = ('chain_id', 'nunique'),
        n_obs = ('Date', 'count'),
        cluster_ret_q25 = ('chain_return', lower_quartile),
        cluster_ret_median = ('chain_return', 'median'),
        cluster_ret_q75 = ('chain_return', upper_quartile),
    )
    cluster_returns = cluster_returns.join(cluster_lengths, how = 'left')

    # a state is "disputed" when its quartiles have different signs
    cluster_returns['perc_dispute'] = np.where(
        np.sign(cluster_returns['cluster_ret_q25']) != np.sign(cluster_returns['cluster_ret_q75']),
        1, 0
    )
    cluster_returns['iqr'] = cluster_returns['cluster_ret_q75'] - cluster_returns['cluster_ret_q25']

    # share of the IQR taken by the smaller (in absolute value) quartile;
    # computed without scratch columns so nothing has to be dropped afterwards
    quartile_shares = (
        cluster_returns[['cluster_ret_q25', 'cluster_ret_q75']]
        .abs()
        .div(cluster_returns['iqr'], axis = 0)
    )
    cluster_returns['min_overlap'] = np.where(
        cluster_returns['perc_dispute'] == 1, quartile_shares.min(axis = 1), 0
    )
    cluster_returns['abs_median'] = cluster_returns['cluster_ret_median'].abs()

    ## relevance or importance
    # naive approach
    cluster_returns['relevance'] = cluster_returns['abs_median'] + (0.5 - cluster_returns['min_overlap'])
    # t-like score of the median against zero, using IQR plus the benchmark SD as spread
    cluster_returns['t_calc'] = (cluster_returns['cluster_ret_median'] - 0)/(cluster_returns['iqr']/cluster_returns['n_obs'] + default_benchmark_sd/cluster_returns['n_obs'])**(1/2)
    cluster_returns['abs_t_accpted'] = cluster_returns['t_calc'].abs()
    cluster_returns['t_accpted'] = cluster_returns['abs_t_accpted'] > t_threshold

    mean_relevance = cluster_returns['abs_t_accpted'].mean()
    # summing the boolean flags counts the accepted states
    number_relevant_states = int(cluster_returns['t_accpted'].sum())

    return mean_relevance, cluster_returns, number_relevant_states
|
|
18
70
|
|
|
19
71
|
def create_hmm_derived_features(df, lag_returns):
|
|
20
72
|
"""
|
virgo_modules/src/re_utils.py
CHANGED
|
@@ -760,7 +760,10 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
|
|
|
760
760
|
object_stock.cluster_hmm_analysis( n_clusters = ticket_settings['settings']['hmm']['n_clusters'],
|
|
761
761
|
features_hmm = ticket_settings['settings']['hmm']['features_hmm'],
|
|
762
762
|
test_data_size = ticket_settings['settings']['hmm']['test_data_size'],
|
|
763
|
-
seed = ticket_settings['settings']['hmm']['seed']
|
|
763
|
+
seed = ticket_settings['settings']['hmm']['seed'],
|
|
764
|
+
corr_threshold = ticket_settings['settings']['hmm'].get('corr_threshold',0.75),
|
|
765
|
+
lag_returns_state = ticket_settings['settings']['hmm'].get('lag_returns_state',7),
|
|
766
|
+
)
|
|
764
767
|
|
|
765
768
|
return object_stock
|
|
766
769
|
|