virgo-modules 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

Files changed (20)
  1. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/PKG-INFO +1 -1
  2. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/setup.py +1 -1
  3. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/edge_utils.py +1 -2
  4. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/hmm_utils.py +54 -2
  5. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/ticketer_source.py +12 -837
  6. virgo_modules-0.2.2/virgo_app/virgo_modules/src/transformer_utils.py +250 -0
  7. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
  8. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules.egg-info/SOURCES.txt +2 -1
  9. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/LICENSE +0 -0
  10. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/README.md +0 -0
  11. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/setup.cfg +0 -0
  12. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/__init__.py +0 -0
  13. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/__init__.py +0 -0
  14. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
  15. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/backtester.py +0 -0
  16. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
  17. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules/src/re_utils.py +0 -0
  18. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
  19. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
  20. {virgo_modules-0.2.1 → virgo_modules-0.2.2}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo_modules
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
5
5
 
6
6
  setup(
7
7
  name="virgo_modules",
8
- version="0.2.1",
8
+ version="0.2.2",
9
9
  description="data processing and statistical modeling using stock market data",
10
10
  package_dir={"": "virgo_app"},
11
11
  packages=find_packages(where="virgo_app"),
@@ -6,11 +6,10 @@ from sklearn.pipeline import Pipeline
6
6
 
7
7
  from feature_engine.selection import DropFeatures, DropCorrelatedFeatures
8
8
  from feature_engine.imputation import MeanMedianImputer
9
- from virgo_modules.src.ticketer_source import FeatureSelector
10
9
  from feature_engine.discretisation import EqualWidthDiscretiser
11
10
  from feature_engine.datetime import DatetimeFeatures
12
11
 
13
- from .ticketer_source import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy
12
+ from .transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector
14
13
 
15
14
  class produce_model_wrapper:
16
15
  """
@@ -2,7 +2,7 @@ from hmmlearn.hmm import GaussianHMM
2
2
 
3
3
  from sklearn.pipeline import Pipeline
4
4
  from feature_engine.imputation import MeanMedianImputer
5
- from virgo_modules.src.ticketer_source import FeatureSelector
5
+ from virgo_modules.src.transformer_utils import FeatureSelector
6
6
  from feature_engine.selection import DropCorrelatedFeatures
7
7
  from sklearn.preprocessing import RobustScaler
8
8
 
@@ -14,7 +14,59 @@ import matplotlib.pyplot as plt
14
14
  import matplotlib.gridspec as gridspec
15
15
  import seaborn as sns; sns.set()
16
16
 
17
- from virgo_modules.src.ticketer_source import FeatureSelector, states_relevance_score
17
+ def states_relevance_score(data, default_benchmark_sd = 0.00003, t_threshold = 2):
18
+ '''
19
+ calculate relevance score and summary report for hmm model
20
+
21
+ Parameters:
22
+ default_benchmark_sd (float): default value to bias SD for t calculation
23
+ t_threshold (float): alpha or z threshold for the normalized score
24
+
25
+ Returns:
26
+ mean_relevance (float): mean relevance score of the states
27
+ cluster_returns (pd.DataFrame): summary report of the analysis
28
+ number_relevant_states (int): number of relevant states
29
+ '''
30
+ ## legnths
31
+ cluster_lengths = data.groupby(['hmm_feature','chain_id'],as_index = False).agg(chain_lenght = ('hmm_chain_order','max'))
32
+ cluster_lengths = cluster_lengths.groupby('hmm_feature').agg(cluster_length_median = ('chain_lenght','median'))
33
+ ## means
34
+ def quantile2(x):
35
+ return x.quantile(0.25)
36
+ def quantile3(x):
37
+ return x.quantile(0.75)
38
+
39
+ cluster_returns = data.groupby('hmm_feature').agg(
40
+ n_uniques = ('chain_id','nunique'),
41
+ n_obs = ('Date','count'),
42
+ cluster_ret_q25 = ('chain_return',quantile2),
43
+ cluster_ret_median = ('chain_return','median'),
44
+ cluster_ret_q75 = ('chain_return',quantile3),
45
+ )
46
+ cluster_returns = cluster_returns.join(cluster_lengths, how = 'left')
47
+ cluster_returns['perc_dispute'] = np.where(
48
+ np.sign(cluster_returns['cluster_ret_q25']) != np.sign(cluster_returns['cluster_ret_q75']),
49
+ 1,0
50
+ )
51
+ cluster_returns['iqr'] = cluster_returns.cluster_ret_q75 - cluster_returns.cluster_ret_q25
52
+ cluster_returns['perc_25'] = abs(cluster_returns.cluster_ret_q25)/cluster_returns['iqr']
53
+ cluster_returns['perc_75'] = abs(cluster_returns.cluster_ret_q75)/cluster_returns['iqr']
54
+ cluster_returns['min_perc'] = cluster_returns[['perc_25','perc_75']].min(axis = 1)
55
+ cluster_returns['min_overlap'] = np.where(cluster_returns['perc_dispute'] == 1,cluster_returns['min_perc'],0)
56
+ cluster_returns['abs_median'] = abs(cluster_returns['cluster_ret_median'])
57
+ cluster_returns = cluster_returns.drop(columns = ['perc_25','perc_75','min_perc'])
58
+
59
+ ## relevance or importance
60
+ # naive aproach
61
+ cluster_returns['relevance'] = cluster_returns['abs_median'] + ( 0.5 - cluster_returns['min_overlap'])
62
+ cluster_returns['t_calc'] = (cluster_returns['cluster_ret_median'] - 0)/(cluster_returns['iqr']/cluster_returns['n_obs'] + default_benchmark_sd/cluster_returns['n_obs'])**(1/2)
63
+ cluster_returns['abs_t_accpted'] = abs(cluster_returns['t_calc'])
64
+ cluster_returns['t_accpted'] = abs(cluster_returns['abs_t_accpted']) > t_threshold
65
+
66
+ mean_relevance = cluster_returns['abs_t_accpted'].mean()
67
+ number_relevant_states = len(cluster_returns[cluster_returns.t_accpted == True])
68
+
69
+ return mean_relevance, cluster_returns, number_relevant_states
18
70
 
19
71
  def create_hmm_derived_features(df, lag_returns):
20
72
  """