virgo-modules 0.1.3__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

@@ -0,0 +1,250 @@
1
+ from sklearn.base import BaseEstimator, TransformerMixin
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
class InverseHyperbolicSine(BaseEstimator, TransformerMixin):
    """
    Class that applies the inverse hyperbolic sine (``np.arcsinh``) to a
    set of columns. This class is compatible with scikit-learn pipelines.

    Attributes
    ----------
    features : list
        list of column names to transform
    prefix : str
        prefix for the output columns. If '' the source columns are
        overwritten with the transformed values

    Methods
    -------
    fit(X=DataFrame, y=None):
        no-op; returns the transformer itself.
    transform(X=DataFrame, y=None):
        return a copy of X with the transformed columns added/overwritten.
    """

    def __init__(self, features, prefix=''):
        self.features = features
        self.prefix = prefix

    def fit(self, X, y=None):
        # Nothing is learned from the data; present for pipeline compatibility.
        return self

    def transform(self, X, y=None):
        # Work on a copy so the caller's DataFrame is never modified in place.
        X = X.copy()
        for feature in self.features:
            X[f'{self.prefix}{feature}'] = np.arcsinh(X[feature])
        return X
37
+
38
class VirgoWinsorizerFeature(BaseEstimator, TransformerMixin):
    """
    Class that winsorizes (clamps) features between fixed limits.
    This class is compatible with scikit-learn pipelines.

    Attributes
    ----------
    feature_configs : dict
        dictionary keyed by feature name. Each value is a dictionary with
        the keys 'min' (lower limit) and 'max' (upper limit) for that feature

    Methods
    -------
    fit(X=DataFrame, y=None):
        no-op; returns the transformer itself.
    transform(X=DataFrame, y=None):
        return a copy of X with every configured feature clamped.
    """

    def __init__(self, feature_configs):
        self.feature_configs = feature_configs

    def fit(self, X, y=None):
        # Limits come from the configuration, so nothing is fitted here.
        return self

    def transform(self, X, y=None):
        # Work on a copy so the caller's DataFrame is never modified in place.
        X = X.copy()
        for feature, limits in self.feature_configs.items():
            lower = limits['min']
            upper = limits['max']
            # Values below the lower limit are raised to it, then values above
            # the upper limit are lowered to it (same order as before).
            X[feature] = np.where(lower > X[feature], lower, X[feature])
            X[feature] = np.where(upper < X[feature], upper, X[feature])
        return X
69
+
70
class FeatureSelector(BaseEstimator, TransformerMixin):
    """
    Class that keeps only a chosen set of columns.
    This class is compatible with scikit-learn pipelines.

    Attributes
    ----------
    columns : list
        list of features to select

    Methods
    -------
    fit(X=DataFrame, y=None):
        no-op; returns the transformer itself.
    transform(X=DataFrame, y=None):
        return X indexed by the configured columns.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        # Stateless selector: there is nothing to learn from the data.
        return self

    def transform(self, X, y=None):
        selection = self.columns
        return X[selection]
97
+
98
class FeaturesEntropy(BaseEstimator, TransformerMixin):
    """
    Class that creates a feature holding the binary entropy of the target
    for each combination of the given signal classes. The entropy map is
    computed on the earliest part of the data, so it might introduce some
    leakage in the training set.
    This class is compatible with scikit-learn pipelines.

    Attributes
    ----------
    features : list
        base feature names; the grouping columns are
        f'{feature_type}_signal_{feature}' for each of them
    target : str
        name of the binary (0/1) target column, taken from y in fit
    feature_name : str
        name of the generated entropy column. When not given it is built
        from the features, the target and the feature type
    feature_type : str
        prefix used to build the grouping column names
    perc : float
        fraction of the (sorted) unique dates used to compute the entropy map
    default_null : float
        value assigned in transform to rows that have no entropy in the map
    column_list : list
        grouping column names (set in fit)
    entropy_map : pd.DataFrame
        dataframe mapping each class combination to its entropy (set in fit)

    Methods
    -------
    fit(X=DataFrame, y=None):
        compute the entropy map.
    transform(X=DataFrame, y=None):
        join the entropy map onto X and fill the missing values.
    """

    def __init__(self, features, target, feature_name=None, feature_type='discrete', perc=0.5, default_null=0.99):

        self.features = features
        self.feature_type = feature_type
        self.target = target
        self.perc = perc
        self.default_null = default_null

        if not feature_name:
            self.feature_name = '_'.join(features)
            self.feature_name = self.feature_name + '_' + target + '_' + feature_type
        else:
            self.feature_name = feature_name

    @staticmethod
    def _entropy_term(rate):
        """Return rate * log2(1 / rate), with the convention 0 * log2(1/0) = 0."""
        rate = rate.astype(float)
        # Mask zeros before dividing so no inf / NaN is produced for them.
        positive = rate.where(rate > 0)
        term = positive * np.log2(1 / positive)
        # A zero rate contributes exactly 0 to the entropy; a NaN rate
        # (group with no valid target) stays NaN.
        return term.where(rate != 0, 0.0)

    def fit(self, X, y=None):

        unique_dates = sorted(X['Date'].unique())

        # Only the first `perc` share of the dates feeds the entropy map.
        cut = int(round(len(unique_dates) * self.perc, 0))
        if cut < 1:
            raise ValueError(
                f'perc={self.perc} selects no dates out of {len(unique_dates)} '
                'unique dates; cannot compute the entropy map'
            )
        max_train_date = max(unique_dates[:cut])

        X_ = X[X['Date'] <= max_train_date].copy()
        df = X_.join(y, how='left')

        column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]

        df_aggr = (
            df
            .groupby(column_list, as_index=False)
            .apply(
                lambda x: pd.Series(
                    dict(
                        counts=x[self.target].count(),
                        trues=(x[self.target] == 1).sum(),
                        falses=(x[self.target] == 0).sum(),
                    )
                )
            )
            .assign(
                trues_rate=lambda x: x['trues'] / x['counts']
            )
            .assign(
                falses_rate=lambda x: x['falses'] / x['counts']
            )
            # Pure groups (rate 0 or 1) used to yield 0 * inf = NaN and were
            # later filled with default_null; their entropy is now 0.
            .assign(
                comp1=lambda x: self._entropy_term(x['trues_rate'])
            )
            .assign(
                comp2=lambda x: self._entropy_term(x['falses_rate'])
            )
            .assign(
                class_entropy=lambda x: np.round(x['comp1'] + x['comp2'], 3)
            )
        )

        self.column_list = column_list
        self.entropy_map = (
            df_aggr
            [column_list + ['class_entropy']]
            .rename(columns={'class_entropy': self.feature_name})
            .copy()
        )

        return self

    def transform(self, X, y=None):

        # join returns a new frame, so the caller's DataFrame is not modified.
        X = X.join(self.entropy_map.set_index(self.column_list), on=self.column_list, how='left')
        # Rows with no entropy in the map get the default.
        X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
        return X
200
+
201
class signal_combiner(BaseEstimator, TransformerMixin):
    """
    Class that combines pairs of binary signals (up / low) into one feature.
    This class is compatible with scikit-learn pipelines.

    For every base column, 'CombSignal_<column>' is 1 when either
    '<prefix_up><column>' or '<prefix_low><column>' equals 1, otherwise 0.

    Attributes
    ----------
    columns : list
        list of base feature names to combine
    drop : boolean
        drop the up / low source columns after combining them
    prefix_up : str
        up prefix of the base feature
    prefix_low : str
        low prefix of the base feature

    Methods
    -------
    fit(X=DataFrame, y=None):
        no-op; returns the transformer itself.
    transform(X=DataFrame, y=None):
        return a copy of X with the combined signals.
    """

    def __init__(self, columns, drop=True, prefix_up='signal_up_', prefix_low='signal_low_'):
        self.columns = columns
        self.drop = drop
        self.prefix_up = prefix_up
        self.prefix_low = prefix_low

    def fit(self, X, y=None):
        # Nothing is learned from the data; present for pipeline compatibility.
        return self

    def transform(self, X, y=None):
        # Work on a copy so the caller's DataFrame is never modified in place
        # (previously the first combined column leaked into the input frame).
        X = X.copy()
        source_columns = []
        for column in self.columns:
            up_column = self.prefix_up + column
            low_column = self.prefix_low + column
            either_signal = (X[up_column] == 1) | (X[low_column] == 1)
            X['CombSignal_' + column] = np.where(either_signal, 1, 0)
            source_columns.extend([up_column, low_column])
        # Drop all source columns in one pass instead of once per column.
        if self.drop and source_columns:
            X = X.drop(columns=source_columns)
        return X
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo-modules
3
- Version: 0.1.3
3
+ Version: 0.2.2
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
@@ -0,0 +1,15 @@
1
+ virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
4
+ virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
5
+ virgo_modules/src/edge_utils.py,sha256=XN2oEOwADXF9IGNUGx0Ai8B1yDAiU2WDateLnEJh5FE,14243
6
+ virgo_modules/src/hmm_utils.py,sha256=fFWxmh9q3rjiKRHnxNk9k7O4fDrxVxkmp3pbpLvktjc,21116
7
+ virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
8
+ virgo_modules/src/re_utils.py,sha256=05pSVzGKBybPMFGm2wcbYgkSOZs3bZECLjyHvlPlFjM,72490
9
+ virgo_modules/src/ticketer_source.py,sha256=8YInJJF_OzjWXaAuOjAzdaQrgkemmsIpQSTkCZs5VFA,95918
10
+ virgo_modules/src/transformer_utils.py,sha256=LLwKYZRq5hrPVimnq3taD0Lh-q3Bq21fy1I4Icbnxi8,7677
11
+ virgo_modules-0.2.2.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
12
+ virgo_modules-0.2.2.dist-info/METADATA,sha256=4jz0IHgWzYOrvqW4UnCgHkZHJQWCD9wWWaEm2du9hRY,1428
13
+ virgo_modules-0.2.2.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
14
+ virgo_modules-0.2.2.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
15
+ virgo_modules-0.2.2.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
4
- virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
5
- virgo_modules/src/edge_utils.py,sha256=i3Hm3fO-QA-u17jDpnRodLLILMWZ2VTMEkMKijdGKLg,14287
6
- virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
7
- virgo_modules/src/re_utils.py,sha256=05pSVzGKBybPMFGm2wcbYgkSOZs3bZECLjyHvlPlFjM,72490
8
- virgo_modules/src/ticketer_source.py,sha256=fgwF34LJAL_Nr5Pzmp0p5RgHI81-ilRnCXxIBzrfVk4,129045
9
- virgo_modules-0.1.3.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
10
- virgo_modules-0.1.3.dist-info/METADATA,sha256=AZMtEKvf-j0TTyWMXhczfq1rfIXugowm630MiCzCl7s,1428
11
- virgo_modules-0.1.3.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
12
- virgo_modules-0.1.3.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
13
- virgo_modules-0.1.3.dist-info/RECORD,,