virgo-modules 0.1.3__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- virgo_modules/src/edge_utils.py +1 -2
- virgo_modules/src/hmm_utils.py +492 -0
- virgo_modules/src/ticketer_source.py +12 -837
- virgo_modules/src/transformer_utils.py +250 -0
- {virgo_modules-0.1.3.dist-info → virgo_modules-0.2.2.dist-info}/METADATA +1 -1
- virgo_modules-0.2.2.dist-info/RECORD +15 -0
- virgo_modules-0.1.3.dist-info/RECORD +0 -13
- {virgo_modules-0.1.3.dist-info → virgo_modules-0.2.2.dist-info}/LICENSE +0 -0
- {virgo_modules-0.1.3.dist-info → virgo_modules-0.2.2.dist-info}/WHEEL +0 -0
- {virgo_modules-0.1.3.dist-info → virgo_modules-0.2.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
from sklearn.base import BaseEstimator, TransformerMixin
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
class InverseHyperbolicSine(BaseEstimator, TransformerMixin):
    """Apply the inverse hyperbolic sine (``arcsinh``) to selected columns.

    The class follows the scikit-learn transformer interface
    (``fit`` / ``transform``).

    Parameters
    ----------
    features : list
        Names of the columns to transform.
    prefix : str, default ''
        Prefix prepended to each feature name to build the output column
        name. With the default empty prefix the transformed values
        overwrite the source columns.
    """

    def __init__(self, features, prefix = ''):
        self.features = features
        self.prefix = prefix

    def fit(self, X, y=None):
        """Do nothing (the transformation has no fitted state) and return ``self``."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with ``arcsinh`` applied to every configured feature.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing every column listed in ``features``.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            Copy of ``X`` with the columns ``f'{prefix}{feature}'`` added or
            overwritten.
        """
        # Work on a copy: writing into the caller's frame would apply arcsinh
        # twice when the same frame is transformed again with prefix=''.
        X = X.copy()
        for feature in self.features:
            X[f'{self.prefix}{feature}'] = np.arcsinh(X[feature])
        return X
|
|
37
|
+
|
|
38
|
+
class VirgoWinsorizerFeature(BaseEstimator, TransformerMixin):
    """Winsorize features by clipping them to fixed per-feature limits.

    The class follows the scikit-learn transformer interface
    (``fit`` / ``transform``).

    Parameters
    ----------
    feature_configs : dict
        Maps each feature name to a dict with the keys ``'min'`` (lower
        limit) and ``'max'`` (upper limit).
    """

    def __init__(self, feature_configs):
        self.feature_configs = feature_configs

    def fit(self, X, y=None):
        """Do nothing (limits are supplied at construction) and return ``self``."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with every configured feature clipped to its limits.

        Values below ``'min'`` are replaced by ``'min'`` first, then values
        above ``'max'`` are replaced by ``'max'``. NaN compares false against
        both limits and is therefore left untouched.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing every feature named in ``feature_configs``.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            Copy of ``X`` with the configured columns clipped.
        """
        # Work on a copy so the caller's frame is not clipped as a side effect.
        X = X.copy()
        for feature, limits in self.feature_configs.items():
            lower = limits['min']
            upper = limits['max']
            floored = np.where(lower > X[feature], lower, X[feature])
            X[feature] = np.where(upper < floored, upper, floored)
        return X
|
|
69
|
+
|
|
70
|
+
class FeatureSelector(BaseEstimator, TransformerMixin):
    """Keep only a fixed selection of columns.

    The class follows the scikit-learn transformer interface
    (``fit`` / ``transform``).

    Parameters
    ----------
    columns : list
        Names of the columns to keep.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing (there is no state to learn) and return ``self``."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` indexed by the configured ``columns``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing every column listed in ``columns``.
        y : ignored
        """
        wanted = self.columns
        selection = X[wanted]
        return selection
|
|
97
|
+
|
|
98
|
+
class FeaturesEntropy(BaseEstimator, TransformerMixin):
    """Add a feature holding the target entropy of each signal class.

    ``fit`` groups the earliest share of dates by the columns
    ``f'{feature_type}_signal_{feature}'`` and computes, per group, the
    entropy (in bits) of the target being 1 versus 0. ``transform`` joins
    that map back onto the data. Because the map is computed from the target
    of the rows it is later joined to, the fitted rows can carry target
    leakage.

    The class follows the scikit-learn transformer interface
    (``fit`` / ``transform``).

    Parameters
    ----------
    features : list
        Base feature names; the grouping columns are
        ``f'{feature_type}_signal_{feature}'``.
    target : str
        Name of the target column supplied through ``y`` in ``fit``.
    feature_name : str, optional
        Name of the output column. When empty it is built as
        ``'_'.join(features) + '_' + target + '_' + feature_type``.
    feature_type : str, default 'discrete'
        Prefix of the signal columns to group by.
    perc : float, default 0.5
        Share of the sorted unique dates used to compute the entropy map.
    default_null : float, default 0.99
        Value assigned in ``transform`` to rows without an entropy value.

    Attributes
    ----------
    column_list : list
        Grouping column names (set by ``fit``).
    entropy_map : pandas.DataFrame
        One row per signal class with its entropy in the column
        ``feature_name`` (set by ``fit``).
    """

    def __init__(self, features, target, feature_name = None, feature_type = 'discrete', perc = 0.5, default_null = 0.99):

        self.features = features
        self.feature_type = feature_type
        self.target = target
        self.perc = perc
        self.default_null = default_null

        if not feature_name:
            self.feature_name = '_'.join(features) + '_' + target + '_' + feature_type
        else:
            self.feature_name = feature_name

    @staticmethod
    def _entropy_term(rate):
        """Return ``rate * log2(1 / rate)`` element-wise, taking the term as 0 where ``rate`` is 0."""
        # Hide zeros from the division/log so they do not produce inf * 0 = NaN;
        # genuinely missing rates (NaN) stay NaN.
        positive = rate.where(rate > 0)
        term = positive * np.log2(1 / positive)
        return term.mask(rate == 0, 0.0)

    def fit(self, X, y=None):
        """Compute the per-class entropy map from the earliest ``perc`` of dates.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame with a ``'Date'`` column and the signal columns.
        y : pandas.Series or pandas.DataFrame
            Target joined onto ``X`` by index; it must provide the column
            named ``target``.

        Returns
        -------
        FeaturesEntropy
            ``self``, with ``column_list`` and ``entropy_map`` set.

        Raises
        ------
        ValueError
            If ``X`` contains no dates.
        """
        unique_dates = sorted(X['Date'].unique())
        if not unique_dates:
            raise ValueError("cannot fit FeaturesEntropy: X contains no dates")

        # Always keep at least one date: a small ``perc`` would otherwise
        # round to zero and leave nothing to fit on.
        cut = max(1, int(round(len(unique_dates) * self.perc, 0)))
        max_train_date = unique_dates[:cut][-1]

        df = X[X['Date'] <= max_train_date].join(y, how = 'left')

        column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]

        # Summing boolean flags per group yields the counts without a
        # Python-level callback per group.
        target_values = df[self.target]
        df_aggr = (
            df[column_list]
            .assign(
                counts = target_values.notna(),
                trues = target_values == 1,
                falses = target_values == 0,
            )
            .groupby(column_list, as_index = False)
            .sum()
        )

        trues_rate = df_aggr['trues'] / df_aggr['counts']
        falses_rate = df_aggr['falses'] / df_aggr['counts']
        # A pure class (rate 0 or 1) has entropy 0. Groups without any
        # non-null target give NaN and fall back to ``default_null`` in
        # ``transform``.
        class_entropy = np.round(
            self._entropy_term(trues_rate) + self._entropy_term(falses_rate), 3
        )

        self.column_list = column_list
        self.entropy_map = df_aggr[column_list].assign(**{self.feature_name: class_entropy})
        return self

    def transform(self, X, y=None):
        """Return ``X`` joined with the entropy feature.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing the signal columns in ``column_list``.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            New frame with the column ``feature_name`` added; rows whose
            class has no entropy value receive ``default_null``.
        """
        lookup = self.entropy_map.set_index(self.column_list)
        X = X.join(lookup, on=self.column_list, how = 'left')
        X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
        return X
|
|
200
|
+
|
|
201
|
+
class signal_combiner(BaseEstimator, TransformerMixin):
    """Combine the "up" and "low" binary signals of each base feature into one.

    For every base feature a column ``'CombSignal_' + column`` is created
    that is 1 when either the up or the low signal equals 1 and 0 otherwise.

    The class follows the scikit-learn transformer interface
    (``fit`` / ``transform``).

    Parameters
    ----------
    columns : list
        Base feature names whose up/low signal columns are combined.
    drop : bool, default True
        Whether to drop the source up/low signal columns from the result.
    prefix_up : str, default 'signal_up_'
        Prefix of the up-signal columns.
    prefix_low : str, default 'signal_low_'
        Prefix of the low-signal columns.
    """

    def __init__(self, columns, drop = True, prefix_up = 'signal_up_', prefix_low = 'signal_low_'):
        self.columns = columns
        self.drop = drop
        self.prefix_up = prefix_up
        self.prefix_low = prefix_low

    def fit(self, X, y=None):
        """Do nothing (there is no state to learn) and return ``self``."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the combined signal columns added.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing ``prefix_up + column`` and
            ``prefix_low + column`` for every configured column.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            Copy of ``X`` with one ``'CombSignal_' + column`` column per base
            feature and, when ``drop`` is true, without the source signal
            columns.
        """
        # Work on a copy so the caller's frame never receives a partial set
        # of combined columns as a side effect.
        X = X.copy()
        consumed = []
        for column in self.columns:
            up_name = self.prefix_up + column
            low_name = self.prefix_low + column
            fired = (X[up_name] == 1) | (X[low_name] == 1)
            X['CombSignal_'+column] = np.where(fired, 1, 0)
            consumed.extend([up_name, low_name])
        if self.drop and consumed:
            # Drop all source columns in one pass instead of once per feature.
            X = X.drop(columns = consumed)
        return X
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
|
|
4
|
+
virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
|
|
5
|
+
virgo_modules/src/edge_utils.py,sha256=XN2oEOwADXF9IGNUGx0Ai8B1yDAiU2WDateLnEJh5FE,14243
|
|
6
|
+
virgo_modules/src/hmm_utils.py,sha256=fFWxmh9q3rjiKRHnxNk9k7O4fDrxVxkmp3pbpLvktjc,21116
|
|
7
|
+
virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
|
|
8
|
+
virgo_modules/src/re_utils.py,sha256=05pSVzGKBybPMFGm2wcbYgkSOZs3bZECLjyHvlPlFjM,72490
|
|
9
|
+
virgo_modules/src/ticketer_source.py,sha256=8YInJJF_OzjWXaAuOjAzdaQrgkemmsIpQSTkCZs5VFA,95918
|
|
10
|
+
virgo_modules/src/transformer_utils.py,sha256=LLwKYZRq5hrPVimnq3taD0Lh-q3Bq21fy1I4Icbnxi8,7677
|
|
11
|
+
virgo_modules-0.2.2.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
|
|
12
|
+
virgo_modules-0.2.2.dist-info/METADATA,sha256=4jz0IHgWzYOrvqW4UnCgHkZHJQWCD9wWWaEm2du9hRY,1428
|
|
13
|
+
virgo_modules-0.2.2.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
14
|
+
virgo_modules-0.2.2.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
|
|
15
|
+
virgo_modules-0.2.2.dist-info/RECORD,,
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
|
|
4
|
-
virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
|
|
5
|
-
virgo_modules/src/edge_utils.py,sha256=i3Hm3fO-QA-u17jDpnRodLLILMWZ2VTMEkMKijdGKLg,14287
|
|
6
|
-
virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
|
|
7
|
-
virgo_modules/src/re_utils.py,sha256=05pSVzGKBybPMFGm2wcbYgkSOZs3bZECLjyHvlPlFjM,72490
|
|
8
|
-
virgo_modules/src/ticketer_source.py,sha256=fgwF34LJAL_Nr5Pzmp0p5RgHI81-ilRnCXxIBzrfVk4,129045
|
|
9
|
-
virgo_modules-0.1.3.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
|
|
10
|
-
virgo_modules-0.1.3.dist-info/METADATA,sha256=AZMtEKvf-j0TTyWMXhczfq1rfIXugowm630MiCzCl7s,1428
|
|
11
|
-
virgo_modules-0.1.3.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
12
|
-
virgo_modules-0.1.3.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
|
|
13
|
-
virgo_modules-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|