virgo-modules 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- virgo_modules/src/edge_utils.py +14 -3
- virgo_modules/src/ticketer_source.py +104 -2
- {virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/METADATA +1 -1
- {virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/RECORD +7 -7
- {virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/LICENSE +0 -0
- {virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/WHEEL +0 -0
- {virgo_modules-0.0.88.dist-info → virgo_modules-0.0.90.dist-info}/top_level.txt +0 -0
virgo_modules/src/edge_utils.py
CHANGED
|
@@ -10,7 +10,7 @@ from virgo_modules.src.ticketer_source import FeatureSelector
|
|
|
10
10
|
from feature_engine.discretisation import EqualWidthDiscretiser
|
|
11
11
|
from feature_engine.datetime import DatetimeFeatures
|
|
12
12
|
|
|
13
|
-
from .ticketer_source import VirgoWinsorizerFeature, InverseHyperbolicSine
|
|
13
|
+
from .ticketer_source import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy
|
|
14
14
|
|
|
15
15
|
class produce_model_wrapper:
|
|
16
16
|
"""
|
|
@@ -90,8 +90,8 @@ class produce_model_wrapper:
|
|
|
90
90
|
self.model = model
|
|
91
91
|
self.pipe_transform = pipe
|
|
92
92
|
self.pipeline = Pipeline([('pipe_transform',self.pipe_transform), ('model',self.model)])
|
|
93
|
-
self.features_to_model = self.pipe_transform.fit_transform(self.X_train).columns
|
|
94
93
|
self.pipeline.fit(self.X_train, self.y_train)
|
|
94
|
+
self.features_to_model = self.pipeline[:-1].transform(self.X_train).columns
|
|
95
95
|
|
|
96
96
|
class register_results():
|
|
97
97
|
"""
|
|
@@ -217,6 +217,7 @@ def data_processing_pipeline_classifier(
|
|
|
217
217
|
bins_discretize = 10, correlation = 0.85, fillna = True,
|
|
218
218
|
invhypervolsin_features = False,
|
|
219
219
|
date_features_list = False,
|
|
220
|
+
entropy_set_list = False,
|
|
220
221
|
pipeline_order = 'selector//winzorizer//discretizer//median_inputer//drop//correlation'
|
|
221
222
|
):
|
|
222
223
|
|
|
@@ -233,6 +234,7 @@ def data_processing_pipeline_classifier(
|
|
|
233
234
|
fillna (boolean): if true, fill na values in the features
|
|
234
235
|
invhypervolsin_features (list): list of features to apply inverse hyperbolic sine
|
|
235
236
|
date_features_list (list): list of features to compute from Date field. (list of features from feature_engine)
|
|
237
|
+
entropy_set_list (list): list of dictionaries, each with a 'set' key ('//'-separated feature names) and a 'target' key, used to compute entropy features
|
|
236
238
|
pipeline_order (str): custom pipeline order eg. selector//winzorizer//discretizer//median_inputer//drop//correlation
|
|
237
239
|
Returns:
|
|
238
240
|
pipe (obj): pipeline object
|
|
@@ -245,7 +247,15 @@ def data_processing_pipeline_classifier(
|
|
|
245
247
|
median_imputer_pipe = [('median_imputer', MeanMedianImputer())] if fillna else []
|
|
246
248
|
invhypersin_pipe = [('invhypervolsin scaler', InverseHyperbolicSine(features = invhypervolsin_features))] if invhypervolsin_features else []
|
|
247
249
|
datetimeFeatures_pipe = [('date features', DatetimeFeatures(features_to_extract = date_features_list, variables = 'Date', drop_original = False))] if date_features_list else []
|
|
248
|
-
|
|
250
|
+
|
|
251
|
+
entropy_pipe = list()
|
|
252
|
+
if entropy_set_list:
|
|
253
|
+
for setx_ in entropy_set_list:
|
|
254
|
+
setx = setx_['set'].split('//')
|
|
255
|
+
target_ = setx_['target']
|
|
256
|
+
subpipe_name = '_'.join(setx) + 'entropy'
|
|
257
|
+
entropy_pipe.append((subpipe_name, FeaturesEntropy(features = setx, target = target_)))
|
|
258
|
+
|
|
249
259
|
pipe_dictionary = {
|
|
250
260
|
'selector': select_pipe,
|
|
251
261
|
'winzorizer':winzorizer_pipe,
|
|
@@ -255,6 +265,7 @@ def data_processing_pipeline_classifier(
|
|
|
255
265
|
'median_inputer':median_imputer_pipe,
|
|
256
266
|
'arcsinh_scaler': invhypersin_pipe,
|
|
257
267
|
'date_features': datetimeFeatures_pipe,
|
|
268
|
+
'entropy_features' : entropy_pipe,
|
|
258
269
|
}
|
|
259
270
|
|
|
260
271
|
pipeline_steps = pipeline_order.split('//')
|
|
@@ -147,6 +147,109 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
|
|
|
147
147
|
def transform(self, X, y=None):
|
|
148
148
|
return X[self.columns]
|
|
149
149
|
|
|
150
|
+
class FeaturesEntropy(BaseEstimator, TransformerMixin):
|
|
151
|
+
"""
|
|
152
|
+
Class that creates a feature that calculates the entropy for the classes of the given features, but it might introduce some leakage in the training set.
|
|
153
|
+
This class is compatible with scikit-learn pipelines.
|
|
154
|
+
|
|
155
|
+
Attributes
|
|
156
|
+
----------
|
|
157
|
+
features : list
|
|
158
|
+
list of features to select
|
|
159
|
+
entropy_map: pd.DataFrame
|
|
160
|
+
dataframe of the map with the entropies per class
|
|
161
|
+
perc: float
|
|
162
|
+
percentage of the dates used to calculate the entropy map
|
|
163
|
+
|
|
164
|
+
Methods
|
|
165
|
+
-------
|
|
166
|
+
fit(X=DataFrame, y=None):
|
|
167
|
+
fit transformation.
|
|
168
|
+
transform(X=DataFrame, y=None):
|
|
169
|
+
apply feature transformation
|
|
170
|
+
"""
|
|
171
|
+
|
|
172
|
+
def __init__(self, features, target, feature_name = None, feature_type = 'discrete', perc = 0.5, default_null = 0.99):
|
|
173
|
+
|
|
174
|
+
self.features = features
|
|
175
|
+
self.feature_type = feature_type
|
|
176
|
+
self.target = target
|
|
177
|
+
self.perc = perc
|
|
178
|
+
self.default_null = default_null
|
|
179
|
+
|
|
180
|
+
if not feature_name:
|
|
181
|
+
self.feature_name = '_'.join(features)
|
|
182
|
+
self.feature_name = self.feature_name + '_' + target + '_' + feature_type
|
|
183
|
+
else:
|
|
184
|
+
self.feature_name = feature_name
|
|
185
|
+
|
|
186
|
+
def fit(self, X, y=None):
|
|
187
|
+
|
|
188
|
+
unique_dates = list(X['Date'].unique())
|
|
189
|
+
unique_dates.sort()
|
|
190
|
+
|
|
191
|
+
total_length = len(unique_dates)
|
|
192
|
+
cut = int(round(total_length*self.perc,0))
|
|
193
|
+
train_dates = unique_dates[:cut]
|
|
194
|
+
max_train_date = max(train_dates)
|
|
195
|
+
|
|
196
|
+
X_ = X[X['Date'] <= max_train_date].copy()
|
|
197
|
+
df = X_.join(y, how = 'left')
|
|
198
|
+
|
|
199
|
+
column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]
|
|
200
|
+
|
|
201
|
+
df_aggr = (
|
|
202
|
+
df
|
|
203
|
+
.groupby(column_list, as_index = False)
|
|
204
|
+
.apply(
|
|
205
|
+
lambda x: pd.Series(
|
|
206
|
+
dict(
|
|
207
|
+
counts = x[self.target].count(),
|
|
208
|
+
trues=(x[self.target] == 1).sum(),
|
|
209
|
+
falses=(x[self.target] == 0).sum(),
|
|
210
|
+
)
|
|
211
|
+
)
|
|
212
|
+
)
|
|
213
|
+
.assign(
|
|
214
|
+
trues_rate=lambda x: x['trues'] / x['counts']
|
|
215
|
+
)
|
|
216
|
+
.assign(
|
|
217
|
+
falses_rate=lambda x: x['falses'] / x['counts']
|
|
218
|
+
)
|
|
219
|
+
.assign(
|
|
220
|
+
log2_trues = lambda x: np.log2(1/x['trues_rate'])
|
|
221
|
+
)
|
|
222
|
+
.assign(
|
|
223
|
+
log2_falses = lambda x: np.log2(1/x['falses_rate'])
|
|
224
|
+
)
|
|
225
|
+
.assign(
|
|
226
|
+
comp1 = lambda x: x['trues_rate']*x['log2_trues']
|
|
227
|
+
)
|
|
228
|
+
.assign(
|
|
229
|
+
comp2 = lambda x: x['falses_rate']*x['log2_falses']
|
|
230
|
+
)
|
|
231
|
+
.assign(
|
|
232
|
+
class_entropy = lambda x: np.round(x['comp1']+x['comp2'],3)
|
|
233
|
+
)
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
self.column_list = column_list
|
|
237
|
+
self.entropy_map = (
|
|
238
|
+
df_aggr
|
|
239
|
+
[column_list+['class_entropy']]
|
|
240
|
+
.rename(columns = {'class_entropy': self.feature_name})
|
|
241
|
+
.copy()
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
del df, df_aggr, X_
|
|
245
|
+
return self
|
|
246
|
+
|
|
247
|
+
def transform(self, X, y=None):
|
|
248
|
+
|
|
249
|
+
X = X.join(self.entropy_map.set_index(self.column_list), on=self.column_list, how = 'left')
|
|
250
|
+
X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
|
|
251
|
+
return X
|
|
252
|
+
|
|
150
253
|
def sharpe_ratio(return_series):
|
|
151
254
|
|
|
152
255
|
'''
|
|
@@ -2495,9 +2598,8 @@ class produce_model:
|
|
|
2495
2598
|
self.model = model
|
|
2496
2599
|
self.pipe_transform = pipe
|
|
2497
2600
|
self.pipeline = Pipeline([('pipe_transform',self.pipe_transform), ('model',self.model)])
|
|
2498
|
-
self.features_to_model = self.pipe_transform.fit_transform(self.X_train).columns
|
|
2499
2601
|
self.pipeline.fit(self.X_train, self.y_train)
|
|
2500
|
-
|
|
2602
|
+
self.features_to_model = self.pipeline[:-1].transform(self.X_train).columns
|
|
2501
2603
|
|
|
2502
2604
|
class hmm_feature_selector():
|
|
2503
2605
|
"""
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
|
|
4
|
-
virgo_modules/src/edge_utils.py,sha256=
|
|
4
|
+
virgo_modules/src/edge_utils.py,sha256=i3Hm3fO-QA-u17jDpnRodLLILMWZ2VTMEkMKijdGKLg,14287
|
|
5
5
|
virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
|
|
6
6
|
virgo_modules/src/re_utils.py,sha256=ndPUW3F0QkljtKLR1dqtBm2I2LtceduSgLRIk3HszWk,72244
|
|
7
|
-
virgo_modules/src/ticketer_source.py,sha256=
|
|
8
|
-
virgo_modules-0.0.
|
|
9
|
-
virgo_modules-0.0.
|
|
10
|
-
virgo_modules-0.0.
|
|
11
|
-
virgo_modules-0.0.
|
|
12
|
-
virgo_modules-0.0.
|
|
7
|
+
virgo_modules/src/ticketer_source.py,sha256=30xCmfL16SHMPQOs4qKsKSfvfdfv-9IkYY4X9gJgx70,150116
|
|
8
|
+
virgo_modules-0.0.90.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
|
|
9
|
+
virgo_modules-0.0.90.dist-info/METADATA,sha256=6KCZW4HK_io_AsQjBV733cVNeNlyRKqJ6MdFCFdmTWY,1429
|
|
10
|
+
virgo_modules-0.0.90.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
11
|
+
virgo_modules-0.0.90.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
|
|
12
|
+
virgo_modules-0.0.90.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|