virgo-modules 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

@@ -10,7 +10,7 @@ from virgo_modules.src.ticketer_source import FeatureSelector
10
10
  from feature_engine.discretisation import EqualWidthDiscretiser
11
11
  from feature_engine.datetime import DatetimeFeatures
12
12
 
13
- from .ticketer_source import VirgoWinsorizerFeature, InverseHyperbolicSine
13
+ from .ticketer_source import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy
14
14
 
15
15
  class produce_model_wrapper:
16
16
  """
@@ -90,8 +90,8 @@ class produce_model_wrapper:
90
90
  self.model = model
91
91
  self.pipe_transform = pipe
92
92
  self.pipeline = Pipeline([('pipe_transform',self.pipe_transform), ('model',self.model)])
93
- self.features_to_model = self.pipe_transform.fit_transform(self.X_train).columns
94
93
  self.pipeline.fit(self.X_train, self.y_train)
94
+ self.features_to_model = self.pipeline[:-1].transform(self.X_train).columns
95
95
 
96
96
  class register_results():
97
97
  """
@@ -217,6 +217,7 @@ def data_processing_pipeline_classifier(
217
217
  bins_discretize = 10, correlation = 0.85, fillna = True,
218
218
  invhypervolsin_features = False,
219
219
  date_features_list = False,
220
+ entropy_set_list = False,
220
221
  pipeline_order = 'selector//winzorizer//discretizer//median_inputer//drop//correlation'
221
222
  ):
222
223
 
@@ -233,6 +234,7 @@ def data_processing_pipeline_classifier(
233
234
  fillna (boolean): if true to fill na features
234
235
  invhypervolsin_features (list): list of features to apply inverse hyperbolic sine
235
236
  date_features_list (list): list of features to compute from Date field. (list of features from feature_engine)
237
+ entropy_set_list (list): list of dictionaries that contains features and targets to compute entropy
236
238
  pipeline_order (str): custom pipeline order eg. selector//winzorizer//discretizer//median_inputer//drop//correlation
237
239
  Returns:
238
240
  pipe (obj): pipeline object
@@ -245,7 +247,15 @@ def data_processing_pipeline_classifier(
245
247
  median_imputer_pipe = [('median_imputer', MeanMedianImputer())] if fillna else []
246
248
  invhypersin_pipe = [('invhypervolsin scaler', InverseHyperbolicSine(features = invhypervolsin_features))] if invhypervolsin_features else []
247
249
  datetimeFeatures_pipe = [('date features', DatetimeFeatures(features_to_extract = date_features_list, variables = 'Date', drop_original = False))] if date_features_list else []
248
-
250
+
251
+ entropy_pipe = list()
252
+ if entropy_set_list:
253
+ for setx_ in entropy_set_list:
254
+ setx = setx_['set'].split('//')
255
+ target_ = setx_['target']
256
+ subpipe_name = '_'.join(setx) + 'entropy'
257
+ entropy_pipe.append((subpipe_name, FeaturesEntropy(features = setx, target = target_)))
258
+
249
259
  pipe_dictionary = {
250
260
  'selector': select_pipe,
251
261
  'winzorizer':winzorizer_pipe,
@@ -255,6 +265,7 @@ def data_processing_pipeline_classifier(
255
265
  'median_inputer':median_imputer_pipe,
256
266
  'arcsinh_scaler': invhypersin_pipe,
257
267
  'date_features': datetimeFeatures_pipe,
268
+ 'entropy_features' : entropy_pipe,
258
269
  }
259
270
 
260
271
  pipeline_steps = pipeline_order.split('//')
@@ -147,6 +147,109 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
147
147
  def transform(self, X, y=None):
148
148
  return X[self.columns]
149
149
 
150
class FeaturesEntropy(BaseEstimator, TransformerMixin):
    """
    Transformer that adds one column with the binary entropy of a target
    inside each combination of signal classes.

    The entropy map is learned in ``fit`` from the earliest ``perc`` fraction
    of the distinct values found in the ``Date`` column of X, and is joined
    back onto the data in ``transform``. Because the map is built from the
    target itself, the resulting feature may leak target information into the
    rows that were used to build it.
    This class is compatible with scikit-learn pipelines.

    Attributes
    ----------
    features : list
        base names of the signals; the grouping columns read from X are
        ``f'{feature_type}_signal_{feature}'`` for each entry
    target : str
        name of the target column (taken from y, or from X when y is None)
    feature_type : str
        prefix used to build the grouping column names
    feature_name : str
        name of the column created by ``transform``; defaults to
        ``'<features joined by _>_<target>_<feature_type>'``
    perc : float
        fraction of the sorted unique dates used to compute the entropy map
    default_null : float
        value written where a row has no entropy in the map
    column_list : list
        grouping column names, set by ``fit``
    entropy_map : pd.DataFrame
        one row per class combination with its entropy, set by ``fit``

    Methods
    -------
    fit(X=DataFrame, y=None):
        learn the entropy map.
    transform(X=DataFrame, y=None):
        append the entropy feature.
    """

    def __init__(self, features, target, feature_name = None, feature_type = 'discrete', perc = 0.5, default_null = 0.99):

        self.features = features
        self.feature_type = feature_type
        self.target = target
        self.perc = perc
        self.default_null = default_null

        if not feature_name:
            self.feature_name = '_'.join(features) + '_' + target + '_' + feature_type
        else:
            self.feature_name = feature_name

    @staticmethod
    def _entropy_term(rate):
        """Return ``p * log2(1 / p)`` element-wise, using the limit 0 at ``p == 0``."""
        # Mask zeros before dividing: 0 * log2(1 / 0) evaluates to NaN, which
        # would make a perfectly pure group look like a group with no data.
        positive = rate.where(rate > 0)
        term = positive * np.log2(1 / positive)
        # NaN rates (groups without any non-null target) stay NaN on purpose.
        return term.where(rate != 0, 0.0)

    def fit(self, X, y=None):
        """
        Learn the entropy of the target for every class combination.

        Parameters
        ----------
        X : pd.DataFrame
            must contain a ``Date`` column and the grouping columns
        y : pd.Series or pd.DataFrame, optional
            joined to X on the index; must provide the ``target`` column.
            When None, the ``target`` column is read from X.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            if X has no ``Date`` values
        KeyError
            if the ``target`` column is found neither in y nor in X
        """
        unique_dates = list(X['Date'].unique())
        unique_dates.sort()
        if not unique_dates:
            raise ValueError("FeaturesEntropy.fit needs at least one 'Date' value in X")

        cut = int(round(len(unique_dates) * self.perc, 0))
        # A cut of 0 (few dates or a small perc) would leave nothing to learn
        # from; fall back to the earliest date instead of failing on max([]).
        train_dates = unique_dates[:cut] or unique_dates[:1]
        max_train_date = max(train_dates)

        history = X[X['Date'] <= max_train_date]
        if y is not None:
            history = history.join(y, how = 'left')
        if self.target not in history.columns:
            raise KeyError(f'target column {self.target} not found in y or X')

        column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]

        # Per-row indicators summed per group give the same tallies as a
        # row-wise groupby.apply, without calling Python code for every group.
        target_values = history[self.target]
        tallies = history[column_list].assign(
            counts = target_values.notna().astype(int),
            trues = (target_values == 1).astype(int),
            falses = (target_values == 0).astype(int),
        )
        stats = tallies.groupby(column_list, as_index = False)[['counts', 'trues', 'falses']].sum()

        trues_rate = stats['trues'] / stats['counts']
        falses_rate = stats['falses'] / stats['counts']
        class_entropy = np.round(
            self._entropy_term(trues_rate) + self._entropy_term(falses_rate), 3
        )

        entropy_map = stats[column_list].copy()
        entropy_map[self.feature_name] = class_entropy

        self.column_list = column_list
        self.entropy_map = entropy_map
        return self

    def transform(self, X, y=None):
        """
        Append the entropy feature to X.

        Rows whose class combination is missing from the entropy map, or whose
        entropy is NaN in the map, receive ``default_null``.

        Parameters
        ----------
        X : pd.DataFrame
            must contain the grouping columns used in ``fit``
        y : ignored

        Returns
        -------
        pd.DataFrame
            a new frame with the extra ``feature_name`` column
        """
        lookup = self.entropy_map.set_index(self.column_list)
        X = X.join(lookup, on = self.column_list, how = 'left')
        X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
        return X
252
+
150
253
  def sharpe_ratio(return_series):
151
254
 
152
255
  '''
@@ -2495,9 +2598,8 @@ class produce_model:
2495
2598
  self.model = model
2496
2599
  self.pipe_transform = pipe
2497
2600
  self.pipeline = Pipeline([('pipe_transform',self.pipe_transform), ('model',self.model)])
2498
- self.features_to_model = self.pipe_transform.fit_transform(self.X_train).columns
2499
2601
  self.pipeline.fit(self.X_train, self.y_train)
2500
-
2602
+ self.features_to_model = self.pipeline[:-1].transform(self.X_train).columns
2501
2603
 
2502
2604
  class hmm_feature_selector():
2503
2605
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo-modules
3
- Version: 0.0.88
3
+ Version: 0.0.90
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
@@ -1,12 +1,12 @@
1
1
  virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
4
- virgo_modules/src/edge_utils.py,sha256=tMpt0bfnoOyD_qqh4wD6TQeOhaMcGE59DbvIj3qnp-0,13732
4
+ virgo_modules/src/edge_utils.py,sha256=i3Hm3fO-QA-u17jDpnRodLLILMWZ2VTMEkMKijdGKLg,14287
5
5
  virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
6
6
  virgo_modules/src/re_utils.py,sha256=ndPUW3F0QkljtKLR1dqtBm2I2LtceduSgLRIk3HszWk,72244
7
- virgo_modules/src/ticketer_source.py,sha256=mhNPWbluKYVqpX0E8Uh6fTXi1Bn7zsG6rHIp_TklZr0,146629
8
- virgo_modules-0.0.88.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
9
- virgo_modules-0.0.88.dist-info/METADATA,sha256=C1I5H8ceh1-j9gZW7nykhZvzs952oy0Aqx9dWXkufBY,1429
10
- virgo_modules-0.0.88.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
11
- virgo_modules-0.0.88.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
12
- virgo_modules-0.0.88.dist-info/RECORD,,
7
+ virgo_modules/src/ticketer_source.py,sha256=30xCmfL16SHMPQOs4qKsKSfvfdfv-9IkYY4X9gJgx70,150116
8
+ virgo_modules-0.0.90.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
9
+ virgo_modules-0.0.90.dist-info/METADATA,sha256=6KCZW4HK_io_AsQjBV733cVNeNlyRKqJ6MdFCFdmTWY,1429
10
+ virgo_modules-0.0.90.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
11
+ virgo_modules-0.0.90.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
12
+ virgo_modules-0.0.90.dist-info/RECORD,,