virgo-modules 0.4.5__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

@@ -1,5 +1,7 @@
1
1
  import numpy as np
2
2
  import itertools
3
+ import random
4
+ import math
3
5
 
4
6
  from sklearn.metrics import roc_auc_score, precision_score, recall_score
5
7
  from sklearn.pipeline import Pipeline
@@ -310,7 +312,7 @@ class ExpandingMultipleTimeSeriesKFold:
310
312
  get number of splits
311
313
  """
312
314
 
313
- def __init__(self, df, window_size = 100, number_window=3, overlap_size = 0):
315
+ def __init__(self, df, window_size = 100, number_window=3, overlap_size = 0, sample_parts = None):
314
316
  """
315
317
  Initialize object
316
318
 
@@ -320,6 +322,7 @@ class ExpandingMultipleTimeSeriesKFold:
320
322
  number_window (int): number of train splits
321
323
  window_size (int): window size data
322
324
  overlap_size (int): overlap size
325
+ sample_parts (tuple(float, str)): fraction (float) of partition units to randomly remove from the train set, and the name (str) of the index level identifying those units
323
326
 
324
327
  Returns
325
328
  -------
@@ -329,6 +332,7 @@ class ExpandingMultipleTimeSeriesKFold:
329
332
  self.number_window = number_window
330
333
  self.window_size = window_size
331
334
  self.overlap_size = overlap_size
335
+ self.sample_parts = sample_parts
332
336
 
333
337
  def split(self, X, y, groups=None):
334
338
  """
@@ -372,9 +376,21 @@ class ExpandingMultipleTimeSeriesKFold:
372
376
  max_train_date = max(train_dates)
373
377
  min_test_date, max_test_date = min(test_dates), max(test_dates)
374
378
 
375
- cut = cut - (self.window_size - self.overlap_size)
376
-
377
- train_index = self.df[self.df.index.get_level_values('Date_i') <= max_train_date].index.get_level_values('i')
379
+ cut = cut - (self.window_size - self.overlap_size)
380
+
381
+ if self.sample_parts:
382
+ sample_part = self.sample_parts[0]
383
+ part_col = self.sample_parts[1]
384
+ unique_parts = list(self.df.index.get_level_values(part_col).unique())
385
+ random.shuffle(unique_parts)
386
+ n_select = math.ceil(len(unique_parts)*sample_part)
387
+ to_drop = unique_parts[0:n_select]
388
+ train_index = self.df[
389
+ (self.df.index.get_level_values('Date_i') <= max_train_date)
390
+ &
391
+ (~self.df.index.get_level_values(part_col).isin(to_drop))].index.get_level_values('i')
392
+ else:
393
+ train_index = self.df[self.df.index.get_level_values('Date_i') <= max_train_date].index.get_level_values('i')
378
394
  test_index = self.df[(self.df.index.get_level_values('Date_i') >= min_test_date) & (self.df.index.get_level_values('Date_i') <= max_test_date)].index.get_level_values('i')
379
395
 
380
396
  yield train_index, test_index
@@ -0,0 +1,94 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+
4
+ from sklearn.base import BaseEstimator, ClassifierMixin
5
+
6
+ class MyStackingClassifierMultiClass(ClassifierMixin, BaseEstimator):
7
+ def __init__(self, estimators, meta_estimators,targets,perc=None,stack_size=None, **kwargs):
8
+ self.estimators = estimators
9
+ self.meta_estimators = meta_estimators
10
+ self.targets = targets
11
+ if stack_size and perc:
12
+ raise Exception('just one option')
13
+ if not stack_size and not perc:
14
+ raise Exception('set one option')
15
+ self.stack_size = stack_size
16
+ self.perc = perc
17
+
18
+ def get_index_training(self, X):
19
+ if self.stack_size:
20
+ unique_dates = list(X.index.get_level_values('Date_i').unique())
21
+ unique_dates.sort()
22
+ stack_chunk = unique_dates[-self.stack_size:]
23
+ base_indexes = X[~X.index.get_level_values('Date_i').isin(stack_chunk)].index.get_level_values('i')
24
+ meta_indexes = X[X.index.get_level_values('Date_i').isin(stack_chunk)].index.get_level_values('i')
25
+ elif self.perc:
26
+ meta_indexes = X.sample(frac = self.perc).index.get_level_values('i')
27
+ base_indexes = X[~X.index.get_level_values('i').isin(meta_indexes)].index.get_level_values('i')
28
+ else:
29
+ raise Exception("error", self.stack_size, self.perc)
30
+ return base_indexes, meta_indexes
31
+ def train_base_learner(self, classifier, X, y,indexes):
32
+ base_X = X[X.index.get_level_values('i').isin(indexes)]
33
+ base_y = y[y.index.get_level_values('i').isin(indexes)]
34
+ classifier.fit(base_X, base_y)
35
+ def fit(self, X, y):
36
+ # #base learners
37
+ base_indexes, meta_indexes = self.get_index_training(X)
38
+ for name,estimator in self.estimators:
39
+ self.train_base_learner(estimator,X, y, base_indexes)
40
+
41
+ #stack meta learner
42
+ metas_pred = dict()
43
+ for i,cont in enumerate(self.estimators, start=1):
44
+ _,estimator = cont
45
+ meta_pred = estimator.predict_proba(X[X.index.get_level_values('i').isin(meta_indexes)])
46
+ metas_pred[f"meta{i}0"] = meta_pred[0][:,1]
47
+ metas_pred[f"meta{i}1"] = meta_pred[1][:,1]
48
+ meta_preds_df = pd.DataFrame(metas_pred)
49
+
50
+ for i,metaest in enumerate(self.meta_estimators,start=0):
51
+ _,metaest = metaest
52
+ metacols = [f"meta{j}{i}" for j in range(1,len(self.estimators)+1)]
53
+ metaest.fit(
54
+ meta_preds_df[metacols],
55
+ y[X.index.get_level_values('i').isin(meta_indexes)][self.targets[i]]
56
+ )
57
+ self.is_fitted_ = True
58
+ self.classes_ = np.array([[0,1],[0,1]])
59
+
60
+ def predict_proba(self, X):
61
+ metas_pred = dict()
62
+ for i,cont in enumerate(self.estimators, start=1):
63
+ _,estimator = cont
64
+ meta_pred = estimator.predict_proba(X)
65
+ metas_pred[f"meta{i}0"] = meta_pred[0][:,1]
66
+ metas_pred[f"meta{i}1"] = meta_pred[1][:,1]
67
+ self.meta_preds_df__ = pd.DataFrame(metas_pred)
68
+
69
+ prediction_vector = list()
70
+ for i,cont in enumerate(self.meta_estimators, start=0):
71
+ _,estimator = cont
72
+ metacols = [f"meta{j}{i}" for j in range(1,len(self.estimators)+1)]
73
+ preds = estimator.predict_proba(self.meta_preds_df__[metacols].values)
74
+ prediction_vector.append(preds)
75
+ return prediction_vector
76
+
77
+ def predict(self, X):
78
+ prediction_vector = list()
79
+ _ = self.predict_proba(X)
80
+ for i,cont in enumerate(self.meta_estimators, start=0):
81
+ _,estimator = cont
82
+ metacols = [f"meta{j}{i}" for j in range(1,len(self.estimators)+1)]
83
+ preds = estimator.predict(self.meta_preds_df__[metacols].values)
84
+ prediction_vector.append(preds)
85
+
86
+ p = np.array(tuple(prediction_vector))
87
+ return p.reshape((p.shape[1],p.shape[0]))
88
+
89
+ def get_params(self, deep=True):
90
+ return {k:v for k, v in self.__dict__.items()}
91
+
92
+ def set_params(self, **parms):
93
+ for k,v in parms.items():
94
+ setattr(self,k,v)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo-modules
3
- Version: 0.4.5
3
+ Version: 0.5.1
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
@@ -9,10 +9,11 @@ virgo_modules/src/ticketer_source.py,sha256=jxP-OOeoyN2JxRQg-mX6t6WNJXiIrhWKDywD
9
9
  virgo_modules/src/transformer_utils.py,sha256=ysCUp3cB3_7Jr9OHDqhg2_6Vu0k1YVjfqbvQNbxpbhI,8990
10
10
  virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  virgo_modules/src/edge_utils/conformal_utils.py,sha256=WK54ctvdnFrCAv3_slFBC71Ojy9xgC1wODR7xmvXGgg,3743
12
- virgo_modules/src/edge_utils/edge_utils.py,sha256=unCrkimwzlJx-osUWz6f6Vfiuv-unIATYva5UkK-Xik,18661
12
+ virgo_modules/src/edge_utils/edge_utils.py,sha256=7nYPLDNyKqeKIuOOwQi4wsBibzs9gP1HgYMISXJX1Y8,19522
13
13
  virgo_modules/src/edge_utils/shap_utils.py,sha256=OMKVO-4gtOng9GeSuhEgAEQe4FF3AtqXjm-GUBLqYFc,3349
14
- virgo_modules-0.4.5.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
15
- virgo_modules-0.4.5.dist-info/METADATA,sha256=GR7pTBoguHajgjq17Z6gXRSdjRz-3w4X5QD3-0N7Ge4,876
16
- virgo_modules-0.4.5.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
17
- virgo_modules-0.4.5.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
18
- virgo_modules-0.4.5.dist-info/RECORD,,
14
+ virgo_modules/src/edge_utils/stack_model.py,sha256=QqE91uLo2KauGEj91AVNANB1xE7J4Fa49YOX7k5mFng,4257
15
+ virgo_modules-0.5.1.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
16
+ virgo_modules-0.5.1.dist-info/METADATA,sha256=cDHMIPEJe-YznvEfgdToAL6GsMIrcrB0ti9sTjPlhcw,876
17
+ virgo_modules-0.5.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
18
+ virgo_modules-0.5.1.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
19
+ virgo_modules-0.5.1.dist-info/RECORD,,