virgo-modules 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

@@ -11,7 +11,14 @@ from feature_engine.imputation import MeanMedianImputer
11
11
  from feature_engine.discretisation import EqualWidthDiscretiser
12
12
  from feature_engine.datetime import DatetimeFeatures
13
13
 
14
- from ..transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector, InteractionFeatures
14
+ from ..transformer_utils import (
15
+ VirgoWinsorizerFeature,
16
+ InverseHyperbolicSine,
17
+ FeaturesEntropy,
18
+ FeatureSelector,
19
+ InteractionFeatures,
20
+ SplineMarketReturnJumpWaves
21
+ )
15
22
 
16
23
  from plotly.subplots import make_subplots
17
24
  import plotly.graph_objects as go
@@ -223,6 +230,7 @@ def data_processing_pipeline_classifier(
223
230
  date_features_list = False,
224
231
  entropy_set_list = False,
225
232
  interaction_features_cont = False,
233
+ spline_regression_config = False,
226
234
  pipeline_order = 'selector//winzorizer//discretizer//median_inputer//drop//correlation'
227
235
  ):
228
236
 
@@ -254,7 +262,12 @@ def data_processing_pipeline_classifier(
254
262
  invhypersin_pipe = [('invhypervolsin scaler', InverseHyperbolicSine(features = invhypervolsin_features))] if invhypervolsin_features else []
255
263
  datetimeFeatures_pipe = [('date features', DatetimeFeatures(features_to_extract = date_features_list, variables = 'Date', drop_original = False))] if date_features_list else []
256
264
  interaction_features = [("interaction features", InteractionFeatures(interaction_features_cont[0], interaction_features_cont[1]))] if interaction_features_cont else []
257
-
265
+ spline_features = [("spline features", SplineMarketReturnJumpWaves(
266
+ return_feature_names=spline_regression_config.get("return_feature_names"),
267
+ target_variables=spline_regression_config.get("target_variables"),
268
+ feature_label=spline_regression_config.get("feature_label"),
269
+ ))] if spline_regression_config else []
270
+
258
271
  entropy_pipe = list()
259
272
  if entropy_set_list:
260
273
  for setx_ in entropy_set_list:
@@ -274,6 +287,7 @@ def data_processing_pipeline_classifier(
274
287
  'date_features': datetimeFeatures_pipe,
275
288
  'interaction_features': interaction_features,
276
289
  'entropy_features' : entropy_pipe,
290
+ "spline_features": spline_features,
277
291
  }
278
292
 
279
293
  pipeline_steps = pipeline_order.split('//')
@@ -0,0 +1,54 @@
1
+ import random
2
+
3
+ from numpy.random import choice
4
+ import numpy as np
5
+ from scipy import stats
6
+ from sklearn.feature_selection import RFE
7
+
8
class StackRFE:
    """Repeated recursive feature elimination over randomly drawn feature batches.

    Each iteration of ``fit`` draws ``batch_elim`` features, fits an ``RFE``
    selector on that batch once per cross-validation fold, and appends the
    modal RFE rank of every batch feature to ``feature_rankings``.

    Parameters
    ----------
    model : estimator handed to ``RFE``.
    n_features : forwarded to ``RFE`` as ``n_features_to_select``.
    batch_elim : number of features drawn for each iteration.
    step_elim : forwarded to ``RFE`` as ``step``.
    cv : object whose ``split(X, y)`` yields ``(train_index, test_index)``.
    max_iterations : number of batches evaluated by ``fit``.

    Attributes
    ----------
    feature_rankings : dict
        Created by ``fit``; maps every column of ``X`` to the list of ranks
        collected so far (each list starts with a single ``1``).
    """

    def __init__(self, model, n_features, batch_elim, step_elim, cv, max_iterations):
        self.model = model
        self.n_features = n_features
        self.batch_elim = batch_elim
        self.step_elim = step_elim
        self.cv = cv
        self.max_iterations = max_iterations

    def _suggest_elimination(self, uniform=False):
        """Draw the next batch of features, without replacement.

        By default the draw is weighted: the lower a feature's mean rank, the
        higher its probability of being selected. With ``uniform=True`` every
        feature is equally likely.

        The batch size is capped at the number of known features, so asking
        for more features than exist returns all of them instead of raising.
        """
        rankings = self.feature_rankings
        features = list(rankings)
        if not features:
            return []
        batch_size = min(self.batch_elim, len(features))

        if uniform:
            random.shuffle(features)
            return features[:batch_size]

        mean_rank = np.array([np.mean(rankings[f]) for f in features], dtype=float)
        # +1 keeps the worst-ranked feature at a strictly positive weight
        weights = mean_rank.max() - mean_rank + 1
        probabilities = weights / weights.sum()
        # draw positions rather than labels so the original label objects
        # (plain str, tuples, ints, ...) are returned untouched by numpy
        positions = choice(len(features), batch_size, p=probabilities, replace=False)
        return [features[pos] for pos in positions]

    def fit(self, X, y):
        """Collect RFE rankings over ``max_iterations`` random feature batches.

        NOTE(review): rows are selected by comparing index level ``'i'`` of
        ``X`` and ``y`` with the ``train_index`` values produced by
        ``cv.split`` - this presumably expects level ``'i'`` to hold the
        positional row ids; confirm against the caller.

        Returns
        -------
        self
        """
        self.feature_rankings = {f: [1] for f in X.columns}
        selector = RFE(self.model, n_features_to_select=self.n_features, step=self.step_elim)
        x_row_ids = X.index.get_level_values('i')
        y_row_ids = y.index.get_level_values('i')

        for _ in range(self.max_iterations):
            # alternate at random between the rank-weighted and the uniform
            # draw (both branches used to perform the weighted draw, leaving
            # the ``uniform`` option unreachable)
            weighted_draw = random.random() > 0.5
            batch_features = self._suggest_elimination(uniform=not weighted_draw)

            fold_ranks = {f: [] for f in batch_features}
            for train_index, _test_index in self.cv.split(X, y):
                X_train = X[x_row_ids.isin(train_index)][batch_features]
                y_train = y[y_row_ids.isin(train_index)]
                selector.fit(X_train, y_train)
                for feature, rank in zip(batch_features, selector.ranking_):
                    fold_ranks[feature].append(rank)

            for feature, ranks in fold_ranks.items():
                if not ranks:
                    # the splitter produced no folds; nothing to record
                    continue
                # ``mode`` is a scalar or a length-1 array depending on the
                # installed scipy version; normalise to a scalar
                modal_rank = np.atleast_1d(stats.mode(ranks).mode)[0]
                self.feature_rankings[feature].append(modal_rank)
        return self
@@ -1421,6 +1421,12 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
1421
1421
  last_signal_featlist = last_signal_featlist.split('//')
1422
1422
  if feature_name in last_signal_featlist:
1423
1423
  object_stock.compute_last_signal(feature_name, False)
1424
+ market_interaction_features = configs.get('custom_transformations',{}).get('market_interaction_features', False)
1425
+ if market_interaction_features:
1426
+ for stage in market_interaction_features.keys():
1427
+ method_to_use = market_interaction_features.get(stage).get("method")
1428
+ arguments_to_use = market_interaction_features.get(stage).get("parameters")
1429
+ getattr(object_stock, method_to_use)(**arguments_to_use)
1424
1430
  # geting targets
1425
1431
  object_stock.get_categorical_targets(**target_params_up)
1426
1432
  object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})
@@ -809,13 +809,11 @@ class stock_eda_panel(object):
809
809
  .transform(lambda x: x.rolling(ma2, min_periods=1).mean())
810
810
  )
811
811
 
812
- print('--------------------------------------------------------------------')
813
812
  if save_features:
814
813
  self.log_features_standard(feature_name)
815
814
  self.settings_relative_spread_ma = {'ma1':ma1, 'ma2':ma2, 'threshold':threshold}
816
815
 
817
816
  if plot:
818
-
819
817
  self.signal_plotter(feature_name)
820
818
 
821
819
  def pair_feature(self, pair_symbol, plot = False):
@@ -871,6 +869,24 @@ class stock_eda_panel(object):
871
869
  plt.legend()
872
870
  plt.show()
873
871
 
872
def smooth_logrets_interaction_term(self, feature_interact_with, resulting_feature_name="persisted_clip_diff_smooths", rollmean_window = 5, ext_threhold=0.015, persist_days = 3, save_features=False):
    """
    Create an interaction term measuring how far the asset's smoothed log
    return is from the market's smoothed log return.

    Both series are smoothed with a rolling mean, their difference
    (market minus asset) is kept only where its absolute value exceeds
    ext_threhold (0 elsewhere), and the result is averaged over
    persist_days so that large gaps persist for some days.
    The result is stored in self.df[resulting_feature_name].

    feature_interact_with: name of the market return column in self.df
    resulting_feature_name: name of the column that receives the result
    rollmean_window: rolling window (number of rows) used for smoothing
    ext_threhold: minimum absolute difference treated as extreme
    persist_days: rolling window (number of rows) used to persist the signal
    save_features: when True, register the new feature through self.log_features_standard
    """
    # intermediate series stay local so that columns already present in
    # self.df are never overwritten or dropped as a side effect
    smooth_asset = self.df['log_return'].rolling(rollmean_window).mean()
    smooth_market = self.df[feature_interact_with].rolling(rollmean_window).mean()
    gap = smooth_market - smooth_asset
    # NaN gaps (warm-up rows of the rolling mean) fail the comparison and become 0
    extreme_gap = gap.where(gap.abs() > ext_threhold, 0)
    self.df[resulting_feature_name] = extreme_gap.rolling(persist_days).mean().values

    if save_features:
        self.log_features_standard(resulting_feature_name)
889
+
874
890
  def calculate_cointegration(self,series_1, series_2):
875
891
  """
876
892
  calculate cointegration score for two time series
@@ -1,6 +1,11 @@
1
+ import gc
2
+
1
3
  from sklearn.base import BaseEstimator, TransformerMixin
2
4
  import pandas as pd
3
5
  import numpy as np
6
+ import statsmodels.api as sm
7
+ from patsy import dmatrix
8
+ import matplotlib.pyplot as plt
4
9
 
5
10
  class InverseHyperbolicSine(BaseEstimator, TransformerMixin):
6
11
 
@@ -289,3 +294,108 @@ class InteractionFeatures(BaseEstimator, TransformerMixin):
289
294
  fn = 'iterm_'+f1.replace("norm_","")+"_"+f2.replace("norm_","")
290
295
  X = self.simple_div_interaction(X, f1, f2, fn)
291
296
  return X
297
+
298
+
299
class SplineMarketReturnJumpWaves(BaseEstimator, TransformerMixin):
    """
    Fit, per return feature and per target, a spline regression of the daily
    target counts on the return feature, and expose the model predictions as
    new features.

    Attributes
    ----------
    return_feature_names : list
        names of the features the spline regression is applied to
    target_variables : list
        names of the target columns; they are summed per "Date" before fitting
    feature_label : str
        prefix for the new features
    sample_perc : float
        controls how many of the date-ordered aggregated rows are used for
        fitting (see the review note in ``fit``)
    parts : int
        number of knots generated by ``_get_knot``
    e_floor, e_top : float
        offsets applied to the minimum and maximum of the data before the
        knots are spread between them
    d : int
        degree of the B-spline basis
    glms : dict
        fitted models, ``glms[return_feature_name][target]["model"]``

    Methods
    -------
    fit(X=DataFrame, y=DataFrame, plot=False):
        fit one GLM per (return feature, target) pair.
    transform(X=DataFrame, y=None, plot=False):
        add one prediction column per (return feature, target) pair to X.
    """

    def __init__(self, return_feature_names, target_variables, feature_label,
                 sample_perc=0.5, parts=6, e_floor=-0.001, e_top=0.0001, d=3):
        self.sample_perc = sample_perc
        self.return_feature_names = return_feature_names
        self.target_variables = target_variables
        self.glms = dict()
        self.feature_label = feature_label
        self.parts = parts
        self.e_floor = e_floor
        self.e_top = e_top
        self.d = d

    def _spline_formula(self, variable, knot_str):
        """Build the patsy B-spline formula for ``variable`` with the configured degree."""
        return f"bs({variable}, knots=({knot_str}), degree={self.d}, include_intercept=False)"

    def fit(self, X, y, plot=False):
        """
        Aggregate X and y per "Date" and fit the spline GLMs.

        X must contain a "Date" column and every name in return_feature_names;
        y is assigned to the target_variables columns of a copy of X.
        For each return feature the targets are summed and the feature is
        maximised per date; the models are fitted on log(count + 1).

        Returns self.
        """
        # complete dataset with y (X itself is left untouched)
        X_set = X.copy()
        X_set[self.target_variables] = y

        if plot:
            # squeeze=False keeps a 2-D axes array even for a single feature
            _, ax = plt.subplots(len(self.return_feature_names), 1, squeeze=False)

        # one "count_<target>" column per configured target
        count_aggregations = {
            f"count_{target}": (target, "sum") for target in self.target_variables
        }

        for i, return_feature_name in enumerate(self.return_feature_names):
            X_aggregated = (
                X_set
                .groupby("Date", as_index=False)
                .agg(**count_aggregations, return_feature=(return_feature_name, "max"))
                .sort_values("Date", ascending=True)
                .dropna()
            )

            # sampling
            nlines = X_aggregated.shape[0]
            # NOTE(review): this value is normally negative, so the slice
            # below drops the LAST (nlines * sample_perc - 1) rows, i.e. it
            # keeps roughly the first (1 - sample_perc) share of the dates.
            # That looks at odds with sample_perc being a training sample
            # size; kept unchanged - confirm the intended formula.
            threshold = int(round((1 - nlines * self.sample_perc), 0))
            train_ = X_aggregated.iloc[:threshold, :]

            # the basis depends only on the return feature, so it is built
            # once and shared by every target
            feature_values = train_[["return_feature"]].round(4).values.reshape(-1, 1)
            knot_str = self._get_knot(feature_values)
            basis = dmatrix(
                self._spline_formula("train", knot_str),
                {"train": feature_values},
                return_type='dataframe',
            )

            self.glms[return_feature_name] = dict()
            for target in self.target_variables:
                log_counts = np.log(train_[f"count_{target}"].values + 1)
                model = sm.GLM(log_counts, basis).fit()
                self.glms[return_feature_name][target] = {
                    "model": model,
                }
                if plot:
                    pred = model.predict(basis)
                    ax[i][0].scatter(feature_values, np.exp(log_counts), s=2, alpha=0.2)
                    ax[i][0].scatter(feature_values, np.exp(pred), alpha=0.2, s=1)
        return self

    def transform(self, X, y=None, plot=False):
        """
        Add "<feature_label>_<return_feature_name>_<target>" prediction
        columns to X (X is modified in place and returned).

        Missing values of the return feature are replaced by 0 before the
        spline basis is evaluated.
        """
        if plot:
            _, ax = plt.subplots(len(self.return_feature_names), 1, squeeze=False)

        for i, return_feature_name in enumerate(self.return_feature_names):
            vect = X[return_feature_name]
            # NOTE(review): the knots are recomputed from the data being
            # transformed, so the basis is not the one the model was fitted
            # on. Kept unchanged because patsy rejects knots lying outside
            # the range of the data passed in - confirm whether the fit-time
            # knots and bounds should be stored and reused instead.
            knot_str = self._get_knot(vect)
            X_transformed = dmatrix(
                self._spline_formula("valid", knot_str),
                {"valid": vect.fillna(0)},
                return_type='dataframe',
            )
            for target in self.target_variables:
                model = self.glms[return_feature_name][target].get("model")
                pred = model.predict(X_transformed)
                X[f"{self.feature_label}_{return_feature_name}_{target}"] = pred
                if plot:
                    # plot against the feature column, not the whole frame
                    ax[i][0].scatter(vect, np.exp(pred), alpha=0.2, s=1)
        return X

    def _get_knot(self, input):
        """
        Return ``parts`` evenly spaced knots as a parenthesised,
        comma-separated string, starting at min(input) - e_floor and spaced
        by (max(input) + e_top - (min(input) - e_floor)) / parts.
        """
        min_, max_ = np.min(input) - self.e_floor, np.max(input) + self.e_top
        r = (max_ - min_) / self.parts
        knots = ",".join(str(i * r + min_) for i in range(self.parts))
        return f"({knots})"
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
2
- Name: virgo-modules
3
- Version: 0.7.0
1
+ Metadata-Version: 2.4
2
+ Name: virgo_modules
3
+ Version: 0.8.0
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
@@ -13,7 +13,18 @@ Requires-Python: >=3.9
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Provides-Extra: dev
16
- Requires-Dist: pytest >=7.0 ; extra == 'dev'
16
+ Requires-Dist: pytest>=7.0; extra == "dev"
17
+ Dynamic: author
18
+ Dynamic: author-email
19
+ Dynamic: classifier
20
+ Dynamic: description
21
+ Dynamic: description-content-type
22
+ Dynamic: home-page
23
+ Dynamic: license
24
+ Dynamic: license-file
25
+ Dynamic: provides-extra
26
+ Dynamic: requires-python
27
+ Dynamic: summary
17
28
 
18
29
  # Virgo Package
19
30
 
@@ -4,18 +4,19 @@ virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zv
4
4
  virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
5
5
  virgo_modules/src/hmm_utils.py,sha256=D7axAnCdSe1_1EgRyli2PAnM2f6699hTY9GcxjPXG-o,21221
6
6
  virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
7
- virgo_modules/src/re_utils.py,sha256=DBY_VBB1wKm5D7znutpF_66CTLZhJfx54h8Ws0YzdN4,74641
8
- virgo_modules/src/ticketer_source.py,sha256=aJZNB_YK0JrSrUBUBkAfolIMxcTozNFrZeoNxkhpqK8,102547
9
- virgo_modules/src/transformer_utils.py,sha256=ysCUp3cB3_7Jr9OHDqhg2_6Vu0k1YVjfqbvQNbxpbhI,8990
7
+ virgo_modules/src/re_utils.py,sha256=GZCkAfgw2tVJRJ_Gw5Yewc14ebiE9wSImPiYQN8FsW0,75095
8
+ virgo_modules/src/ticketer_source.py,sha256=528WhGoANOm4IKnxGSWsbQxxUh3-qlZfvGRNAafMMcE,103883
9
+ virgo_modules/src/transformer_utils.py,sha256=SnYdtsFPnSF6u4UFIat0-X3-qVuUWvv_T46kiB-H0Sk,13682
10
10
  virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  virgo_modules/src/edge_utils/conformal_utils.py,sha256=cKm4KSM261Eu1FJn4oowKYiKIesW81VbqITIvopGSVk,5410
12
- virgo_modules/src/edge_utils/edge_utils.py,sha256=7nYPLDNyKqeKIuOOwQi4wsBibzs9gP1HgYMISXJX1Y8,19522
12
+ virgo_modules/src/edge_utils/edge_utils.py,sha256=4uXVWthzJDzkJ4Uq19ZYL9aPcA6CDUS3xYD4FY-a2AM,20018
13
+ virgo_modules/src/edge_utils/feature_selection.py,sha256=HYbQ0JLPDiRYhn-5-C438YEKbuNduDmuvboFC_VkHww,2453
13
14
  virgo_modules/src/edge_utils/shap_utils.py,sha256=FgcHkfddvdFSeUqEubYa2ExRGVAWSthqK4b-eKagEmo,2333
14
15
  virgo_modules/src/edge_utils/stack_model.py,sha256=QqE91uLo2KauGEj91AVNANB1xE7J4Fa49YOX7k5mFng,4257
15
16
  virgo_modules/src/market/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
17
  virgo_modules/src/market/market_tools.py,sha256=vBt66_7E3ANz7avzfeNw_RHMGvG9lh5PRhxmcf_Oyjc,6880
17
- virgo_modules-0.7.0.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
18
- virgo_modules-0.7.0.dist-info/METADATA,sha256=tart49AI1D8oLYtFI7mxY43ReNUxWpsX34PuByszh3Q,876
19
- virgo_modules-0.7.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
20
- virgo_modules-0.7.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
21
- virgo_modules-0.7.0.dist-info/RECORD,,
18
+ virgo_modules-0.8.0.dist-info/licenses/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
19
+ virgo_modules-0.8.0.dist-info/METADATA,sha256=sCkdOmbxrEEXvGUIwh6vIl_vIcue5C0BbvRtvP9yows,1122
20
+ virgo_modules-0.8.0.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
21
+ virgo_modules-0.8.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
22
+ virgo_modules-0.8.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: setuptools (78.1.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5