virgo-modules 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- virgo_modules/src/edge_utils/edge_utils.py +16 -2
- virgo_modules/src/edge_utils/feature_selection.py +54 -0
- virgo_modules/src/re_utils.py +6 -0
- virgo_modules/src/ticketer_source.py +18 -2
- virgo_modules/src/transformer_utils.py +110 -0
- {virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/METADATA +15 -4
- {virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/RECORD +10 -9
- {virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/WHEEL +1 -1
- {virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {virgo_modules-0.7.0.dist-info → virgo_modules-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -11,7 +11,14 @@ from feature_engine.imputation import MeanMedianImputer
|
|
|
11
11
|
from feature_engine.discretisation import EqualWidthDiscretiser
|
|
12
12
|
from feature_engine.datetime import DatetimeFeatures
|
|
13
13
|
|
|
14
|
-
from ..transformer_utils import
|
|
14
|
+
from ..transformer_utils import (
|
|
15
|
+
VirgoWinsorizerFeature,
|
|
16
|
+
InverseHyperbolicSine,
|
|
17
|
+
FeaturesEntropy,
|
|
18
|
+
FeatureSelector,
|
|
19
|
+
InteractionFeatures,
|
|
20
|
+
SplineMarketReturnJumpWaves
|
|
21
|
+
)
|
|
15
22
|
|
|
16
23
|
from plotly.subplots import make_subplots
|
|
17
24
|
import plotly.graph_objects as go
|
|
@@ -223,6 +230,7 @@ def data_processing_pipeline_classifier(
|
|
|
223
230
|
date_features_list = False,
|
|
224
231
|
entropy_set_list = False,
|
|
225
232
|
interaction_features_cont = False,
|
|
233
|
+
spline_regression_config = False,
|
|
226
234
|
pipeline_order = 'selector//winzorizer//discretizer//median_inputer//drop//correlation'
|
|
227
235
|
):
|
|
228
236
|
|
|
@@ -254,7 +262,12 @@ def data_processing_pipeline_classifier(
|
|
|
254
262
|
invhypersin_pipe = [('invhypervolsin scaler', InverseHyperbolicSine(features = invhypervolsin_features))] if invhypervolsin_features else []
|
|
255
263
|
datetimeFeatures_pipe = [('date features', DatetimeFeatures(features_to_extract = date_features_list, variables = 'Date', drop_original = False))] if date_features_list else []
|
|
256
264
|
interaction_features = [("interaction features", InteractionFeatures(interaction_features_cont[0], interaction_features_cont[1]))] if interaction_features_cont else []
|
|
257
|
-
|
|
265
|
+
spline_features = [("spline features", SplineMarketReturnJumpWaves(
|
|
266
|
+
return_feature_names=spline_regression_config.get("return_feature_names"),
|
|
267
|
+
target_variables=spline_regression_config.get("target_variables"),
|
|
268
|
+
feature_label=spline_regression_config.get("feature_label"),
|
|
269
|
+
))] if spline_regression_config else []
|
|
270
|
+
|
|
258
271
|
entropy_pipe = list()
|
|
259
272
|
if entropy_set_list:
|
|
260
273
|
for setx_ in entropy_set_list:
|
|
@@ -274,6 +287,7 @@ def data_processing_pipeline_classifier(
|
|
|
274
287
|
'date_features': datetimeFeatures_pipe,
|
|
275
288
|
'interaction_features': interaction_features,
|
|
276
289
|
'entropy_features' : entropy_pipe,
|
|
290
|
+
"spline_features": spline_features,
|
|
277
291
|
}
|
|
278
292
|
|
|
279
293
|
pipeline_steps = pipeline_order.split('//')
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
from numpy.random import choice
|
|
4
|
+
import numpy as np
|
|
5
|
+
from scipy import stats
|
|
6
|
+
from sklearn.feature_selection import RFE
|
|
7
|
+
|
|
8
|
+
class StackRFE:
    """Rank features by running RFE repeatedly on random feature batches.

    Every iteration draws ``batch_elim`` features, fits a scikit-learn
    ``RFE`` selector on each training fold produced by ``cv`` and appends
    the modal rank obtained by every drawn feature to ``feature_rankings``.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``RFE``.
    n_features : int
        Forwarded to ``RFE`` as ``n_features_to_select``.
    batch_elim : int
        Number of features drawn per iteration.
    step_elim : int or float
        Forwarded to ``RFE`` as ``step``.
    cv : splitter
        Object exposing ``split(X, y)`` that yields ``(train, test)`` pairs.
    max_iterations : int
        Number of batches that are drawn and evaluated by ``fit``.

    Attributes
    ----------
    feature_rankings : dict
        Created by ``fit``; maps each column of ``X`` to the list of ranks
        collected so far (every list starts with a single ``1``).
    """

    def __init__(self, model, n_features, batch_elim, step_elim, cv, max_iterations):
        self.model = model
        self.n_features = n_features
        self.batch_elim = batch_elim
        self.step_elim = step_elim
        self.cv = cv
        self.max_iterations = max_iterations

    def _suggest_elimination(self, uniform=False):
        """Draw the next batch of feature names.

        With ``uniform=False`` the draw is weighted by the mean rank seen so
        far: the lower the mean rank, the higher the probability of being
        selected. With ``uniform=True`` every feature is equally likely.

        At most ``batch_elim`` names are returned; when fewer features are
        known, all of them are returned instead of raising.
        """
        names = list(self.feature_rankings)
        if not names:
            return []
        size = min(self.batch_elim, len(names))

        if uniform:
            random.shuffle(names)
            return names[:size]

        mean_rank = np.array(
            [np.mean(self.feature_rankings[name]) for name in names], dtype=float
        )
        # The "+ 1" keeps the worst-ranked feature at a non-zero weight.
        weights = mean_rank.max() - mean_rank + 1
        probabilities = weights / weights.sum()
        # Sample positions rather than the names themselves so that the keys
        # keep their original type (numpy would coerce them into an array).
        positions = choice(len(names), size, p=probabilities, replace=False)
        return [names[pos] for pos in positions]

    def fit(self, X, y):
        """Collect RFE rankings over ``max_iterations`` random feature batches.

        ``X`` and ``y`` must carry an index level named ``'i'``; the training
        rows of a fold are those whose ``'i'`` value is contained in the
        train indices yielded by ``cv.split(X, y)``.

        Returns
        -------
        self
        """
        self.feature_rankings = {name: [1] for name in X.columns}

        for _ in range(self.max_iterations):
            # Alternate at random between the rank-weighted and the uniform
            # draw. Previously both branches made the identical weighted
            # call, which left the uniform mode unreachable.
            if random.random() > 0.5:
                batch_features = self._suggest_elimination()
            else:
                batch_features = self._suggest_elimination(uniform=True)

            fold_ranks = {name: [] for name in batch_features}
            selector = RFE(
                self.model,
                n_features_to_select=self.n_features,
                step=self.step_elim,
            )
            for train_index, _ in self.cv.split(X, y):
                X_ = X[X.index.get_level_values('i').isin(train_index)][batch_features]
                y_ = y[y.index.get_level_values('i').isin(train_index)]
                selector = selector.fit(X_, y_)
                for name, rank in zip(batch_features, selector.ranking_):
                    fold_ranks[name].append(rank)

            for name, ranks in fold_ranks.items():
                # ``mode`` is a scalar or a 1-element array depending on the
                # SciPy version; flatten it so a plain int is always stored.
                modal_rank = int(np.ravel(stats.mode(ranks).mode)[0])
                self.feature_rankings[name].append(modal_rank)

        return self
|
virgo_modules/src/re_utils.py
CHANGED
|
@@ -1421,6 +1421,12 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
|
|
|
1421
1421
|
last_signal_featlist = last_signal_featlist.split('//')
|
|
1422
1422
|
if feature_name in last_signal_featlist:
|
|
1423
1423
|
object_stock.compute_last_signal(feature_name, False)
|
|
1424
|
+
market_interaction_features = configs.get('custom_transformations',{}).get('market_interaction_features', False)
|
|
1425
|
+
if market_interaction_features:
|
|
1426
|
+
for stage in market_interaction_features.keys():
|
|
1427
|
+
method_to_use = market_interaction_features.get(stage).get("method")
|
|
1428
|
+
arguments_to_use = market_interaction_features.get(stage).get("parameters")
|
|
1429
|
+
getattr(object_stock, method_to_use)(**arguments_to_use)
|
|
1424
1430
|
# getting targets
|
|
1425
1431
|
object_stock.get_categorical_targets(**target_params_up)
|
|
1426
1432
|
object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})
|
|
@@ -809,13 +809,11 @@ class stock_eda_panel(object):
|
|
|
809
809
|
.transform(lambda x: x.rolling(ma2, min_periods=1).mean())
|
|
810
810
|
)
|
|
811
811
|
|
|
812
|
-
print('--------------------------------------------------------------------')
|
|
813
812
|
if save_features:
|
|
814
813
|
self.log_features_standard(feature_name)
|
|
815
814
|
self.settings_relative_spread_ma = {'ma1':ma1, 'ma2':ma2, 'threshold':threshold}
|
|
816
815
|
|
|
817
816
|
if plot:
|
|
818
|
-
|
|
819
817
|
self.signal_plotter(feature_name)
|
|
820
818
|
|
|
821
819
|
def pair_feature(self, pair_symbol, plot = False):
|
|
@@ -871,6 +869,24 @@ class stock_eda_panel(object):
|
|
|
871
869
|
plt.legend()
|
|
872
870
|
plt.show()
|
|
873
871
|
|
|
872
|
+
def smooth_logrets_interaction_term(self, feature_interact_with, resulting_feature_name="persisted_clip_diff_smooths", rollmean_window = 5, ext_threhold=0.015, persist_days = 3, save_features=False):
    """
    Create an interaction term comparing the asset's rolling-mean log return
    with the rolling mean of a market return column.

    The difference (market minus asset) is kept only where its absolute
    value exceeds the threshold and is set to zero elsewhere; the clipped
    series is then averaged over ``persist_days`` rows so that large gaps
    between market and asset returns persist for some days.

    The result is written to ``self.df[resulting_feature_name]``. No other
    column of ``self.df`` is created, overwritten or dropped.

    Parameters
    ----------
    feature_interact_with : str
        name of the market return column in ``self.df``
    resulting_feature_name : str
        name of the column that receives the result
    rollmean_window : int
        rolling window (number of rows) used to smooth both return series
    ext_threhold : float
        absolute difference above which a gap is kept
    persist_days : int
        rolling window (number of rows) over which the clipped gap is averaged
    save_features : bool
        if True, register the new column through ``self.log_features_standard``

    Returns
    -------
    None
    """
    asset_smooth = self.df['log_return'].rolling(rollmean_window).mean()
    market_smooth = self.df[feature_interact_with].rolling(rollmean_window).mean()
    gap = market_smooth - asset_smooth

    # Rows where the gap is NaN (warm-up of the rolling mean) fail the
    # comparison and therefore become 0, exactly like small gaps.
    clipped_gap = gap.where(gap.abs() > ext_threhold, 0)

    self.df[resulting_feature_name] = clipped_gap.rolling(persist_days).mean().values

    if save_features:
        self.log_features_standard(resulting_feature_name)
|
|
889
|
+
|
|
874
890
|
def calculate_cointegration(self,series_1, series_2):
|
|
875
891
|
"""
|
|
876
892
|
calculate cointegration score for two time series
|
|
@@ -1,6 +1,11 @@
|
|
|
1
|
+
import gc
|
|
2
|
+
|
|
1
3
|
from sklearn.base import BaseEstimator, TransformerMixin
|
|
2
4
|
import pandas as pd
|
|
3
5
|
import numpy as np
|
|
6
|
+
import statsmodels.api as sm
|
|
7
|
+
from patsy import dmatrix
|
|
8
|
+
import matplotlib.pyplot as plt
|
|
4
9
|
|
|
5
10
|
class InverseHyperbolicSine(BaseEstimator, TransformerMixin):
|
|
6
11
|
|
|
@@ -289,3 +294,108 @@ class InteractionFeatures(BaseEstimator, TransformerMixin):
|
|
|
289
294
|
fn = 'iterm_'+f1.replace("norm_","")+"_"+f2.replace("norm_","")
|
|
290
295
|
X = self.simple_div_interaction(X, f1, f2, fn)
|
|
291
296
|
return X
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class SplineMarketReturnJumpWaves(BaseEstimator, TransformerMixin):
    """
    Transformer that aggregates target counts per date and fits one spline
    regression (B-spline design matrix + GLM) per return feature and target.

    Attributes
    ----------
    return_feature_names : list
        names of the features the spline regression is applied to
    target_variables : list
        names of the target columns; ``y`` is stored under these names
    feature_label : str
        prefix for the new features
    sample_perc : float
        fraction used to derive the training cut-off on the date-sorted,
        aggregated data
    parts : int
        number of knots produced by ``_get_knot``
    e_floor : float
        value subtracted from the data minimum when placing the first knot
    e_top : float
        value added to the data maximum when computing the knot spacing
    d : int
        degree of the B-spline basis
    glms : dict
        fitted models, stored as ``glms[return_feature][target]["model"]``

    Methods
    -------
    fit(X=DataFrame, y=DataFrame, plot=False):
        fit one model per (return feature, target) pair.
    transform(X=DataFrame, y=None, plot=False):
        add one prediction column per (return feature, target) pair.
    """

    def __init__(self, return_feature_names, target_variables, feature_label,
                 sample_perc=0.5, parts=6, e_floor=-0.001, e_top=0.0001, d=3):
        self.sample_perc = sample_perc
        self.return_feature_names = return_feature_names
        self.target_variables = target_variables
        self.glms = dict()
        self.feature_label = feature_label
        self.parts = parts
        self.e_floor = e_floor
        self.e_top = e_top
        self.d = d

    def fit(self, X, y, plot=False):
        """
        Fit a GLM on a B-spline basis of each return feature.

        ``X`` must contain a ``"Date"`` column and every name listed in
        ``return_feature_names``. Per date the targets are summed and the
        return feature is reduced with ``max``; the response is
        ``log(count + 1)``.

        Returns
        -------
        self
        """
        # complete dataset with y
        X_set = X.copy()
        X_set[self.target_variables] = y

        if plot:
            # squeeze=False keeps a 2-D axes array even for a single feature.
            _, ax = plt.subplots(len(self.return_feature_names), 1, squeeze=False)

        for i, return_feature_name in enumerate(self.return_feature_names):
            # One "count_<target>" column per configured target instead of
            # hard-coded target_up / target_down.
            aggregations = {
                f"count_{target}": (target, "sum")
                for target in self.target_variables
            }
            aggregations["return_feature"] = (return_feature_name, "max")
            X_aggregated = (
                X_set
                .groupby("Date", as_index=False)
                .agg(**aggregations)
                .sort_values("Date", ascending=True)
                .dropna()
                .copy()
            )

            # sampling
            nlines = X_aggregated.shape[0]
            # NOTE(review): the cut-off is 1 - nlines * sample_perc, which is
            # negative for typical inputs, so the slice drops that many
            # trailing rows. Confirm this is the intended split.
            threshold = int(round((1 - nlines * self.sample_perc), 0))
            train_ = X_aggregated.iloc[:threshold, :]

            # The design matrix only depends on the return feature, so it is
            # built once and shared by every target.
            x_train = train_[["return_feature"]].round(4).values.reshape(-1, 1)
            knot_str = self._get_knot(x_train)
            # Use the configured degree here as well; transform() builds its
            # basis with self.d, so a hard-coded 3 breaks any other degree.
            design = dmatrix(
                f"bs(train, knots=({knot_str}), degree={self.d}, include_intercept=False)",
                {"train": x_train},
                return_type='dataframe',
            )

            self.glms[return_feature_name] = dict()
            for target in self.target_variables:
                y_train = np.log(train_[f"count_{target}"].values + 1)
                model = sm.GLM(y_train, design).fit()
                self.glms[return_feature_name][target] = {
                    "model": model,
                }
                if plot:
                    pred = model.predict(design)
                    ax[i][0].scatter(x_train, np.exp(y_train), s=2, alpha=0.2)
                    ax[i][0].scatter(x_train, np.exp(pred), alpha=0.2, s=1)
        return self

    def transform(self, X, y=None, plot=False):
        """
        Add ``{feature_label}_{return_feature}_{target}`` prediction columns.

        The columns are written into ``X`` itself, which is also returned.
        Missing values of the return feature are replaced by 0 before the
        spline basis is evaluated.
        """
        if plot:
            _, ax = plt.subplots(len(self.return_feature_names), 1, squeeze=False)

        for i, return_feature_name in enumerate(self.return_feature_names):
            vect = X[return_feature_name]
            filled = vect.fillna(0)
            # NOTE(review): knots are re-derived from the data being
            # transformed, so the basis can differ from the one the model
            # was fitted on. Confirm whether the fit-time knots should be
            # reused instead.
            knot_str = self._get_knot(vect)
            X_transformed = dmatrix(
                f"bs(valid, knots=({knot_str}), degree={self.d}, include_intercept=False)",
                {"valid": filled},
                return_type='dataframe',
            )
            for target in self.target_variables:
                model = self.glms[return_feature_name][target].get("model")
                pred = model.predict(X_transformed)
                X[f"{self.feature_label}_{return_feature_name}_{target}"] = pred
                if plot:
                    # Plot against the feature values, not the whole frame.
                    ax[i][0].scatter(filled, np.exp(pred), alpha=0.2, s=1)
        return X

    def _get_knot(self, input):
        """
        Build the knot tuple, as a string, for the patsy ``bs`` formula.

        ``parts`` equally spaced knots are produced, starting at
        ``min(input) - e_floor`` with spacing
        ``((max(input) + e_top) - (min(input) - e_floor)) / parts``.
        """
        min_ = np.min(input) - self.e_floor
        max_ = np.max(input) + self.e_top
        step = (max_ - min_) / self.parts
        knots = ",".join(str(k * step + min_) for k in range(self.parts))
        return f"({knots})"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
2
|
-
Name:
|
|
3
|
-
Version: 0.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: virgo_modules
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: data processing and statistical modeling using stock market data
|
|
5
5
|
Home-page: https://github.com/miguelmayhem92/virgo_module
|
|
6
6
|
Author: Miguel Mayhuire
|
|
@@ -13,7 +13,18 @@ Requires-Python: >=3.9
|
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
15
|
Provides-Extra: dev
|
|
16
|
-
Requires-Dist: pytest
|
|
16
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
17
|
+
Dynamic: author
|
|
18
|
+
Dynamic: author-email
|
|
19
|
+
Dynamic: classifier
|
|
20
|
+
Dynamic: description
|
|
21
|
+
Dynamic: description-content-type
|
|
22
|
+
Dynamic: home-page
|
|
23
|
+
Dynamic: license
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
Dynamic: provides-extra
|
|
26
|
+
Dynamic: requires-python
|
|
27
|
+
Dynamic: summary
|
|
17
28
|
|
|
18
29
|
# Virgo Package
|
|
19
30
|
|
|
@@ -4,18 +4,19 @@ virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zv
|
|
|
4
4
|
virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
|
|
5
5
|
virgo_modules/src/hmm_utils.py,sha256=D7axAnCdSe1_1EgRyli2PAnM2f6699hTY9GcxjPXG-o,21221
|
|
6
6
|
virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
|
|
7
|
-
virgo_modules/src/re_utils.py,sha256=
|
|
8
|
-
virgo_modules/src/ticketer_source.py,sha256=
|
|
9
|
-
virgo_modules/src/transformer_utils.py,sha256=
|
|
7
|
+
virgo_modules/src/re_utils.py,sha256=GZCkAfgw2tVJRJ_Gw5Yewc14ebiE9wSImPiYQN8FsW0,75095
|
|
8
|
+
virgo_modules/src/ticketer_source.py,sha256=528WhGoANOm4IKnxGSWsbQxxUh3-qlZfvGRNAafMMcE,103883
|
|
9
|
+
virgo_modules/src/transformer_utils.py,sha256=SnYdtsFPnSF6u4UFIat0-X3-qVuUWvv_T46kiB-H0Sk,13682
|
|
10
10
|
virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
virgo_modules/src/edge_utils/conformal_utils.py,sha256=cKm4KSM261Eu1FJn4oowKYiKIesW81VbqITIvopGSVk,5410
|
|
12
|
-
virgo_modules/src/edge_utils/edge_utils.py,sha256=
|
|
12
|
+
virgo_modules/src/edge_utils/edge_utils.py,sha256=4uXVWthzJDzkJ4Uq19ZYL9aPcA6CDUS3xYD4FY-a2AM,20018
|
|
13
|
+
virgo_modules/src/edge_utils/feature_selection.py,sha256=HYbQ0JLPDiRYhn-5-C438YEKbuNduDmuvboFC_VkHww,2453
|
|
13
14
|
virgo_modules/src/edge_utils/shap_utils.py,sha256=FgcHkfddvdFSeUqEubYa2ExRGVAWSthqK4b-eKagEmo,2333
|
|
14
15
|
virgo_modules/src/edge_utils/stack_model.py,sha256=QqE91uLo2KauGEj91AVNANB1xE7J4Fa49YOX7k5mFng,4257
|
|
15
16
|
virgo_modules/src/market/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
17
|
virgo_modules/src/market/market_tools.py,sha256=vBt66_7E3ANz7avzfeNw_RHMGvG9lh5PRhxmcf_Oyjc,6880
|
|
17
|
-
virgo_modules-0.
|
|
18
|
-
virgo_modules-0.
|
|
19
|
-
virgo_modules-0.
|
|
20
|
-
virgo_modules-0.
|
|
21
|
-
virgo_modules-0.
|
|
18
|
+
virgo_modules-0.8.0.dist-info/licenses/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
|
|
19
|
+
virgo_modules-0.8.0.dist-info/METADATA,sha256=sCkdOmbxrEEXvGUIwh6vIl_vIcue5C0BbvRtvP9yows,1122
|
|
20
|
+
virgo_modules-0.8.0.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
|
|
21
|
+
virgo_modules-0.8.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
|
|
22
|
+
virgo_modules-0.8.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|