virgo-modules 0.6.1__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -11,7 +11,14 @@ from feature_engine.imputation import MeanMedianImputer
 from feature_engine.discretisation import EqualWidthDiscretiser
 from feature_engine.datetime import DatetimeFeatures

-from ..transformer_utils import VirgoWinsorizerFeature, InverseHyperbolicSine, FeaturesEntropy, FeatureSelector, InteractionFeatures
+from ..transformer_utils import (
+    VirgoWinsorizerFeature,
+    InverseHyperbolicSine,
+    FeaturesEntropy,
+    FeatureSelector,
+    InteractionFeatures,
+    SplineMarketReturnJumpWaves
+)

 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
@@ -223,6 +230,7 @@ def data_processing_pipeline_classifier(
     date_features_list = False,
     entropy_set_list = False,
     interaction_features_cont = False,
+    spline_regression_config = False,
     pipeline_order = 'selector//winzorizer//discretizer//median_inputer//drop//correlation'
     ):

@@ -254,7 +262,12 @@ def data_processing_pipeline_classifier(
     invhypersin_pipe = [('invhypervolsin scaler', InverseHyperbolicSine(features = invhypervolsin_features))] if invhypervolsin_features else []
     datetimeFeatures_pipe = [('date features', DatetimeFeatures(features_to_extract = date_features_list, variables = 'Date', drop_original = False))] if date_features_list else []
     interaction_features = [("interaction features", InteractionFeatures(interaction_features_cont[0], interaction_features_cont[1]))] if interaction_features_cont else []
-
+    spline_features = [("spline features", SplineMarketReturnJumpWaves(
+        return_feature_names=spline_regression_config.get("return_feature_names"),
+        target_variables=spline_regression_config.get("target_variables"),
+        feature_label=spline_regression_config.get("feature_label"),
+    ))] if spline_regression_config else []
+
     entropy_pipe = list()
     if entropy_set_list:
         for setx_ in entropy_set_list:
@@ -274,6 +287,7 @@ def data_processing_pipeline_classifier(
         'date_features': datetimeFeatures_pipe,
         'interaction_features': interaction_features,
         'entropy_features' : entropy_pipe,
+        "spline_features": spline_features,
     }

     pipeline_steps = pipeline_order.split('//')
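The three hunks above wire an optional spline stage into the classifier pipeline: `spline_regression_config` toggles the new `SplineMarketReturnJumpWaves` step, and the `//`-separated `pipeline_order` string decides where (and whether) the registered "spline_features" step runs. A minimal sketch of the config dict, assuming only the three keys the code reads via `.get()`; the column names and label are illustrative placeholders, not values from the package:

# Hedged sketch: key names mirror the .get() calls above; values are placeholders.
spline_regression_config = {
    "return_feature_names": ["market_log_return"],     # hypothetical return column(s)
    "target_variables": ["target_up", "target_down"],  # targets counted per Date in fit()
    "feature_label": "spline_wave",                    # prefix for the generated columns
}
# Passed as data_processing_pipeline_classifier(..., spline_regression_config=spline_regression_config)
# with "spline_features" included in the '//'-separated pipeline_order string.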
@@ -0,0 +1,54 @@
+import random
+
+from numpy.random import choice
+import numpy as np
+from scipy import stats
+from sklearn.feature_selection import RFE
+
+class StackRFE:
+    def __init__(self, model, n_features, batch_elim, step_elim, cv, max_iterations):
+        self.model = model
+        self.n_features = n_features
+        self.batch_elim = batch_elim
+        self.step_elim = step_elim
+        self.cv = cv
+        self.max_iterations = max_iterations
+
+    def _suggest_elimination(self, uniform=False):
+        """
+        suggest a batch based on mean ranking: the lower a feature's mean rank, the higher its probability of being selected
+        """
+        ds = self.feature_rankings
+        ds_mean = {k: np.mean(ds.get(k)) for k in ds}
+        max_ = np.max([x for x in ds_mean.values()])
+        ds_weight = {k: (max_ - v + 1) for k, v in ds_mean.items()}
+        sum_ = np.sum([x for x in ds_weight.values()])
+        ds_prob = {k: v / sum_ for k, v in ds_weight.items()}
+        result = list(choice(list(ds_prob.keys()), self.batch_elim, p=list(ds_prob.values()), replace=False))
+        if uniform:
+            features = list(ds_prob.keys())
+            random.shuffle(features)
+            result = features[0:self.batch_elim]
+        return result
+
+    def fit(self, X, y):
+        features = list(X.columns).copy()
+        self.feature_rankings = {f: [1] for f in features}
+        for iteration in range(self.max_iterations):
+            # alternate randomly between rank-weighted and uniform batch suggestion
+            if random.random() > 0.5:
+                batch_features = self._suggest_elimination()
+            else:
+                batch_features = self._suggest_elimination(uniform=True)
+            # selector and elimination
+            tmp_feature_ranking = {k: list() for k in batch_features}
+            selector = RFE(self.model, n_features_to_select=self.n_features, step=self.step_elim)
+            for train_index, test_index in self.cv.split(X, y):
+                X_ = X[X.index.get_level_values('i').isin(train_index)][batch_features]
+                y_ = y[y.index.get_level_values('i').isin(train_index)]
+                selector = selector.fit(X_, y_)
+                for k, r in zip(tmp_feature_ranking.keys(), selector.ranking_):
+                    tmp_feature_ranking[k].append(r)
+            rankings = [stats.mode(v).mode for v in tmp_feature_ranking.values()]
+            for f, r in zip(batch_features, rankings):
+                self.feature_rankings[f].append(r)
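This new module (per the 0.8.0 RECORD, presumably virgo_modules/src/edge_utils/feature_selection.py) stacks scikit-learn RFE runs across CV folds, re-sampling elimination batches weighted by each feature's mean historical rank. A hedged usage sketch on synthetic data; the MultiIndex level named 'i' is required because fit() filters rows via X.index.get_level_values('i'):

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold

# Synthetic frame whose 'i' index level matches the positional indices KFold yields.
rng = np.random.default_rng(0)
idx = pd.MultiIndex.from_product([range(200), [0]], names=["i", "j"])
X = pd.DataFrame(rng.normal(size=(200, 8)), index=idx,
                 columns=[f"f{k}" for k in range(8)])
y = pd.Series(rng.integers(0, 2, size=200), index=idx)

stack = StackRFE(
    model=RandomForestClassifier(n_estimators=50, random_state=0),
    n_features=3,          # features RFE keeps within each batch
    batch_elim=5,          # candidate features sampled per iteration
    step_elim=1,           # RFE elimination step
    cv=KFold(n_splits=3),
    max_iterations=4,
)
stack.fit(X, y)
# Lower mean rank across iterations suggests a stronger feature.
print(sorted(stack.feature_rankings, key=lambda f: np.mean(stack.feature_rankings[f]))[:3])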
@@ -0,0 +1,189 @@
+import gc
+
+import pandas as pd
+import numpy as np
+
+from sklearn.linear_model import HuberRegressor
+from scipy import stats
+
+import matplotlib.pyplot as plt
+import seaborn as sns; sns.set()
+
+from matplotlib import cm
+import matplotlib.colors as mcolors
+
+class MarketAnalysis:
+    """
+    Class that performs market analysis using robust linear regression
+
+    Attributes
+    ----------
+    data : pd.DataFrame
+        input data
+    market_features : list
+        list of market features (log returns) to apply the analysis to
+    return_cols : str
+        main log return feature
+    col_map : dict
+        dictionary for renaming market features
+
+    Methods
+    -------
+    compute_beta(data=pd.DataFrame, feature_x=str, feature_y=str):
+        compute betas given x and y using robust linear regression
+    get_correlation(data=pd.DataFrame, feature_x=str, feature_y=str):
+        compute correlation given x and y
+    produce_beta_report(data=pd.DataFrame):
+        produce beta report
+    compute_general_report(sample_size=int, offset=int, index=str, subsample_ts=int, show_plot=bool):
+        compute full report, global and latest window
+    """
+
+    def __init__(self, data, market_features, return_col, col_map=None):
+        self.data = data.dropna()
+        self.market_features = market_features
+        self.return_cols = return_col
+        self.col_map = col_map
+
+    def compute_beta(self, data, feature_x, feature_y):
+        """
+        compute betas given x and y using robust linear regression
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+        feature_x (str): name of the feature x
+        feature_y (str): name of the feature y
+
+        Returns
+        -------
+        (beta (float), alpha (float))
+        """
+        x = data[feature_x].values.reshape(-1, 1)
+        y = data[feature_y].values.reshape(-1, 1)
+        huber_regr = HuberRegressor(fit_intercept=True)
+        huber_regr.fit(x, y)
+        beta, alpha = huber_regr.coef_[0], huber_regr.intercept_
+        return beta, alpha
+
+    def get_correlation(self, data, feature_x, feature_y):
+        """
+        compute correlation given x and y
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+        feature_x (str): name of the feature x
+        feature_y (str): name of the feature y
+
+        Returns
+        -------
+        r (float)
+        """
+        x = data[feature_x]
+        y = data[feature_y]
+        r = stats.mstats.pearsonr(x, y)[0]
+        return r
+
+    def produce_beta_report(self, data):
+        """
+        produce beta report
+
+        Parameters
+        ----------
+        data (pd.DataFrame): input data containing analysis features
+
+        Returns
+        -------
+        report (pd.DataFrame)
+        """
+        result = {
+            "market_index": list(),
+            "beta": list(),
+            "alpha": list(),
+            "r": list()
+        }
+        for index in self.market_features:
+            beta, alpha = self.compute_beta(data, self.return_cols, index)
+            r = self.get_correlation(data, self.return_cols, index)
+            result["market_index"].append(index)
+            result["beta"].append(beta)
+            result["alpha"].append(alpha)
+            result["r"].append(r)
+        pd_result = pd.DataFrame(result)
+        pd_result = pd_result.sort_values("r", ascending=False)
+        if self.col_map:
+            pd_result["map_market_index"] = pd_result.market_index.map(self.col_map)
+        return pd_result
+
+    def compute_general_report(self, sample_size, offset, index=False, subsample_ts=False, show_plot=True):
+        """
+        compute full report, global and latest window
+
+        Parameters
+        ----------
+        sample_size (int): sample size for every beta computation
+        offset (int): offset or overlap between samples
+        index (str): market index to use; if not provided, the best-fit index is taken
+        subsample_ts (int): subsample for the iterative beta calculation
+        show_plot (bool): whether to show the plot
+
+        Returns
+        -------
+        (report (pd.DataFrame), latest_report (pd.DataFrame), figure (matplotlib figure))
+        """
+        general_report = self.produce_beta_report(self.data)
+        current_report = self.produce_beta_report(self.data.iloc[sample_size:, :])
+        if not index:
+            index = general_report.head(1).market_index.values[0]
+        b = general_report[general_report.market_index == index].beta.values
+        a = general_report[general_report.market_index == index].alpha.values
+
+        figure, ax = plt.subplot_mosaic(
+            [["scatter_total", "scatter_sample", 'ts', 'ts']],
+            layout="constrained",
+            figsize=(18, 5)
+        )
+        x = self.data[self.return_cols]
+        y = self.data[index]
+        ax['scatter_total'].scatter(x, y)
+        ax['scatter_total'].plot(x, b*x + a, color='red')
+
+        if subsample_ts:
+            merger_df = self.data.iloc[-subsample_ts:, :].copy()
+        else:
+            merger_df = self.data.copy()
+        ax['ts'].plot(merger_df.Date, merger_df.Close, color='grey', alpha=0.3)
+        b_array = list()
+        for i in range(0, len(merger_df) - sample_size, offset):
+            merger_ = merger_df.sort_values('Date', ascending=False).iloc[i:i + sample_size, :]
+            b, a = self.compute_beta(merger_, self.return_cols, index)
+            x = merger_[self.return_cols]
+            y = merger_[index]
+            normalize_ = mcolors.Normalize(vmin=-2.0, vmax=2.0)
+            colormap_ = cm.jet
+            ax['scatter_sample'].plot(x, y, 'o', color='blue', alpha=0.1)
+            ax['scatter_sample'].plot(x, b*x + a, color=colormap_(normalize_(b)))
+            ax['scatter_sample'].set_xlim(-0.08, 0.08)
+            ax['scatter_sample'].set_ylim(-0.08, 0.08)
+            plot = ax['ts'].scatter(merger_.Date, merger_.Close, color=colormap_(normalize_(b)), s=10)
+            b_array.append(b)
+        normalize_ = mcolors.Normalize(vmin=np.min(b_array), vmax=np.max(b_array))
+        colormap_ = cm.jet
+        x_global = self.data[self.return_cols]
+        scalarmappaple = cm.ScalarMappable(norm=normalize_, cmap=colormap_)
+        scalarmappaple.set_array(x_global)
+        if self.col_map:
+            map_index = self.col_map.get(index)
+            title = f'market analysis of {map_index}'
+        else:
+            title = 'market analysis'
+        plt.title(title)
+        plt.colorbar(scalarmappaple)
+        del merger_df
+        gc.collect()
+        if show_plot:
+            plt.show()
+        else:
+            plt.close()
+        return general_report, current_report, figure
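A hedged usage sketch for the new market module (per the RECORD, presumably virgo_modules/src/market/market_tools.py). MarketAnalysis only needs a frame carrying Date, Close, the asset's log return, and the market log-return columns; everything below is synthetic, with a beta of about 1.2 built in:

import numpy as np
import pandas as pd

n = 300
rng = np.random.default_rng(1)
market = rng.normal(0, 0.01, n)
data = pd.DataFrame({
    "Date": pd.date_range("2024-01-01", periods=n),
    "Close": 100 + np.cumsum(rng.normal(0, 1, n)),
    "log_return": 1.2 * market + rng.normal(0, 0.005, n),  # beta ~ 1.2 by construction
    "spy_log_return": market,                              # hypothetical market column
})

ma = MarketAnalysis(data, market_features=["spy_log_return"],
                    return_col="log_return", col_map={"spy_log_return": "S&P 500"})
report, latest, fig = ma.compute_general_report(sample_size=60, offset=10,
                                                subsample_ts=200, show_plot=False)
print(report)  # one row per market feature: market_index, beta, alpha, r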
@@ -1421,6 +1421,12 @@ def extract_data_traintest(object_stock,features_to_search,configs, target_confi
             last_signal_featlist = last_signal_featlist.split('//')
             if feature_name in last_signal_featlist:
                 object_stock.compute_last_signal(feature_name, False)
+    market_interaction_features = configs.get('custom_transformations',{}).get('market_interaction_features', False)
+    if market_interaction_features:
+        for stage in market_interaction_features.keys():
+            method_to_use = market_interaction_features.get(stage).get("method")
+            arguments_to_use = market_interaction_features.get(stage).get("parameters")
+            getattr(object_stock, method_to_use)(**arguments_to_use)
     # geting targets
     object_stock.get_categorical_targets(**target_params_up)
     object_stock.df = object_stock.df.drop(columns = ['target_down']).rename(columns = {'target_up':'target_up_save'})
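The new block lets config drive arbitrary object_stock methods: each stage names a method and its keyword arguments. A hedged sketch of the expected shape; stage keys are free-form, and the methods shown are the two added to stock_eda_panel in the hunks below:

# Hedged sketch of the nested dict this loop walks; all values are illustrative.
configs = {
    "custom_transformations": {
        "market_interaction_features": {
            "stage_1": {
                "method": "extract_sec_data",
                "parameters": {"symbol": "SPY",
                               "base_columns": ["Date", "Close"],
                               "rename_columns": {"Close": "spy_close"}},
            },
            "stage_2": {
                "method": "lag_log_return",
                "parameters": {"lags": 1, "feature": "spy_close",
                               "feature_name": "spy_log_return"},
            },
        }
    }
}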
@@ -1,7 +1,7 @@
 import yfinance as yf
 import pandas as pd
 import numpy as np
-import json
+import gc

 import matplotlib.pyplot as plt
 import matplotlib.gridspec as gridspec
@@ -138,6 +138,10 @@ class stock_eda_panel(object):
         perform analysis of lags of the mean rolling log return
     compute_clip_bands(feature_name=str,threshold=float):
         compute outlier detection for a given signal, Note that this follows mean reversion procedure and feature has to be stationary. Also botton and roof resulting signals is attached to the dataframe
+    extract_sec_data(symbol=str, base_columns=list(str), rename_columns=dict):
+        extract new asset data and merge it into the main asset data
+    lag_log_return(lags=int, feature=str, feature_name=str):
+        compute log return given some lags
     signal_plotter(feature_name=str):
         display analysis plot of a feature with high and low signals
     log_features_standard(feature_name=str):
@@ -667,6 +671,63 @@ class stock_eda_panel(object):
         self.df[f'signal_low_{feature_name}'] = np.where( (self.df[f'norm_{feature_name}'] < self.df[f'lower_{feature_name}'] ), 1, 0)
         self.df[f'signal_up_{feature_name}'] = np.where( (self.df[f'norm_{feature_name}'] > self.df[f'upper_{feature_name}'] ), 1, 0)

+    def extract_sec_data(self, symbol, base_columns, rename_columns=None):
+        """
+        extract new asset data and merge it into the main asset data
+
+        Parameters
+        ----------
+        symbol (str): symbol to extract data for
+        base_columns (list): list of columns to persist
+        rename_columns (dict): map of the new column names using pd.DataFrame.rename()
+
+        Returns
+        -------
+        None
+        """
+        begin_date = self.today - relativedelta(days = self.n_days)
+        begin_date_str = begin_date.strftime('%Y-%m-%d')
+
+        stock = yf.Ticker(symbol)
+        df = stock.history(period=self.data_window)
+        df = df.sort_values('Date')
+        df.reset_index(inplace=True)
+        df['Date'] = pd.to_datetime(df['Date'], format='mixed', utc=True).dt.date
+        df['Date'] = pd.to_datetime(df['Date'])
+        df = df[df.Date >= begin_date_str]
+        df = df[base_columns]
+        if rename_columns:
+            df = df.rename(columns=rename_columns)
+        right_df = df.copy()
+
+        dates_vector = self.df.Date.to_frame()
+        right_df = dates_vector.merge(right_df, on='Date', how='left')
+        right_df = right_df.fillna(method='bfill')
+        right_df = right_df.fillna(method='ffill')
+
+        self.df = self.df.merge(right_df, on='Date', how='left')
+        self.df = self.df.sort_values("Date")
+        del right_df
+        gc.collect()
+
+    def lag_log_return(self, lags, feature, feature_name=False):
+        """
+        compute log return given some lags
+
+        Parameters
+        ----------
+        lags (int): lag to apply to the log return
+        feature (str): feature to apply the log return to
+        feature_name (str): name of the resulting feature
+
+        Returns
+        -------
+        None
+        """
+
+        feature_name = feature_name if feature_name else f"{feature}_log_return"
+        self.df[feature_name] = np.log(self.df[feature]/self.df[feature].shift(lags))
+
     def signal_plotter(self, feature_name):

         """
@@ -748,13 +809,11 @@ class stock_eda_panel(object):
                 .transform(lambda x: x.rolling(ma2, min_periods=1).mean())
             )

-        print('--------------------------------------------------------------------')
         if save_features:
             self.log_features_standard(feature_name)
             self.settings_relative_spread_ma = {'ma1':ma1, 'ma2':ma2, 'threshold':threshold}

         if plot:
-
             self.signal_plotter(feature_name)

     def pair_feature(self, pair_symbol, plot = False):
@@ -810,6 +869,24 @@ class stock_eda_panel(object):
             plt.legend()
             plt.show()

+    def smooth_logrets_interaction_term(self, feature_interact_with, resulting_feature_name="persisted_clip_diff_smooths", rollmean_window=5, ext_threhold=0.015, persist_days=3, save_features=False):
+        """
+        create an interaction term that compares the distance between the asset's rolling-window mean and the market's rolling-window mean,
+        then keeps the outliers or high values (via the absolute value) and persists them for some days.
+        goal: persist big differences between market and asset returns
+
+        feature_interact_with: name of the market return
+        rollmean_window: rolling window or smoothing number of days
+        ext_threhold: threshold
+        persist_days: number of days to persist the signal
+        """
+        self.df["smooth_log_return"] = self.df['log_return'].rolling(rollmean_window).mean().values
+        self.df["smooth_market_log_return"] = self.df[feature_interact_with].rolling(rollmean_window).mean().values
+        self.df["diff_smooths"] = self.df["smooth_market_log_return"] - self.df["smooth_log_return"]
+        self.df["clip_diff_smooths"] = np.where(np.abs(self.df["diff_smooths"]) > ext_threhold, self.df["diff_smooths"], 0)
+        self.df[resulting_feature_name] = self.df['clip_diff_smooths'].rolling(persist_days).mean().values
+        self.df = self.df.drop(columns=["smooth_log_return", "smooth_market_log_return", "diff_smooths", "clip_diff_smooths"])
+
     def calculate_cointegration(self, series_1, series_2):
         """
         calculate cointegration score for two time series
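Continuing the sketch above, the new interaction term can then be applied on top of the merged market return (same placeholder names):

# Compares 5-day smoothed asset vs. market log returns, zeroes differences whose
# absolute value is below ext_threhold, then persists survivors via a 3-day mean.
panel.smooth_logrets_interaction_term(feature_interact_with="spy_log_return",
                                      rollmean_window=5, ext_threhold=0.015,
                                      persist_days=3)
print(panel.df["persisted_clip_diff_smooths"].tail())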
@@ -2304,33 +2381,3 @@ class analyse_index(stock_eda_panel):

         self.states_result = result

-def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
-    '''
-    select relevant beta result data of a given asset
-
-    Parameters:
-    data_market (pd.DataFrame): dataframe of the market results
-    ticket_name (str): name of the asset
-    show_plot (bool): If tru, plot results
-    save_path (str): local path for saving e.g r'C:/path/to/the/file/'
-    save_aws (str): remote key in s3 bucket path e.g. 'path/to/file/'
-    aws_credentials (dict): dict of the aws credentials
-
-    Returns:
-    selection (pd.DataFrame): dataframe of the most relevant beta
-    '''
-    all_betas = data_market[data_market.asset == ticket_name].sort_values('general_r', ascending = False)
-    all_betas['gen_r2'] = all_betas.general_r ** 2
-    all_betas['sampl_r2'] = all_betas.sample_r ** 2
-    selection = all_betas.sort_values('gen_r2',ascending =False).head(2).sort_values('sampl_r2',ascending =False).head(1).drop(columns = ['gen_r2','sampl_r2'])
-
-    if show_plot:
-        print(selection)
-    if save_path:
-        result_plot_name = f'market_best_fit.csv'
-        selection.to_csv(save_path+result_plot_name)
-
-    if save_path and save_aws:
-        # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{ticket_name}/'+result_plot_name,input_path = save_path+result_plot_name)
-        upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = save_aws + result_plot_name, input_path = save_path + result_plot_name, aws_credentials = aws_credentials)
-    return selection
@@ -1,6 +1,11 @@
+import gc
+
 from sklearn.base import BaseEstimator, TransformerMixin
 import pandas as pd
 import numpy as np
+import statsmodels.api as sm
+from patsy import dmatrix
+import matplotlib.pyplot as plt

 class InverseHyperbolicSine(BaseEstimator, TransformerMixin):
@@ -289,3 +294,108 @@ class InteractionFeatures(BaseEstimator, TransformerMixin):
                 fn = 'iterm_'+f1.replace("norm_","")+"_"+f2.replace("norm_","")
                 X = self.simple_div_interaction(X, f1, f2, fn)
         return X
+
+
+class SplineMarketReturnJumpWaves(BaseEstimator, TransformerMixin):
+    """
+    Class that takes return features and performs countings so that a spline regression model can be fitted
+
+    Attributes
+    ----------
+    return_feature_names : list
+        list of the names of the features to apply spline regression to
+    target_variables : list
+        list of target features
+    feature_label : str
+        prefix for the new features
+    sample_perc : float
+        sample size of the training data, taking time into consideration
+
+    Methods
+    -------
+    fit(X=DataFrame, y=DataFrame):
+        fit the transformation
+    transform(X=DataFrame, y=None):
+        apply the feature transformation
+    """
+
+    def __init__(self, return_feature_names, target_variables, feature_label,
+                 sample_perc=0.5, parts=6, e_floor=-0.001, e_top=0.0001, d=3):
+        self.sample_perc = sample_perc
+        self.return_feature_names = return_feature_names
+        self.target_variables = target_variables
+        self.glms = dict()
+        self.feature_label = feature_label
+        self.parts = parts
+        self.e_floor = e_floor
+        self.e_top = e_top
+        self.d = d
+
+    def fit(self, X, y, plot=False):
+        # complete dataset with y
+        X_set = X.copy()
+        X_set[self.target_variables] = y
+        # sampling
+        if plot:
+            fig, ax = plt.subplots(len(self.return_feature_names), 1)
+        for i, return_feature_name in enumerate(self.return_feature_names):
+            X_aggregated = (
+                X_set
+                .groupby("Date", as_index=False)
+                .agg(
+                    count_target_up=("target_up", "sum"),
+                    count_target_down=("target_down", "sum"),
+                    return_feature=(return_feature_name, "max"),
+                )
+                .sort_values("Date", ascending=True)
+                .dropna()
+                .copy()
+            )
+            del X
+            gc.collect()
+            nlines = X_aggregated.shape[0]
+            threshold = int(round((1 - nlines * self.sample_perc), 0))
+            train_ = X_aggregated.iloc[:threshold, :]
+            self.glms[return_feature_name] = dict()
+            for target in self.target_variables:
+                X = train_[["return_feature"]].round(4).values.reshape(-1, 1)
+                y = np.log(train_.dropna()[f"count_{target}"].values + 1)
+                knot_str = self._get_knot(X)
+                transformed_x = dmatrix(f"bs(train, knots=({knot_str}), degree=3, include_intercept=False)", {"train": X}, return_type='dataframe')
+                model = sm.GLM(y, transformed_x).fit()
+                self.glms[return_feature_name][target] = {
+                    "model": model,
+                }
+                if plot:
+                    x_transfomed = dmatrix(f"bs(valid, knots=({knot_str}), degree={self.d}, include_intercept=False)", {"valid": X}, return_type='dataframe')
+                    pred = model.predict(x_transfomed)
+                    ax[i].scatter(X, np.exp(y), s=2, alpha=0.2)
+                    ax[i].scatter(X, np.exp(pred), alpha=0.2, s=1)
+        # self.X_aggregated = X_aggregated
+        return self
+
+    def transform(self, X, y=None, plot=False):
+        if plot:
+            fig, ax = plt.subplots(len(self.return_feature_names), 1)
+        for i, return_feature_name in enumerate(self.return_feature_names):
+            for target in self.target_variables:
+                model = self.glms[return_feature_name][target].get("model")
+                vect = X[return_feature_name]
+                knot_str = self._get_knot(vect)
+                X_transformed = dmatrix(f"bs(valid, knots=({knot_str}), degree={self.d}, include_intercept=False)",
+                                        {"valid": vect.fillna(0)},
+                                        return_type='dataframe')
+                X[f"{self.feature_label}_{return_feature_name}_{target}"] = model.predict(
+                    X_transformed
+                )
+                if plot:
+                    pred = model.predict(X_transformed)
+                    ax[i].scatter(X, np.exp(pred), alpha=0.2, s=1)
+        return X
+
+    def _get_knot(self, input):
+        min_, max_ = np.min(input) - self.e_floor, np.max(input) + self.e_top
+        r = (max_ - min_) / self.parts
+        knot_tuple = [str(i * r + min_) for i, _ in enumerate(range(self.parts), start=0)]
+        knot_str = ",".join(knot_tuple)
+        knot_str = f"({knot_str})"
+        return knot_str
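A hedged standalone sketch of the new transformer on synthetic data. fit() hard-codes a groupby on Date with target_up/target_down counts, so the frame must carry those names; everything else below is illustrative:

import numpy as np
import pandas as pd

n = 400
rng = np.random.default_rng(2)
X = pd.DataFrame({
    "Date": pd.date_range("2023-01-01", periods=n),
    "market_log_return": rng.normal(0, 0.01, n),
})
y = pd.DataFrame({
    "target_up": rng.integers(0, 2, n),
    "target_down": rng.integers(0, 2, n),
})

spliner = SplineMarketReturnJumpWaves(
    return_feature_names=["market_log_return"],
    target_variables=["target_up", "target_down"],
    feature_label="spline_wave",
)
spliner.fit(X, y)                    # one spline GLM per (return feature, target) pair
X_out = spliner.transform(X.copy())  # adds spline_wave_* prediction columns
print([c for c in X_out.columns if c.startswith("spline_wave")])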
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
-Name: virgo-modules
-Version: 0.6.1
+Metadata-Version: 2.4
+Name: virgo_modules
+Version: 0.8.0
 Summary: data processing and statistical modeling using stock market data
 Home-page: https://github.com/miguelmayhem92/virgo_module
 Author: Miguel Mayhuire
@@ -13,7 +13,18 @@ Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Provides-Extra: dev
-Requires-Dist: pytest >=7.0 ; extra == 'dev'
+Requires-Dist: pytest>=7.0; extra == "dev"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: provides-extra
+Dynamic: requires-python
+Dynamic: summary

 # Virgo Package

@@ -0,0 +1,22 @@
+virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
+virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
+virgo_modules/src/hmm_utils.py,sha256=D7axAnCdSe1_1EgRyli2PAnM2f6699hTY9GcxjPXG-o,21221
+virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
+virgo_modules/src/re_utils.py,sha256=GZCkAfgw2tVJRJ_Gw5Yewc14ebiE9wSImPiYQN8FsW0,75095
+virgo_modules/src/ticketer_source.py,sha256=528WhGoANOm4IKnxGSWsbQxxUh3-qlZfvGRNAafMMcE,103883
+virgo_modules/src/transformer_utils.py,sha256=SnYdtsFPnSF6u4UFIat0-X3-qVuUWvv_T46kiB-H0Sk,13682
+virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+virgo_modules/src/edge_utils/conformal_utils.py,sha256=cKm4KSM261Eu1FJn4oowKYiKIesW81VbqITIvopGSVk,5410
+virgo_modules/src/edge_utils/edge_utils.py,sha256=4uXVWthzJDzkJ4Uq19ZYL9aPcA6CDUS3xYD4FY-a2AM,20018
+virgo_modules/src/edge_utils/feature_selection.py,sha256=HYbQ0JLPDiRYhn-5-C438YEKbuNduDmuvboFC_VkHww,2453
+virgo_modules/src/edge_utils/shap_utils.py,sha256=FgcHkfddvdFSeUqEubYa2ExRGVAWSthqK4b-eKagEmo,2333
+virgo_modules/src/edge_utils/stack_model.py,sha256=QqE91uLo2KauGEj91AVNANB1xE7J4Fa49YOX7k5mFng,4257
+virgo_modules/src/market/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+virgo_modules/src/market/market_tools.py,sha256=vBt66_7E3ANz7avzfeNw_RHMGvG9lh5PRhxmcf_Oyjc,6880
+virgo_modules-0.8.0.dist-info/licenses/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
+virgo_modules-0.8.0.dist-info/METADATA,sha256=sCkdOmbxrEEXvGUIwh6vIl_vIcue5C0BbvRtvP9yows,1122
+virgo_modules-0.8.0.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
+virgo_modules-0.8.0.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
+virgo_modules-0.8.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.2)
+Generator: setuptools (78.1.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

@@ -1,19 +0,0 @@
1
- virgo_modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
4
- virgo_modules/src/backtester.py,sha256=OhiWyzDX0PthXGuhChyWUmDN3cLkzVYe95zS4nGtia8,22106
5
- virgo_modules/src/hmm_utils.py,sha256=D7axAnCdSe1_1EgRyli2PAnM2f6699hTY9GcxjPXG-o,21221
6
- virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
7
- virgo_modules/src/re_utils.py,sha256=DBY_VBB1wKm5D7znutpF_66CTLZhJfx54h8Ws0YzdN4,74641
8
- virgo_modules/src/ticketer_source.py,sha256=jxP-OOeoyN2JxRQg-mX6t6WNJXiIrhWKDywDxpYANxU,101977
9
- virgo_modules/src/transformer_utils.py,sha256=ysCUp3cB3_7Jr9OHDqhg2_6Vu0k1YVjfqbvQNbxpbhI,8990
10
- virgo_modules/src/edge_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- virgo_modules/src/edge_utils/conformal_utils.py,sha256=cKm4KSM261Eu1FJn4oowKYiKIesW81VbqITIvopGSVk,5410
12
- virgo_modules/src/edge_utils/edge_utils.py,sha256=7nYPLDNyKqeKIuOOwQi4wsBibzs9gP1HgYMISXJX1Y8,19522
13
- virgo_modules/src/edge_utils/shap_utils.py,sha256=FgcHkfddvdFSeUqEubYa2ExRGVAWSthqK4b-eKagEmo,2333
14
- virgo_modules/src/edge_utils/stack_model.py,sha256=QqE91uLo2KauGEj91AVNANB1xE7J4Fa49YOX7k5mFng,4257
15
- virgo_modules-0.6.1.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
16
- virgo_modules-0.6.1.dist-info/METADATA,sha256=9EtSQrm2xy6-S4wGgWwWbL5V7yz-8BV6TlK3G18LyoM,876
17
- virgo_modules-0.6.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
18
- virgo_modules-0.6.1.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
19
- virgo_modules-0.6.1.dist-info/RECORD,,