virgo-modules 0.0.87__py3-none-any.whl → 0.0.89__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of virgo-modules might be problematic.

@@ -700,7 +700,11 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
  'stochastic_feature':'stochastic_feature',
  'william_feature':'william_feature',
  'vortex_feature':'vortex_feature',
- 'pair_index_feature':'pair_index_feature' # this has a diff structure!
+ 'pair_index_feature':'pair_index_feature', # this has a diff structure!
+ 'min_distance_pricefeature':'minmax_pricefeature',
+ 'min_relprice_pricefeature':'minmax_pricefeature',
+ 'max_distance_pricefeature':'minmax_pricefeature',
+ 'max_relprice_pricefeature':'minmax_pricefeature',
  }
  exceptions = ['pair_feature','pair_index_feature']
  ### standar feature
@@ -147,6 +147,109 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
  def transform(self, X, y=None):
  return X[self.columns]
 
+ class features_entropy(BaseEstimator, TransformerMixin):
+ """
+ Class that creates a feature that calculate entropy for a given feature classes, but it might get some leackeage in the training set.
+ this class is compatible with scikitlearn pipeline
+
+ Attributes
+ ----------
+ columns : list
+ list of features to select
+ entropy_map: pd.DataFrame
+ dataframe of the map with the entropies per class
+ perc: float
+ percentage of the dates using for calculate the entropy map
+
+ Methods
+ -------
+ fit(additional="", X=DataFrame, y=None):
+ fit transformation.
+ transform(X=DataFrame, y=None):
+ apply feature transformation
+ """
+
+ def __init__(self, features, target, feature_name = None, feature_type = 'discrete', perc = 0.5, default_null = 0.99):
+
+ self.features = features
+ self.feature_type = feature_type
+ self.target = target
+ self.perc = perc
+ self.default_null = default_null
+
+ if not feature_name:
+ self.feature_name = '_'.join(features)
+ self.feature_name = self.feature_name + '_' + target + '_' + feature_type
+ else:
+ self.feature_name = feature_name
+
+ def fit(self, X, y=None):
+
+ unique_dates = list(X['Date'].unique())
+ unique_dates.sort()
+
+ total_length = len(unique_dates)
+ cut = int(round(total_length*self.perc,0))
+ train_dates = unique_dates[:cut]
+ max_train_date = max(train_dates)
+
+ X_ = X[X['Date'] <= max_train_date]
+ df = pd.merge(X_, y, left_index=True, right_index=True, how = 'left').copy()
+
+ column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]
+
+ df_aggr = (
+ df
+ .groupby(column_list, as_index = False)
+ .apply(
+ lambda x: pd.Series(
+ dict(
+ counts = x[self.target].count(),
+ trues=(x[self.target] == 1).sum(),
+ falses=(x[self.target] == 0).sum(),
+ )
+ )
+ )
+ .assign(
+ trues_rate=lambda x: x['trues'] / x['counts']
+ )
+ .assign(
+ falses_rate=lambda x: x['falses'] / x['counts']
+ )
+ .assign(
+ log2_trues = lambda x: np.log2(1/x['trues_rate'])
+ )
+ .assign(
+ log2_falses = lambda x: np.log2(1/x['falses_rate'])
+ )
+ .assign(
+ comp1 = lambda x: x['trues_rate']*x['log2_trues']
+ )
+ .assign(
+ comp2 = lambda x: x['falses_rate']*x['log2_falses']
+ )
+ .assign(
+ class_entropy = lambda x: np.round(x['comp1']+x['comp2'],3)
+ )
+ )
+
+ self.column_list = column_list
+ self.entropy_map = (
+ df_aggr
+ [column_list+['class_entropy']]
+ .rename(columns = {'class_entropy': self.feature_name})
+ .copy()
+ )
+
+ del df, df_aggr
+ return self
+
+ def transform(self, X, y=None):
+
+ X = X.merge(self.entropy_map, on=self.column_list, how = 'left')
+ X[self.feature_name] = X[self.feature_name].fillna(self.default_null)
+ return X
+
  def sharpe_ratio(return_series):
 
  '''
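
The hunk above adds a features_entropy transformer: for each class of one or more discretized signal columns it computes the Shannon entropy of the binary target, p1*log2(1/p1) + p0*log2(1/p0), using only the earliest perc share of dates, and transform() merges that per-class entropy onto incoming rows, filling unseen classes with default_null. The following minimal usage sketch is illustrative only; the import path, column names, and toy data are assumptions and do not come from the package documentation.

import pandas as pd

# Assumed import path; the diff places the class next to FeatureSelector but does not show its module.
from virgo_modules.src.ticketer_source import features_entropy

# Toy frame: a 'Date' column plus a '<feature_type>_signal_<feature>' column,
# which is the group key fit() builds from features and feature_type.
df = pd.DataFrame({
    'Date': pd.date_range('2023-01-02', periods=8, freq='D'),
    'discrete_signal_rsi_feature': [0, 1, 1, 0, 2, 2, 1, 0],
})
y = pd.Series([1, 0, 1, 0, 1, 0, 1, 1], name='target_up')

fe = features_entropy(features=['rsi_feature'], target='target_up', perc=0.5)
fe.fit(df, y)                 # entropy map is built from the earliest 50% of dates only
out = fe.transform(df)        # adds the column 'rsi_feature_target_up_discrete'
print(out[fe.feature_name])   # classes unseen during fit fall back to default_null (0.99)
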
@@ -383,6 +486,8 @@ class stock_eda_panel(object):
  perfom fast stochastic oscilator or william indicator
  vortex_feature(window=int, threshold=float, plot=boolean, save_features=boolean):
  perform vortex oscilator
+ minmax_pricefeature(type_func=str, window=int, distance=bolean, save_features=boolean)
+ get relative price/ distance feature with respect to the min/max price in a given window
  pair_index_feature(pair_symbol=str, feature_label=str, window=int, threshold=float, plot=boolean, save_features=boolean):
  perform additional asset ROC feature, then a new feature is created in the main dataframe
  produce_order_features(feature_name=str, save_features=boolean):
@@ -1698,6 +1803,70 @@ class stock_eda_panel(object):
  if plot:
  self.signal_plotter(feature_name)
 
+ def minmax_pricefeature(self, type_func, window, distance = False, save_features = False):
+ """
+ perform relative price/distance with respect to the min/max price in a given time scope
+
+ Parameters
+ ----------
+ type_func (str): either min or max
+ window (int): window scope
+ distance (boolean): if true, get distance feature else relative feature
+ save_features (boolean): True to save feature configuration and feature names
+
+ Returns
+ -------
+ None
+ """
+ if type_func == 'min':
+ self.df['Price_ref'] = self.df[['Open','High', 'Low','Close']].min(axis = 1)
+ elif type_func == 'max':
+ self.df['Price_ref'] = self.df[['Open','High', 'Low','Close']].max(axis = 1)
+
+ init_shape = self.df.shape[0]
+ df_date = self.df[['Date','Price_ref']].rename(columns = {'Date':'Date_ref'}).copy()
+
+ self.df = self.df.rename(columns = {'Price_ref':'Price_to_use'})
+
+ if type_func == 'min':
+ self.df[f'window_price'] = (self.df.sort_values("Date")["Price_to_use"].transform(lambda x: x.rolling(window, min_periods=1).min()))
+ elif type_func == 'max':
+ self.df[f'window_price'] = (self.df.sort_values("Date")["Price_to_use"].transform(lambda x: x.rolling(window, min_periods=1).max()))
+
+
+ self.df = self.df.merge(df_date, left_on = 'window_price', right_on = 'Price_ref', how = 'left')
+ self.df['date_span'] = self.df['Date'] - self.df['Date_ref']
+
+ self.df['RN'] = self.df.sort_values(['date_span'], ascending=False).groupby(['Date']).cumcount() + 1
+ self.df = self.df[self.df['RN'] == 1]
+
+ if distance:
+ self.df[f'{type_func}_distance_to_price'] = pd.to_numeric(self.df['date_span'].dt.days, downcast='integer')
+
+ if not distance:
+ if type_func == 'min':
+ self.df[f'{type_func}_relprice'] = self.df['Price_to_use']/self.df['window_price']-1
+
+ if type_func == 'max':
+ self.df[f'{type_func}_relprice'] = self.df['window_price']/self.df['Price_to_use']-1
+
+ self.df = self.df.drop(columns = ['RN', 'date_span', 'Price_to_use', 'window_price', 'Date_ref','Price_ref'])
+
+ end_shape = self.df.shape[0]
+
+ if init_shape != end_shape:
+ raise Exception("shapes are not the same")
+
+ if save_features:
+ if distance:
+ self.features.append(f'{type_func}_distance_to_price')
+ name_attr = f'{type_func}_distance'
+ if not distance:
+ self.features.append(f'{type_func}_relprice')
+ name_attr = f'{type_func}_relprice'
+
+ setattr(self,f'settings_{name_attr}_pricefeature' , {'type_func': type_func, 'window': window, 'distance': distance})
+
  def pair_index_feature(self, pair_symbol, feature_label, window, threshold, plot = False, save_features = False):
  """
  perform additional asset ROC feature, then a new feature is created in the main dataframe
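
The minmax_pricefeature hunk adds, per bar, either the number of days since the rolling window's min/max price (distance=True) or the current price relative to that rolling min/max (distance=False). For orientation, here is a standalone sketch of the quantity produced by the non-distance 'min' branch on toy OHLC data; variable names and values are illustrative, and the real method works in place on the panel's self.df, e.g. panel.minmax_pricefeature(type_func='min', window=3) on an already-built stock_eda_panel.

import pandas as pd

window = 3
toy_df = pd.DataFrame({
    'Date':  pd.date_range('2023-01-02', periods=6, freq='B'),
    'Open':  [10.0, 10.5, 10.2, 10.8, 11.0, 10.6],
    'High':  [10.6, 10.9, 10.7, 11.1, 11.4, 11.0],
    'Low':   [ 9.8, 10.3, 10.0, 10.6, 10.9, 10.4],
    'Close': [10.4, 10.6, 10.5, 11.0, 11.2, 10.7],
})

# type_func='min', distance=False: each bar's reference price (min of OHLC)
# divided by the rolling window minimum, minus one.
price_ref = toy_df[['Open', 'High', 'Low', 'Close']].min(axis=1)
window_min = price_ref.rolling(window, min_periods=1).min()
toy_df['min_relprice'] = price_ref / window_min - 1
print(toy_df[['Date', 'min_relprice']])
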
@@ -2297,7 +2466,9 @@ class stock_eda_panel(object):
  ## for now this is hard coded
  feature_list = ['spread_ma','relative_spread_ma','pair_feature','count_features','bidirect_count_features','price_range','relative_price_range','rsi_feature',
  'rsi_feature_v2', 'days_features','days_features_v2', 'volume_feature','smooth_volume', 'roc_feature', 'stoch_feature', 'stochastic_feature',
- 'william_feature', 'vortex_feature', 'pair_index_feature','hmm']
+ 'william_feature', 'vortex_feature', 'pair_index_feature','hmm',
+ 'min_distance_pricefeature', 'min_relprice_pricefeature', 'max_distance_pricefeature','max_relprice_pricefeature'
+ ]
 
  for feature in feature_list:
  try:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: virgo-modules
- Version: 0.0.87
+ Version: 0.0.89
  Summary: data processing and statistical modeling using stock market data
  Home-page: https://github.com/miguelmayhem92/virgo_module
  Author: Miguel Mayhuire
@@ -3,10 +3,10 @@ virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
  virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
  virgo_modules/src/edge_utils.py,sha256=tMpt0bfnoOyD_qqh4wD6TQeOhaMcGE59DbvIj3qnp-0,13732
  virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
- virgo_modules/src/re_utils.py,sha256=aSG1w5Xaikd9ThG2HAkP_bs43Mvy7oEyfPOPJ9NvQM4,72002
- virgo_modules/src/ticketer_source.py,sha256=cEGgago1bl3tynRND30jqfiPWxF-KTTgiN9DRTbyB_k,143298
- virgo_modules-0.0.87.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
- virgo_modules-0.0.87.dist-info/METADATA,sha256=ih3-A9lxvlupVE5SSJR-Z5S-1LGS-clC1ZfiEpKHZs0,1429
- virgo_modules-0.0.87.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
- virgo_modules-0.0.87.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
- virgo_modules-0.0.87.dist-info/RECORD,,
+ virgo_modules/src/re_utils.py,sha256=ndPUW3F0QkljtKLR1dqtBm2I2LtceduSgLRIk3HszWk,72244
+ virgo_modules/src/ticketer_source.py,sha256=QHNC4ZP8SdGpozUPcQSVCtazyacGVxvVKrcG-RlCaJA,150133
+ virgo_modules-0.0.89.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
+ virgo_modules-0.0.89.dist-info/METADATA,sha256=5S5faZ4HzTuFPEU9JffN48OFVwmhR0gviqCPMm68cX8,1429
+ virgo_modules-0.0.89.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
+ virgo_modules-0.0.89.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
+ virgo_modules-0.0.89.dist-info/RECORD,,