virgo-modules 0.0.87__tar.gz → 0.0.89__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of virgo-modules might be problematic. Click here for more details.
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/PKG-INFO +1 -1
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/setup.py +1 -1
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/re_utils.py +5 -1
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/ticketer_source.py +172 -1
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/PKG-INFO +1 -1
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/LICENSE +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/README.md +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/setup.cfg +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/__init__.py +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/__init__.py +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/edge_utils.py +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/SOURCES.txt +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
- {virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
|
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name="virgo_modules",
|
|
8
|
-
version="0.0.87",
|
|
8
|
+
version="0.0.89",
|
|
9
9
|
description="data processing and statistical modeling using stock market data",
|
|
10
10
|
package_dir={"": "virgo_app"},
|
|
11
11
|
packages=find_packages(where="virgo_app"),
|
|
@@ -700,7 +700,11 @@ def get_data(ticker_name:str, ticket_settings:dict, n_days:int = False, hmm_avai
|
|
|
700
700
|
'stochastic_feature':'stochastic_feature',
|
|
701
701
|
'william_feature':'william_feature',
|
|
702
702
|
'vortex_feature':'vortex_feature',
|
|
703
|
-
'pair_index_feature':'pair_index_feature' # this has a diff structure!
|
|
703
|
+
'pair_index_feature':'pair_index_feature', # this has a diff structure!
|
|
704
|
+
'min_distance_pricefeature':'minmax_pricefeature',
|
|
705
|
+
'min_relprice_pricefeature':'minmax_pricefeature',
|
|
706
|
+
'max_distance_pricefeature':'minmax_pricefeature',
|
|
707
|
+
'max_relprice_pricefeature':'minmax_pricefeature',
|
|
704
708
|
}
|
|
705
709
|
exceptions = ['pair_feature','pair_index_feature']
|
|
706
710
|
### standard feature
|
{virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules/src/ticketer_source.py
RENAMED
|
@@ -147,6 +147,109 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
|
|
|
147
147
|
def transform(self, X, y=None):
|
|
148
148
|
return X[self.columns]
|
|
149
149
|
|
|
150
|
+
class features_entropy(BaseEstimator, TransformerMixin):
    """
    Transformer that adds one column holding the binary entropy of the target
    inside each combination of signal classes.

    The entropy map is learned in ``fit`` from the earliest ``perc`` share of the
    distinct dates found in ``X['Date']``, so rows after that cut do not take part
    in the estimate. Rows before the cut are both used to build the map and later
    scored with it, so some leakage inside the training set is still possible.
    The class follows the scikit-learn fit/transform protocol.

    Parameters
    ----------
    features : list of str
        base feature names; the grouping columns read from X are
        ``f'{feature_type}_signal_{name}'`` for each name
    target : str
        name of the target column supplied through ``y`` (compared against 1 and 0)
    feature_name : str, optional
        name of the produced column; when empty it is built as
        ``'_'.join(features) + '_' + target + '_' + feature_type``
    feature_type : str
        prefix of the signal columns, 'discrete' by default
    perc : float
        share of the sorted distinct dates used to compute the entropy map
    default_null : float
        value written for rows whose class combination has no entropy in the map

    Attributes
    ----------
    column_list : list of str
        grouping columns, set by ``fit``
    entropy_map : pd.DataFrame
        one row per class combination with its entropy, set by ``fit``

    Methods
    -------
    fit(X=DataFrame, y=Series or DataFrame):
        learn the entropy map
    transform(X=DataFrame, y=None):
        append the entropy column to X
    """

    def __init__(self, features, target, feature_name = None, feature_type = 'discrete', perc = 0.5, default_null = 0.99):

        self.features = features
        self.feature_type = feature_type
        self.target = target
        self.perc = perc
        self.default_null = default_null

        if not feature_name:
            self.feature_name = '_'.join(features)
            self.feature_name = self.feature_name + '_' + target + '_' + feature_type
        else:
            self.feature_name = feature_name

    @staticmethod
    def _entropy_term(rate):
        """
        Return ``rate * log2(1 / rate)`` element-wise.

        A rate of exactly 0 contributes 0 (the usual entropy convention) instead of
        the NaN that ``0 * inf`` would give; a NaN rate stays NaN.
        """
        positive = rate.where(rate > 0)
        term = positive * np.log2(1 / positive)
        # NaN != 0 is True, so undefined rates keep their NaN term
        return term.where(rate != 0, 0.0)

    def fit(self, X, y=None):
        """
        Learn the entropy of the target for every observed class combination.

        Parameters
        ----------
        X (pd.DataFrame): must contain 'Date' and the signal columns
        y (pd.Series or pd.DataFrame): target, joined to X on the index

        Returns
        -------
        self

        Raises
        ------
        TypeError: if y is not given
        ValueError: if X has no dates
        """
        if y is None:
            raise TypeError("features_entropy.fit requires y with the target column")

        unique_dates = sorted(X['Date'].unique())
        if not unique_dates:
            raise ValueError("X has no dates to compute the entropy map from")

        # round() may give 0 for short histories or small perc; keep at least one date
        cut = max(int(round(len(unique_dates) * self.perc, 0)), 1)
        max_train_date = unique_dates[:cut][-1]

        X_ = X[X['Date'] <= max_train_date]
        df = pd.merge(X_, y, left_index=True, right_index=True, how = 'left')

        column_list = [f'{self.feature_type}_signal_{colx}' for colx in self.features]

        # indicator columns + grouped sum give the same tallies as a per-group apply,
        # without running Python code once per group
        target_values = df[self.target]
        tally = df[column_list].copy()
        tally['counts'] = target_values.notna().astype(int)
        tally['trues'] = (target_values == 1).astype(int)
        tally['falses'] = (target_values == 0).astype(int)
        df_aggr = tally.groupby(column_list, as_index = False)[['counts', 'trues', 'falses']].sum()

        trues_rate = df_aggr['trues'] / df_aggr['counts']
        falses_rate = df_aggr['falses'] / df_aggr['counts']
        df_aggr['class_entropy'] = np.round(
            self._entropy_term(trues_rate) + self._entropy_term(falses_rate), 3
        )

        self.column_list = column_list
        self.entropy_map = (
            df_aggr
            [column_list+['class_entropy']]
            .rename(columns = {'class_entropy': self.feature_name})
            .copy()
        )

        return self

    def transform(self, X, y=None):
        """
        Append the entropy column to X.

        Parameters
        ----------
        X (pd.DataFrame): must contain the signal columns used in fit
        y: ignored

        Returns
        -------
        pd.DataFrame: X plus the entropy column, with X's index kept
        """
        enriched = X.merge(self.entropy_map, on=self.column_list, how = 'left')
        # the map has one row per class combination, so the left join keeps row count
        # and order; restore the index that merge discards so y still aligns with X
        enriched.index = X.index
        enriched[self.feature_name] = enriched[self.feature_name].fillna(self.default_null)
        return enriched
|
|
252
|
+
|
|
150
253
|
def sharpe_ratio(return_series):
|
|
151
254
|
|
|
152
255
|
'''
|
|
@@ -383,6 +486,8 @@ class stock_eda_panel(object):
|
|
|
383
486
|
perform fast stochastic oscillator or Williams indicator
|
|
384
487
|
vortex_feature(window=int, threshold=float, plot=boolean, save_features=boolean):
|
|
385
488
|
perform vortex oscillator
|
|
489
|
+
minmax_pricefeature(type_func=str, window=int, distance=boolean, save_features=boolean):
|
|
490
|
+
get relative price/ distance feature with respect to the min/max price in a given window
|
|
386
491
|
pair_index_feature(pair_symbol=str, feature_label=str, window=int, threshold=float, plot=boolean, save_features=boolean):
|
|
387
492
|
perform additional asset ROC feature, then a new feature is created in the main dataframe
|
|
388
493
|
produce_order_features(feature_name=str, save_features=boolean):
|
|
@@ -1698,6 +1803,70 @@ class stock_eda_panel(object):
|
|
|
1698
1803
|
if plot:
|
|
1699
1804
|
self.signal_plotter(feature_name)
|
|
1700
1805
|
|
|
1806
|
+
def minmax_pricefeature(self, type_func, window, distance = False, save_features = False):
    """
    perform relative price/distance with respect to the min/max price in a given time scope

    The reference price of a row is the min (or max) of its Open/High/Low/Close.
    The window price is the rolling min (or max) of that reference price over the
    last `window` rows in Date order. Either the number of days since the window
    price was set, or the relative gap between the row price and the window price,
    is written to self.df.

    Parameters
    ----------
    type_func (str): either min or max
    window (int): window scope, in rows
    distance (boolean): if true, get distance feature else relative feature
    save_features (boolean): True to save feature configuration and feature names

    Returns
    -------
    None

    Raises
    ------
    ValueError: if type_func is neither 'min' nor 'max'
    Exception: if 'Date' holds repeated values (one row per date is required)
    """
    if type_func not in ('min', 'max'):
        raise ValueError(f"type_func must be 'min' or 'max', got {type_func!r}")
    use_min = type_func == 'min'

    # the feature is defined per date; repeated dates cannot keep the row count
    if self.df['Date'].duplicated().any():
        raise Exception("shapes are not the same")

    frame = self.df.reset_index(drop = True)
    ohlc = frame[['Open','High', 'Low','Close']]
    price_to_use = ohlc.min(axis = 1) if use_min else ohlc.max(axis = 1)

    ordered = frame.sort_values("Date", kind = 'stable')
    roller = price_to_use.loc[ordered.index].rolling(window, min_periods=1)
    window_price = roller.min() if use_min else roller.max()

    if distance:
        # locate the extreme inside the window itself; matching on the price value
        # over the whole history can land on a date that is outside the window
        locate = np.nanargmin if use_min else np.nanargmax
        offset = roller.apply(locate, raw = True)
        window_start = np.maximum(np.arange(len(ordered)) - window + 1, 0)
        ref_position = offset + window_start

        ref_dates = ordered['Date'].iloc[ref_position.fillna(0).astype(int).to_numpy()]
        ref_dates.index = ordered.index
        # windows without any valid price have no reference date
        ref_dates = ref_dates.where(ref_position.notna())

        date_span = ordered['Date'] - ref_dates
        frame[f'{type_func}_distance_to_price'] = pd.to_numeric(date_span.dt.days, downcast='integer')
        name_attr = f'{type_func}_distance'
        feature_column = f'{type_func}_distance_to_price'
    else:
        if use_min:
            frame[f'{type_func}_relprice'] = price_to_use/window_price-1
        else:
            frame[f'{type_func}_relprice'] = window_price/price_to_use-1
        name_attr = f'{type_func}_relprice'
        feature_column = f'{type_func}_relprice'

    self.df = frame

    if save_features:
        self.features.append(feature_column)
        setattr(self,f'settings_{name_attr}_pricefeature' , {'type_func': type_func, 'window': window, 'distance': distance})
|
|
1869
|
+
|
|
1701
1870
|
def pair_index_feature(self, pair_symbol, feature_label, window, threshold, plot = False, save_features = False):
|
|
1702
1871
|
"""
|
|
1703
1872
|
perform additional asset ROC feature, then a new feature is created in the main dataframe
|
|
@@ -2297,7 +2466,9 @@ class stock_eda_panel(object):
|
|
|
2297
2466
|
## for now this is hard coded
|
|
2298
2467
|
feature_list = ['spread_ma','relative_spread_ma','pair_feature','count_features','bidirect_count_features','price_range','relative_price_range','rsi_feature',
|
|
2299
2468
|
'rsi_feature_v2', 'days_features','days_features_v2', 'volume_feature','smooth_volume', 'roc_feature', 'stoch_feature', 'stochastic_feature',
|
|
2300
|
-
'william_feature', 'vortex_feature', 'pair_index_feature','hmm'
|
|
2469
|
+
'william_feature', 'vortex_feature', 'pair_index_feature','hmm',
|
|
2470
|
+
'min_distance_pricefeature', 'min_relprice_pricefeature', 'max_distance_pricefeature','max_relprice_pricefeature'
|
|
2471
|
+
]
|
|
2301
2472
|
|
|
2302
2473
|
for feature in feature_list:
|
|
2303
2474
|
try:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
{virgo_modules-0.0.87 → virgo_modules-0.0.89}/virgo_app/virgo_modules.egg-info/top_level.txt
RENAMED
|
File without changes
|