virgo-modules 0.0.70__tar.gz → 0.0.72__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

Files changed (17)
  1. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/PKG-INFO +20 -4
  2. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/setup.py +1 -1
  3. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules/src/ticketer_source.py +94 -39
  4. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules.egg-info/PKG-INFO +20 -4
  5. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/LICENSE +0 -0
  6. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/README.md +0 -0
  7. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/setup.cfg +0 -0
  8. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules/__init__.py +0 -0
  9. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules/src/__init__.py +0 -0
  10. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules/src/aws_utils.py +0 -0
  11. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules/src/edge_utils.py +0 -0
  12. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules/src/pull_artifacts.py +0 -0
  13. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules/src/re_utils.py +0 -0
  14. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules.egg-info/SOURCES.txt +0 -0
  15. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules.egg-info/dependency_links.txt +0 -0
  16. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules.egg-info/requires.txt +0 -0
  17. {virgo_modules-0.0.70 → virgo_modules-0.0.72}/virgo_app/virgo_modules.egg-info/top_level.txt +0 -0
@@ -1,19 +1,36 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo_modules
3
- Version: 0.0.70
3
+ Version: 0.0.72
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
7
7
  Author-email: miguelmayhem92@gmail.com
8
8
  License: MIT
9
- Platform: UNKNOWN
10
9
  Classifier: License :: OSI Approved :: MIT License
11
10
  Classifier: Programming Language :: Python :: 3.9
12
11
  Classifier: Operating System :: OS Independent
13
12
  Requires-Python: >=3.9, <3.10
14
13
  Description-Content-Type: text/markdown
15
- Provides-Extra: dev
16
14
  License-File: LICENSE
15
+ Requires-Dist: feature-engine==1.6.1
16
+ Requires-Dist: matplotlib==3.6.3
17
+ Requires-Dist: mlflow==2.1.1
18
+ Requires-Dist: numpy==1.23.5
19
+ Requires-Dist: optuna==3.1.0
20
+ Requires-Dist: pandas==1.5.3
21
+ Requires-Dist: plotly==5.15.0
22
+ Requires-Dist: rsa==4.9
23
+ Requires-Dist: scikit-learn==1.2.1
24
+ Requires-Dist: scipy==1.10.0
25
+ Requires-Dist: seaborn==0.12.2
26
+ Requires-Dist: starlette==0.22.0
27
+ Requires-Dist: statsmodels==0.13.5
28
+ Requires-Dist: ta==0.10.2
29
+ Requires-Dist: yfinance==0.2.9
30
+ Requires-Dist: hmmlearn==0.3.0
31
+ Requires-Dist: boto3
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0; extra == "dev"
17
34
 
18
35
  # Virgo Package
19
36
 
@@ -34,4 +51,3 @@ obj = stock_eda_panel(stock_code = 'PEP', n_days = 20)
34
51
  obj.get_data()
35
52
  print(obj.df.shape)
36
53
  ```
37
-
@@ -5,7 +5,7 @@ with open("virgo_app/README.md", "r") as f:
5
5
 
6
6
  setup(
7
7
  name="virgo_modules",
8
- version="0.0.70",
8
+ version="0.0.72",
9
9
  description="data processing and statistical modeling using stock market data",
10
10
  package_dir={"": "virgo_app"},
11
11
  packages=find_packages(where="virgo_app"),
@@ -89,6 +89,14 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
89
89
  def transform(self, X, y=None):
90
90
  return X[self.columns]
91
91
 
92
+ def sharpe_ratio(return_series):
93
+ N = 255 # Trading days in the year (change to 365 for crypto)
94
+ rf = 0.005 # Half a percent risk-free rate
95
+ mean = return_series.mean() * N -rf
96
+ sigma = return_series.std() * np.sqrt(N)
97
+ sharpe = round(mean / sigma, 3)
98
+ return sharpe
99
+
92
100
  class signal_combiner(BaseEstimator, TransformerMixin):
93
101
  def __init__(self, columns, drop = True, prefix_up = 'signal_up_', prefix_low = 'signal_low_'):
94
102
  self.columns = columns
@@ -422,7 +430,6 @@ class stock_eda_panel(object):
422
430
 
423
431
  def outlier_plot(self, zlim, plot = False, save_features = False):
424
432
 
425
- print('---------------------------------------------------------------------------')
426
433
  mean = self.df.log_return.mean()
427
434
  std = self.df.log_return.std()
428
435
  self.df['z_log_return'] = (self.df.log_return - mean)/std
@@ -433,26 +440,41 @@ class stock_eda_panel(object):
433
440
  self.df['up_outlier'] = zlim*self.df['z_std_log_return'] + mean_
434
441
  self.df['low_outlier'] = -zlim*self.df['z_std_log_return'] + mean_
435
442
 
436
- self.df['signal_low_outlier'] = np.where( (self.df['z_log_return'] < self.df['low_outlier'] ), 1, 0)
443
+ self.df['signal_low_osutlier'] = np.where( (self.df['z_log_return'] < self.df['low_outlier'] ), 1, 0)
437
444
  self.df['signal_up_outlier'] = np.where( (self.df['z_log_return'] > self.df['up_outlier'] ), 1, 0)
438
445
  if save_features:
439
446
  self.signals.append('signal_low_outlier')
440
447
  self.signals.append('signal_up_outlier')
441
448
  self.settings_outlier = {'zlim':zlim}
442
449
  if plot:
450
+ mu = self.df['z_log_return'].mean()
451
+ sigma = self.df['z_log_return'].std()
452
+ x = np.linspace(self.df['z_log_return'].min(),self.df['z_log_return'].max(), 15000)
453
+ y = stats.norm.pdf(x, loc = mu, scale = sigma)
454
+
443
455
  fig, axs = plt.subplots(2, 1,figsize=(15,8))
444
456
 
445
- axs[0].hist(self.df['z_log_return'],bins = 100 )
457
+ axs[0].hist(self.df['z_log_return'],density = True,bins = 100 , label = 'Returns distribution')
446
458
  axs[0].axvline(l1, color='r', linestyle='--')
447
459
  axs[0].axvline(-l1, color='r', linestyle='--')
448
460
  axs[0].axvline(l2, color='green', linestyle='--')
449
461
  axs[0].axvline(-l2, color='green', linestyle='--')
450
-
462
+ axs[0].plot(x,y, linewidth = 3, color = 'r', label = 'Normal Dist Curve')
463
+
451
464
  axs[1].plot(self.df['Date'],self.df['z_log_return'])
452
465
  axs[1].plot(self.df['Date'],self.df['low_outlier'], linestyle='--')
453
466
  axs[1].plot(self.df['Date'],self.df['up_outlier'], linestyle='--')
454
467
 
468
+ fig.legend()
455
469
  plt.show()
470
+
471
+ z_stat, p_stat = stats.normaltest(self.df['z_log_return'].dropna())
472
+ p_stat = round(p_stat, 7)
473
+ print('---------------------- returns normality tests ----------------------------')
474
+ if p_stat < 0.05:
475
+ print(f'pvalue: {p_stat} then, returns do not follow a normal distribution')
476
+ else:
477
+ print(f'pvalue: {p_stat} then, returns follow a normal distribution')
456
478
 
457
479
  def analysis_roll_mean_log_returns(self, lags, plot = False):
458
480
 
@@ -1195,7 +1217,20 @@ class stock_eda_panel(object):
1195
1217
  ### first feature: the hidden state
1196
1218
  self.df['hmm_feature'] = self.model_hmm.predict(self.df)
1197
1219
  self.create_hmm_derived_features(lag_returns = lag_returns_state)
1198
-
1220
+
1221
+ ## completion
1222
+
1223
+ hidden_states = pipeline_hmm.predict(data_train)
1224
+ map_ = {i:f'state_{i}' for i in range(n_clusters)}
1225
+ color_map = { i:DEFAULT_PLOTLY_COLORS[i] for i in range(n_clusters)}
1226
+
1227
+ data_train['HMM'] = hidden_states
1228
+ data_train['HMM_state'] = data_train['HMM'].map(map_)
1229
+
1230
+ hidden_states = pipeline_hmm.predict(data_test)
1231
+ data_test['HMM'] = hidden_states
1232
+ data_test['HMM_state'] = data_test['HMM'].map(map_)
1233
+
1199
1234
  if model:
1200
1235
  self.df['hmm_feature'] = model.predict(self.df)
1201
1236
  self.create_hmm_derived_features(lag_returns = lag_returns_state)
@@ -1207,13 +1242,6 @@ class stock_eda_panel(object):
1207
1242
 
1208
1243
  if plot:
1209
1244
 
1210
- hidden_states = pipeline_hmm.predict(data_train)
1211
- map_ = {i:f'state_{i}' for i in range(n_clusters)}
1212
- color_map = { i:DEFAULT_PLOTLY_COLORS[i] for i in range(n_clusters)}
1213
-
1214
- data_train['HMM'] = hidden_states
1215
- data_train['HMM_state'] = data_train['HMM'].map(map_)
1216
-
1217
1245
  fig = go.Figure()
1218
1246
  fig.add_trace(go.Scatter(x=data_train['Date'], y=data_train['Close'], mode='lines',name = 'history', marker_color = 'grey'))
1219
1247
  for state in data_train['HMM_state'].unique():
@@ -1224,10 +1252,6 @@ class stock_eda_panel(object):
1224
1252
  fig.show()
1225
1253
 
1226
1254
  print('---------------------------------------------------------')
1227
-
1228
- hidden_states = pipeline_hmm.predict(data_test)
1229
- data_test['HMM'] = hidden_states
1230
- data_test['HMM_state'] = data_test['HMM'].map(map_)
1231
1255
 
1232
1256
  fig = go.Figure()
1233
1257
  fig.add_trace(go.Scatter(x=data_test['Date'], y=data_test['Close'], mode='lines',name = 'history', marker_color = 'grey'))
@@ -1826,7 +1850,7 @@ class signal_analyser_object:
1826
1850
  if self.return_fig:
1827
1851
  return fig
1828
1852
 
1829
- def create_backtest_signal(self,days_strategy, test_size, feature_name):
1853
+ def create_backtest_signal(self,days_strategy, test_size, feature_name, high_exit = False, low_exit = False):
1830
1854
  asset_1 = 'Close'
1831
1855
  up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
1832
1856
  df1 = self.data.iloc[-test_size:,:].copy()
@@ -1846,54 +1870,84 @@ class signal_analyser_object:
1846
1870
  df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
1847
1871
  df2['break'] = np.where(df2['span'] > 3, 1, 0)
1848
1872
  df2['break'] = np.where(df2['span'].isna(), 1, df2['break'])
1849
-
1873
+
1850
1874
  df2['chain_id'] = df2.sort_values(['Date']).groupby(['break']).cumcount() + 1
1851
1875
  df2['chain_id'] = np.where(df2['break'] == 1, df2['chain_id'], np.nan )
1852
1876
  df2['chain_id'] = df2['chain_id'].fillna(method = 'ffill')
1853
-
1877
+
1854
1878
  df2['internal_rn'] = df2.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
1855
1879
  df2['inv_internal_rn'] = df2.sort_values(['Date'],ascending = False).groupby(['chain_id']).cumcount() + 1
1856
-
1880
+
1857
1881
  df2['first_in_chain'] = np.where(df2['internal_rn'] == 1, True, False)
1858
1882
  df2['last_in_chain'] = np.where(df2['inv_internal_rn'] == 1, True, False)
1859
-
1883
+
1860
1884
  df2 = df2.drop(columns = ['break','span','lag_Date','inv_internal_rn']).sort_values('Date')
1861
-
1885
+
1862
1886
  df2 = df2[(df2.last_in_chain == True) & (df2.signal_type == 'down')][['last_in_chain']]
1863
1887
  dft = df1.merge(df2,how = 'left',left_index=True, right_index=True )
1864
-
1888
+
1865
1889
  dft['chain_id'] = dft.sort_values(['Date']).groupby(['last_in_chain']).cumcount() + 1
1866
1890
  dft['chain_id'] = np.where(dft['last_in_chain'] == True, dft['chain_id'], np.nan )
1867
1891
  dft['chain_id'] = dft['chain_id'].fillna(method = 'ffill')
1868
-
1892
+
1869
1893
  dft['internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
1870
1894
  dft['flag'] = np.where(dft['internal_rn'] < days_strategy, 1,0)
1871
-
1895
+
1872
1896
  dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
1873
1897
  dft['bench_prod'] = dft['lrets_bench'].cumsum()
1874
1898
  dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
1875
-
1899
+
1900
+ if high_exit and low_exit:
1901
+ dft['open_strat'] = np.where(dft.last_in_chain == True, dft.Open, np.nan)
1902
+ dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
1903
+ dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
1904
+ dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
1905
+ dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
1906
+ dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['internal_rn'] == days_strategy)), 1, np.nan)
1907
+ dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
1908
+
1909
+ dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
1910
+ dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1,-1,np.nan))
1911
+ dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
1912
+ dft['exit_order'] = dft.sort_values(['Date']).groupby(['chain_id','exit']).cumcount() + 1
1913
+ dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
1914
+ dft = dft.drop(columns = ['exit_order'])
1915
+ ## if last signal is near
1916
+ max_id = dft.chain_id.max()
1917
+ dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).internal_rn.transform('max')
1918
+ dft['exit'] = np.where((dft.chain_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.internal_rn), 1, dft['exit'])
1919
+
1920
+ dft['exit_step'] = np.where(dft.exit == 1, dft.internal_rn, np.nan)
1921
+ dft['exit_step'] = dft.sort_values(['Date']).groupby(['chain_id']).exit_step.transform('max')
1922
+
1923
+ dft['flag'] = np.where(dft.internal_rn <= dft.exit_step, 1, 0)
1924
+ dft = dft.drop(columns = ['open_strat', 'high_strat_ret', 'low_strat_ret','exit_step', 'exit','exit_type','high_exit','low_exit', 'max_internal_rn'])
1925
+
1876
1926
  dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
1877
1927
  dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(),-0.0,dft['lrets_strat'])
1878
1928
  dft['lrets_prod'] = dft['lrets_strat'].cumsum()
1879
1929
  dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
1880
-
1930
+
1881
1931
  bench_rets = round(dft['bench_prod_exp'].values[-1]*100,1)
1882
1932
  strat_rets = round(dft['strat_prod_exp'].values[-1]*100,1)
1883
-
1884
1933
 
1885
- message1 = f'returns benchmark {bench_rets}%'
1886
- message2 = f'returns strategy {strat_rets}%'
1934
+ bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()),1)
1935
+ strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()),1)
1936
+
1937
+ message1 = f'{bench_rets}%'
1938
+ message2 = f'{strat_rets}%'
1887
1939
 
1888
1940
  messages = {
1889
- 'benchmark return':message1,
1890
- 'strategy return':message2
1941
+ 'benchmark return:':message1,
1942
+ 'benchmark sharpe ratio:': bench_sr,
1943
+ 'strategy return:':message2,
1944
+ 'strategy sharpe ratio:': strat_sr,
1891
1945
  }
1892
1946
  if self.show_plot:
1893
1947
  print('----------------------------')
1894
- print(message1)
1895
- print(message2)
1948
+ print(messages)
1896
1949
  print('----------------------------')
1950
+
1897
1951
  fig = plt.figure(1)
1898
1952
  plt.plot(dft.bench_prod_exp.values, label = 'benchmark')
1899
1953
  plt.scatter(range(len(dft)),np.where(dft[low_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'red', label = 'signal')
@@ -1919,23 +1973,24 @@ class signal_analyser_object:
1919
1973
 
1920
1974
  upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
1921
1975
  upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
1922
-
1976
+
1923
1977
  if not self.show_plot:
1924
1978
  plt.close()
1925
1979
 
1926
1980
  del df1,df2,dft
1927
-
1981
+
1928
1982
  if self.return_fig:
1929
1983
  return fig, messages
1930
1984
 
1931
- def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False):
1985
+ def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False, exit_params = {}):
1986
+
1932
1987
  method(**configuration)
1933
1988
  signal_assess = signal_analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot)
1934
1989
  signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
1935
-
1990
+
1936
1991
  if backtest:
1937
1992
  print('-----------------------back test ---------------------------')
1938
- signal_assess.create_backtest_signal(backtest, test_data_size, feature_name)
1993
+ signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params )
1939
1994
 
1940
1995
  return signal_assess.mean_median_return
1941
1996
 
@@ -1,19 +1,36 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo-modules
3
- Version: 0.0.70
3
+ Version: 0.0.72
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
7
7
  Author-email: miguelmayhem92@gmail.com
8
8
  License: MIT
9
- Platform: UNKNOWN
10
9
  Classifier: License :: OSI Approved :: MIT License
11
10
  Classifier: Programming Language :: Python :: 3.9
12
11
  Classifier: Operating System :: OS Independent
13
12
  Requires-Python: >=3.9, <3.10
14
13
  Description-Content-Type: text/markdown
15
- Provides-Extra: dev
16
14
  License-File: LICENSE
15
+ Requires-Dist: feature-engine==1.6.1
16
+ Requires-Dist: matplotlib==3.6.3
17
+ Requires-Dist: mlflow==2.1.1
18
+ Requires-Dist: numpy==1.23.5
19
+ Requires-Dist: optuna==3.1.0
20
+ Requires-Dist: pandas==1.5.3
21
+ Requires-Dist: plotly==5.15.0
22
+ Requires-Dist: rsa==4.9
23
+ Requires-Dist: scikit-learn==1.2.1
24
+ Requires-Dist: scipy==1.10.0
25
+ Requires-Dist: seaborn==0.12.2
26
+ Requires-Dist: starlette==0.22.0
27
+ Requires-Dist: statsmodels==0.13.5
28
+ Requires-Dist: ta==0.10.2
29
+ Requires-Dist: yfinance==0.2.9
30
+ Requires-Dist: hmmlearn==0.3.0
31
+ Requires-Dist: boto3
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0; extra == "dev"
17
34
 
18
35
  # Virgo Package
19
36
 
@@ -34,4 +51,3 @@ obj = stock_eda_panel(stock_code = 'PEP', n_days = 20)
34
51
  obj.get_data()
35
52
  print(obj.df.shape)
36
53
  ```
37
-
File without changes
File without changes
File without changes