virgo-modules 0.0.70__py3-none-any.whl → 0.0.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

@@ -89,6 +89,14 @@ class FeatureSelector(BaseEstimator, TransformerMixin):
89
89
  def transform(self, X, y=None):
90
90
  return X[self.columns]
91
91
 
92
+ def sharpe_ratio(return_series):
93
+ N = 255 # Trading days in the year (change to 365 for crypto)
94
+ rf = 0.005 # Half a percent risk free rate
95
+ mean = return_series.mean() * N -rf
96
+ sigma = return_series.std() * np.sqrt(N)
97
+ sharpe = round(mean / sigma, 3)
98
+ return sharpe
99
+
92
100
  class signal_combiner(BaseEstimator, TransformerMixin):
93
101
  def __init__(self, columns, drop = True, prefix_up = 'signal_up_', prefix_low = 'signal_low_'):
94
102
  self.columns = columns
@@ -422,7 +430,6 @@ class stock_eda_panel(object):
422
430
 
423
431
  def outlier_plot(self, zlim, plot = False, save_features = False):
424
432
 
425
- print('---------------------------------------------------------------------------')
426
433
  mean = self.df.log_return.mean()
427
434
  std = self.df.log_return.std()
428
435
  self.df['z_log_return'] = (self.df.log_return - mean)/std
@@ -433,26 +440,41 @@ class stock_eda_panel(object):
433
440
  self.df['up_outlier'] = zlim*self.df['z_std_log_return'] + mean_
434
441
  self.df['low_outlier'] = -zlim*self.df['z_std_log_return'] + mean_
435
442
 
436
- self.df['signal_low_outlier'] = np.where( (self.df['z_log_return'] < self.df['low_outlier'] ), 1, 0)
443
+ self.df['signal_low_outlier'] = np.where( (self.df['z_log_return'] < self.df['low_outlier'] ), 1, 0)
437
444
  self.df['signal_up_outlier'] = np.where( (self.df['z_log_return'] > self.df['up_outlier'] ), 1, 0)
438
445
  if save_features:
439
446
  self.signals.append('signal_low_outlier')
440
447
  self.signals.append('signal_up_outlier')
441
448
  self.settings_outlier = {'zlim':zlim}
442
449
  if plot:
450
+ mu = self.df['z_log_return'].mean()
451
+ sigma = self.df['z_log_return'].std()
452
+ x = np.linspace(self.df['z_log_return'].min(),self.df['z_log_return'].max(), 15000)
453
+ y = stats.norm.pdf(x, loc = mu, scale = sigma)
454
+
443
455
  fig, axs = plt.subplots(2, 1,figsize=(15,8))
444
456
 
445
- axs[0].hist(self.df['z_log_return'],bins = 100 )
457
+ axs[0].hist(self.df['z_log_return'],density = True,bins = 100 , label = 'Returns distribution')
446
458
  axs[0].axvline(l1, color='r', linestyle='--')
447
459
  axs[0].axvline(-l1, color='r', linestyle='--')
448
460
  axs[0].axvline(l2, color='green', linestyle='--')
449
461
  axs[0].axvline(-l2, color='green', linestyle='--')
450
-
462
+ axs[0].plot(x,y, linewidth = 3, color = 'r', label = 'Normal Dist Curve')
463
+
451
464
  axs[1].plot(self.df['Date'],self.df['z_log_return'])
452
465
  axs[1].plot(self.df['Date'],self.df['low_outlier'], linestyle='--')
453
466
  axs[1].plot(self.df['Date'],self.df['up_outlier'], linestyle='--')
454
467
 
468
+ fig.legend()
455
469
  plt.show()
470
+
471
+ z_stat, p_stat = stats.normaltest(self.df['z_log_return'].dropna())
472
+ p_stat = round(p_stat, 7)
473
+ print('---------------------- returns normality tests ----------------------------')
474
+ if p_stat < 0.05:
475
+ print(f'pvalue: {p_stat} then, returns do not follow a normal distribution')
476
+ else:
477
+ print(f'pvalue: {p_stat} then, returns follow a normal distribution')
456
478
 
457
479
  def analysis_roll_mean_log_returns(self, lags, plot = False):
458
480
 
@@ -1826,7 +1848,7 @@ class signal_analyser_object:
1826
1848
  if self.return_fig:
1827
1849
  return fig
1828
1850
 
1829
- def create_backtest_signal(self,days_strategy, test_size, feature_name):
1851
+ def create_backtest_signal(self,days_strategy, test_size, feature_name, high_exit = False, low_exit = False):
1830
1852
  asset_1 = 'Close'
1831
1853
  up_signal, low_signal= f'signal_up_{feature_name}', f'signal_low_{feature_name}'
1832
1854
  df1 = self.data.iloc[-test_size:,:].copy()
@@ -1846,54 +1868,84 @@ class signal_analyser_object:
1846
1868
  df2['span'] = (pd.to_datetime(df2['Date']) - pd.to_datetime(df2['lag_Date'])).dt.days - 1
1847
1869
  df2['break'] = np.where(df2['span'] > 3, 1, 0)
1848
1870
  df2['break'] = np.where(df2['span'].isna(), 1, df2['break'])
1849
-
1871
+
1850
1872
  df2['chain_id'] = df2.sort_values(['Date']).groupby(['break']).cumcount() + 1
1851
1873
  df2['chain_id'] = np.where(df2['break'] == 1, df2['chain_id'], np.nan )
1852
1874
  df2['chain_id'] = df2['chain_id'].fillna(method = 'ffill')
1853
-
1875
+
1854
1876
  df2['internal_rn'] = df2.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
1855
1877
  df2['inv_internal_rn'] = df2.sort_values(['Date'],ascending = False).groupby(['chain_id']).cumcount() + 1
1856
-
1878
+
1857
1879
  df2['first_in_chain'] = np.where(df2['internal_rn'] == 1, True, False)
1858
1880
  df2['last_in_chain'] = np.where(df2['inv_internal_rn'] == 1, True, False)
1859
-
1881
+
1860
1882
  df2 = df2.drop(columns = ['break','span','lag_Date','inv_internal_rn']).sort_values('Date')
1861
-
1883
+
1862
1884
  df2 = df2[(df2.last_in_chain == True) & (df2.signal_type == 'down')][['last_in_chain']]
1863
1885
  dft = df1.merge(df2,how = 'left',left_index=True, right_index=True )
1864
-
1886
+
1865
1887
  dft['chain_id'] = dft.sort_values(['Date']).groupby(['last_in_chain']).cumcount() + 1
1866
1888
  dft['chain_id'] = np.where(dft['last_in_chain'] == True, dft['chain_id'], np.nan )
1867
1889
  dft['chain_id'] = dft['chain_id'].fillna(method = 'ffill')
1868
-
1890
+
1869
1891
  dft['internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).cumcount() + 1
1870
1892
  dft['flag'] = np.where(dft['internal_rn'] < days_strategy, 1,0)
1871
-
1893
+
1872
1894
  dft['lrets_bench'] = np.log(dft[asset_1]/dft[asset_1].shift(1))
1873
1895
  dft['bench_prod'] = dft['lrets_bench'].cumsum()
1874
1896
  dft['bench_prod_exp'] = np.exp(dft['bench_prod']) - 1
1875
-
1897
+
1898
+ if high_exit and low_exit:
1899
+ dft['open_strat'] = np.where(dft.last_in_chain == True, dft.Open, np.nan)
1900
+ dft['open_strat'] = dft['open_strat'].fillna(method = 'ffill')
1901
+ dft['open_strat'] = np.where(dft.flag == 1, dft.open_strat, np.nan)
1902
+ dft['high_strat_ret'] = (dft['High']/dft['open_strat']-1)*100
1903
+ dft['low_strat_ret'] = (dft['Low']/dft['open_strat']-1)*100
1904
+ dft['high_exit'] = np.where(((dft['high_strat_ret'] >= high_exit) | (dft['internal_rn'] == days_strategy)), 1, np.nan)
1905
+ dft['low_exit'] = np.where((dft['low_strat_ret'] <= low_exit), -1, np.nan)
1906
+
1907
+ dft["exit_type"] = dft[["high_exit", "low_exit"]].max(axis=1)
1908
+ dft['exit_type'] = np.where(dft["exit_type"] == 1, 1, np.where(dft["exit_type"] == -1,-1,np.nan))
1909
+ dft['exit'] = np.where(dft['exit_type'].isnull(), np.nan, 1)
1910
+ dft['exit_order'] = dft.sort_values(['Date']).groupby(['chain_id','exit']).cumcount() + 1
1911
+ dft['exit'] = np.where(dft['exit_order'] == 1, True, np.nan)
1912
+ dft = dft.drop(columns = ['exit_order'])
1913
+ ## if last signal is near
1914
+ max_id = dft.chain_id.max()
1915
+ dft['max_internal_rn'] = dft.sort_values(['Date']).groupby(['chain_id']).internal_rn.transform('max')
1916
+ dft['exit'] = np.where((dft.chain_id == max_id) & (dft.max_internal_rn < days_strategy) & (dft.max_internal_rn == dft.internal_rn), 1, dft['exit'])
1917
+
1918
+ dft['exit_step'] = np.where(dft.exit == 1, dft.internal_rn, np.nan)
1919
+ dft['exit_step'] = dft.sort_values(['Date']).groupby(['chain_id']).exit_step.transform('max')
1920
+
1921
+ dft['flag'] = np.where(dft.internal_rn <= dft.exit_step, 1, 0)
1922
+ dft = dft.drop(columns = ['open_strat', 'high_strat_ret', 'low_strat_ret','exit_step', 'exit','exit_type','high_exit','low_exit', 'max_internal_rn'])
1923
+
1876
1924
  dft['lrets_strat'] = np.log(dft[asset_1].shift(-1)/dft[asset_1]) * dft['flag']
1877
1925
  dft['lrets_strat'] = np.where(dft['lrets_strat'].isna(),-0.0,dft['lrets_strat'])
1878
1926
  dft['lrets_prod'] = dft['lrets_strat'].cumsum()
1879
1927
  dft['strat_prod_exp'] = np.exp(dft['lrets_prod']) - 1
1880
-
1928
+
1881
1929
  bench_rets = round(dft['bench_prod_exp'].values[-1]*100,1)
1882
1930
  strat_rets = round(dft['strat_prod_exp'].values[-1]*100,1)
1883
-
1884
1931
 
1885
- message1 = f'returns benchmark {bench_rets}%'
1886
- message2 = f'returns strategy {strat_rets}%'
1932
+ bench_sr = round(sharpe_ratio(dft.bench_prod_exp.dropna()),1)
1933
+ strat_sr = round(sharpe_ratio(dft.strat_prod_exp.dropna()),1)
1934
+
1935
+ message1 = f'{bench_rets}%'
1936
+ message2 = f'{strat_rets}%'
1887
1937
 
1888
1938
  messages = {
1889
- 'benchmark return':message1,
1890
- 'strategy return':message2
1939
+ 'benchmark return:':message1,
1940
+ 'benchmark sharpe ratio:': bench_sr,
1941
+ 'strategy return:':message2,
1942
+ 'strategy sharpe ratio:': strat_sr,
1891
1943
  }
1892
1944
  if self.show_plot:
1893
1945
  print('----------------------------')
1894
- print(message1)
1895
- print(message2)
1946
+ print(messages)
1896
1947
  print('----------------------------')
1948
+
1897
1949
  fig = plt.figure(1)
1898
1950
  plt.plot(dft.bench_prod_exp.values, label = 'benchmark')
1899
1951
  plt.scatter(range(len(dft)),np.where(dft[low_signal] == 1,dft.bench_prod_exp.values,np.nan),color = 'red', label = 'signal')
@@ -1919,23 +1971,24 @@ class signal_analyser_object:
1919
1971
 
1920
1972
  upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
1921
1973
  upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
1922
-
1974
+
1923
1975
  if not self.show_plot:
1924
1976
  plt.close()
1925
1977
 
1926
1978
  del df1,df2,dft
1927
-
1979
+
1928
1980
  if self.return_fig:
1929
1981
  return fig, messages
1930
1982
 
1931
- def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False):
1983
+ def execute_signal_analyser(test_data_size, feature_name, days_list, configuration, method, object_stock, signal_analyser_object, plot = False, backtest= False, exit_params = {}):
1984
+
1932
1985
  method(**configuration)
1933
1986
  signal_assess = signal_analyser_object(object_stock.df,object_stock.stock_code,show_plot = plot)
1934
1987
  signal_assess.signal_analyser(test_size = test_data_size, feature_name = feature_name, days_list = days_list, threshold = 1)
1935
-
1988
+
1936
1989
  if backtest:
1937
1990
  print('-----------------------back test ---------------------------')
1938
- signal_assess.create_backtest_signal(backtest, test_data_size, feature_name)
1991
+ signal_assess.create_backtest_signal(backtest, test_data_size, feature_name, **exit_params )
1939
1992
 
1940
1993
  return signal_assess.mean_median_return
1941
1994
 
@@ -1,37 +1,36 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo-modules
3
- Version: 0.0.70
3
+ Version: 0.0.71
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
7
7
  Author-email: miguelmayhem92@gmail.com
8
8
  License: MIT
9
- Platform: UNKNOWN
10
9
  Classifier: License :: OSI Approved :: MIT License
11
10
  Classifier: Programming Language :: Python :: 3.9
12
11
  Classifier: Operating System :: OS Independent
13
12
  Requires-Python: >=3.9, <3.10
14
13
  Description-Content-Type: text/markdown
15
14
  License-File: LICENSE
16
- Requires-Dist: feature-engine (==1.6.1)
17
- Requires-Dist: matplotlib (==3.6.3)
18
- Requires-Dist: mlflow (==2.1.1)
19
- Requires-Dist: numpy (==1.23.5)
20
- Requires-Dist: optuna (==3.1.0)
21
- Requires-Dist: pandas (==1.5.3)
22
- Requires-Dist: plotly (==5.15.0)
23
- Requires-Dist: rsa (==4.9)
24
- Requires-Dist: scikit-learn (==1.2.1)
25
- Requires-Dist: scipy (==1.10.0)
26
- Requires-Dist: seaborn (==0.12.2)
27
- Requires-Dist: starlette (==0.22.0)
28
- Requires-Dist: statsmodels (==0.13.5)
29
- Requires-Dist: ta (==0.10.2)
30
- Requires-Dist: yfinance (==0.2.9)
31
- Requires-Dist: hmmlearn (==0.3.0)
15
+ Requires-Dist: feature-engine ==1.6.1
16
+ Requires-Dist: matplotlib ==3.6.3
17
+ Requires-Dist: mlflow ==2.1.1
18
+ Requires-Dist: numpy ==1.23.5
19
+ Requires-Dist: optuna ==3.1.0
20
+ Requires-Dist: pandas ==1.5.3
21
+ Requires-Dist: plotly ==5.15.0
22
+ Requires-Dist: rsa ==4.9
23
+ Requires-Dist: scikit-learn ==1.2.1
24
+ Requires-Dist: scipy ==1.10.0
25
+ Requires-Dist: seaborn ==0.12.2
26
+ Requires-Dist: starlette ==0.22.0
27
+ Requires-Dist: statsmodels ==0.13.5
28
+ Requires-Dist: ta ==0.10.2
29
+ Requires-Dist: yfinance ==0.2.9
30
+ Requires-Dist: hmmlearn ==0.3.0
32
31
  Requires-Dist: boto3
33
32
  Provides-Extra: dev
34
- Requires-Dist: pytest (>=7.0) ; extra == 'dev'
33
+ Requires-Dist: pytest >=7.0 ; extra == 'dev'
35
34
 
36
35
  # Virgo Package
37
36
 
@@ -52,4 +51,3 @@ obj = stock_eda_panel(stock_code = 'PEP', n_days = 20)
52
51
  obj.get_data()
53
52
  print(obj.df.shape)
54
53
  ```
55
-
@@ -4,9 +4,9 @@ virgo_modules/src/aws_utils.py,sha256=GWmVdXM0mIJJPn-X-bEtM4KtNPCHM1D457hnuKxaM7
4
4
  virgo_modules/src/edge_utils.py,sha256=Ihdmq7dyb8gOvG6CrDal7wsa15tqsdsFk6KINwM6578,7691
5
5
  virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
6
6
  virgo_modules/src/re_utils.py,sha256=LDI3sYAaNm3LO5gRul7PyCVbJrkT3PBihObkdVilVec,52428
7
- virgo_modules/src/ticketer_source.py,sha256=_vJkF38yAl2FbxvcOWHbjG5n7o34lh8SdRR8zzIxChY,101408
8
- virgo_modules-0.0.70.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
9
- virgo_modules-0.0.70.dist-info/METADATA,sha256=GM8r8w-YFG7dGOAtfx295KdnSZEjHna01KNyyosXcNc,1484
10
- virgo_modules-0.0.70.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
11
- virgo_modules-0.0.70.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
12
- virgo_modules-0.0.70.dist-info/RECORD,,
7
+ virgo_modules/src/ticketer_source.py,sha256=fGj954x4UmYU4zaI20q5dTkgqAVK0XQJsuibM2UBPMs,104812
8
+ virgo_modules-0.0.71.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
9
+ virgo_modules-0.0.71.dist-info/METADATA,sha256=vWlzxe3a8nehV54nuJZi87OWDKwwEhz5JNRkncIv_To,1429
10
+ virgo_modules-0.0.71.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
11
+ virgo_modules-0.0.71.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
12
+ virgo_modules-0.0.71.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.38.4)
2
+ Generator: bdist_wheel (0.41.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5