virgo-modules 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of virgo-modules might be problematic. Click here for more details.

@@ -1278,7 +1278,9 @@ class produce_plotly_plots:
1278
1278
  if self.save_path and self.save_aws:
1279
1279
  # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.ticket_name}/'+result_json_name ,input_path = self.save_path+result_json_name)
1280
1280
  upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_json_name, input_path = self.save_path + result_json_name, aws_credentials = self.aws_credentials)
1281
-
1281
+ if self.return_figs:
1282
+ return fig
1283
+
1282
1284
  def plot_hmm_analysis_logger(data_frame,test_data_size, save_path = False, show_plot = True):
1283
1285
  '''
1284
1286
  display box plots train and test of hmm state returns
@@ -48,6 +48,8 @@ from feature_engine.timeseries.forecasting import LagFeatures
48
48
  from feature_engine.imputation import MeanMedianImputer
49
49
  from feature_engine.discretisation import EqualWidthDiscretiser
50
50
 
51
+ from sklearn.linear_model import HuberRegressor
52
+
51
53
  from .aws_utils import upload_file_to_aws
52
54
 
53
55
  import logging
@@ -3070,10 +3072,10 @@ class analyse_index(stock_eda_panel):
3070
3072
 
3071
3073
  Attributes
3072
3074
  ----------
3073
- data : pd.DataFrame
3074
- symbol of the asset
3075
- index : str
3075
+ data_index : pd.DataFrame
3076
3076
  name of the index
3077
+ indexes: list
3078
+ list of indexes
3077
3079
  asset : str
3078
3080
  name of the asset
3079
3081
  n_obs : int
@@ -3104,14 +3106,13 @@ class analyse_index(stock_eda_panel):
3104
3106
  get_betas(subsample_ts=int)
3105
3107
  get general beta and last sample beta, correlation score is included too
3106
3108
  """
3107
-
3108
- def __init__(self, index, asset, n_obs, lag, data_window = '5y', show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
3109
+ def __init__(self, index_data, asset, n_obs, lag, data_window = '5y', show_plot = False, save_path = False, save_aws = False, aws_credentials = False, return_fig = False):
3109
3110
  """
3110
3111
  Initialize object
3111
3112
 
3112
3113
  Parameters
3113
3114
  ----------
3114
- index (str): name of the index
3115
+ index_data (pd.DataFrame or str): index data dataframe or index string
3115
3116
  asset (str): name of the asset
3116
3117
  n_obs (int): number of rows to extract
3117
3118
  lag (int): lag to apply
@@ -3126,45 +3127,57 @@ class analyse_index(stock_eda_panel):
3126
3127
  None
3127
3128
  """
3128
3129
 
3129
- self.index = index
3130
+
3131
+ if type(index_data) != str:
3132
+ index_data['Date'] = pd.to_datetime(index_data['Date'])
3133
+ self.index_data = index_data
3134
+ self.indexes = [ x for x in list(index_data.columns) if x != 'Date']
3135
+ else:
3136
+ self.indexes = [index_data]
3137
+
3138
+ self.index_data = index_data
3130
3139
  self.asset = asset
3131
3140
  self.n_obs = n_obs
3132
3141
  self.data_window = data_window
3133
3142
  self.lag = lag
3134
3143
 
3135
3144
  self.show_plot = show_plot
3145
+ self.return_fig = return_fig
3136
3146
  self.save_path = save_path
3137
3147
  self.save_aws = save_aws
3138
3148
 
3139
3149
  def process_data(self):
3140
3150
  """
3141
3151
  using stock_eda_panel, get data and merge data
3142
-
3152
+
3143
3153
  Parameters
3144
3154
  ----------
3145
3155
  None
3146
-
3156
+
3147
3157
  Returns
3148
3158
  -------
3149
3159
  None
3150
3160
  """
3151
- index = stock_eda_panel(self.index, self.n_obs, self.data_window)
3152
- index.get_data()
3153
- index.df['shift'] = index.df.Close.shift(self.lag)
3154
- index.df['index_return'] = index.df.Close/index.df['shift'] - 1
3155
-
3156
- asset = stock_eda_panel(self.asset, self.n_obs, self.data_window)
3161
+ asset = stock_eda_panel(self.asset, self.n_obs, data_window=self.data_window)
3157
3162
  asset.get_data()
3158
- asset.df['shift'] = asset.df.Close.shift(self.lag)
3159
- asset.df['asset_return'] = asset.df.Close/asset.df['shift'] - 1
3160
-
3161
- df1 = index.df[['Date','index_return']]
3162
- df2 = asset.df[['Date','asset_return','Close']]
3163
- merger = df1.merge(df2, on = 'Date', how = 'inner')
3164
- merger.dropna(inplace = True)
3165
- self.merger_df = merger
3163
+ df = asset.df[['Date','Close']]
3164
+
3165
+ if type(self.index_data) != str:
3166
+ df_merge = df.merge(self.index_data, on = ['Date'], how = 'left').sort_values('Date')
3167
+
3168
+ else:
3169
+ indx = stock_eda_panel(self.index_data, self.n_obs, data_window=self.data_window)
3170
+ indx.get_data()
3171
+ indx_df = indx.df[['Date','Close']].rename(columns = {'Close':self.index_data})
3172
+ df_merge = df.merge(indx_df, on = ['Date'], how = 'left').sort_values('Date')
3173
+
3174
+ for colx in ['Close'] + self.indexes:
3175
+ df_merge[f'{colx}_pct'] = df_merge[colx]/df_merge[colx].shift(self.lag) - 1
3176
+
3177
+ df_merge.dropna(inplace = True)
3178
+ self.merger_df = df_merge.rename(columns = {'Close_pct': 'asset_return'})
3166
3179
 
3167
- def plot_betas(self,sample_size, offset, subsample_ts =False):
3180
+ def plot_betas(self,sample_size, offset, subsample_ts =False, index = False):
3168
3181
  """
3169
3182
  display beta analysis plot
3170
3183
 
@@ -3178,19 +3191,26 @@ class analyse_index(stock_eda_panel):
3178
3191
  -------
3179
3192
  None
3180
3193
  """
3181
- ### extracting data
3182
-
3183
- self.process_data()
3184
-
3185
- ### ploting analysis
3194
+ if (type(self.index_data) == str) & (index != False):
3195
+ raise Exception("No need of index argument")
3196
+ else:
3197
+ index = self.indexes[0]
3198
+
3199
+ index_pct = f'{index}_pct'
3200
+ ### ploting analysis
3186
3201
  figure, ax = plt.subplot_mosaic(
3187
3202
  [["scatter_total", "scatter_sample",'ts','ts']],
3188
3203
  layout="constrained",
3189
3204
  figsize=(18, 5)
3190
3205
  )
3191
3206
 
3192
- ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df.index_return)
3193
- b, a = np.polyfit(self.merger_df.asset_return, self.merger_df.index_return, 1)
3207
+ ax['scatter_total'].scatter(self.merger_df.asset_return, self.merger_df[index_pct])
3208
+
3209
+ huber_regr = HuberRegressor(fit_intercept = True)
3210
+ huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
3211
+ b, a = huber_regr.coef_[0], huber_regr.intercept_
3212
+
3213
+ # b, a = np.polyfit(self.merger_df.asset_return, self.merger_df[index_pct], 1)
3194
3214
  ax['scatter_total'].plot(self.merger_df.asset_return, b*self.merger_df.asset_return+a, color='red')
3195
3215
 
3196
3216
  ax['ts'].plot(self.merger_df.Date, self.merger_df.Close, color = 'grey', alpha = 0.3)
@@ -3201,10 +3221,13 @@ class analyse_index(stock_eda_panel):
3201
3221
  for i in range(0,len(self.merger_df)-sample_size,offset):
3202
3222
 
3203
3223
  merger_ = self.merger_df.sort_values('Date', ascending = False).iloc[i:i+sample_size,:]
3204
- x = merger_.index_return
3224
+ x = merger_[index_pct]
3205
3225
  y = merger_.asset_return
3206
- b, a = np.polyfit(x,y, 1)
3207
-
3226
+ # b, a = np.polyfit(x,y, 1)
3227
+ huber_regr = HuberRegressor(fit_intercept = True)
3228
+ huber_regr.fit(x.values.reshape(-1,1), y.values.reshape(-1,1))
3229
+ b, a = huber_regr.coef_[0], huber_regr.intercept_
3230
+
3208
3231
  normalize = mcolors.Normalize(vmin=-1, vmax=1)
3209
3232
  colormap = cm.jet
3210
3233
 
@@ -3218,11 +3241,12 @@ class analyse_index(stock_eda_panel):
3218
3241
  scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
3219
3242
  scalarmappaple.set_array(x)
3220
3243
 
3221
- plt.title(f'{self.asset} using index: {self.index}')
3244
+ plt.title(f'{self.asset} using index: {index}')
3222
3245
  plt.colorbar(scalarmappaple)
3223
3246
 
3224
3247
  if self.show_plot:
3225
3248
  plt.show()
3249
+
3226
3250
  if self.save_path:
3227
3251
  result_plot_name = f'market_best_fit.png'
3228
3252
  figure.savefig(self.save_path+result_plot_name)
@@ -3230,119 +3254,53 @@ class analyse_index(stock_eda_panel):
3230
3254
  if self.save_path and self.save_aws:
3231
3255
  # upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = f'market_plots/{self.asset}/'+result_plot_name,input_path = self.save_path+result_plot_name)
3232
3256
  upload_file_to_aws(bucket = 'VIRGO_BUCKET', key = self.save_aws + result_plot_name, input_path = self.save_path + result_plot_name, aws_credentials = self.aws_credentials)
3257
+
3233
3258
  if not self.show_plot:
3234
- plt.close()
3235
-
3259
+ plt.close()
3260
+
3261
+ if self.return_fig:
3262
+ return figure
3263
+
3236
3264
  def get_betas(self,subsample_ts=False):
3237
3265
  """
3238
3266
  get general beta and last sample beta, correlation score is included too
3239
-
3267
+
3240
3268
  Parameters
3241
3269
  ----------
3242
3270
  subsample_ts (int): subsample size of data
3243
-
3271
+
3244
3272
  Returns
3245
3273
  -------
3246
3274
  None
3247
3275
  """
3248
- self.process_data()
3249
- general_beta, a = np.polyfit(self.merger_df.asset_return, self.merger_df.index_return, 1)
3250
- general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df.index_return)[0]
3251
-
3252
- self.process_data()
3253
- if subsample_ts:
3254
- self.merger_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
3255
- sample_beta, a = np.polyfit(self.merger_df.asset_return, self.merger_df.index_return, 1)
3256
- sample_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df.index_return)[0]
3257
-
3258
- result = {
3259
- 'general_beta':general_beta,
3260
- 'general_r':general_r,
3261
- 'sample_beta':sample_beta,
3262
- 'sample_r':sample_r
3263
- }
3264
-
3265
- self.states_result = result
3266
-
3267
- class evaluate_markets(analyse_index):
3268
- """
3269
- object that is going to evaluate multiple indexes
3270
-
3271
- Attributes
3272
- ----------
3273
- stock_code : str
3274
- asset to assess
3275
- indexes : list
3276
- list of indexes
3277
- best_result : dict
3278
- best result beta and correlation
3279
-
3280
- Methods
3281
- -------
3282
- process_data():
3283
- using stock_eda_panel, get data and merge data
3284
- plot_betas(sample_size=int, offset=int, subsample_ts=int):
3285
- display beta analysis plot
3286
- get_betas(subsample_ts=int)
3287
- get general beta and last sample beta, correlation score is included too
3288
- evaluate_best_market_fit(sample_size=int, offset=int,lag=int, n_obs=int, verbose=boolean, plot_best=boolean):
3289
- iterate every index in the index list and get results
3290
- """
3291
-
3292
- def __init__(self, stock_code, indexes):
3293
- """
3294
- Initialize object
3295
-
3296
- Parameters
3297
- ----------
3298
- stock_code (str): asset to assess
3299
- indexes (list): list of indexes
3300
-
3301
- Returns
3302
- -------
3303
- None
3304
- """
3305
- self.stock_code = stock_code
3306
- self.indexes = indexes
3307
- def evaluate_best_market_fit(self,sample_size, offset,lag= 3, n_obs = 3500, verbose = False, plot_best = False):
3308
- """
3309
- iterate every index in the index list and get results
3310
-
3311
- Parameters
3312
- ----------
3313
- sample_size (int): sample size to get betas
3314
- offset (int): overlap size
3315
- lag (int): number of lags of the returns
3316
- n_obs (int): number of observations of the data extraction
3317
- verbose (boolean): if true, print results
3318
- plot_best (boolean): if true, display plot of the best result
3319
-
3320
- Returns
3321
- -------
3322
- None
3323
- """
3324
- results_dicts = dict()
3276
+ result = list()
3325
3277
  for index in self.indexes:
3326
- betex = analyse_index(index = index,asset = self.stock_code,n_obs = n_obs, lag = lag)
3327
- betex.get_betas(sample_size)
3328
- results_dicts[index] = betex.states_result
3329
- pd_result = pd.DataFrame(results_dicts).T
3330
- pd_result['gen_r2'] = pd_result.general_r ** 2
3331
- pd_result['sampl_r2'] = pd_result.sample_r ** 2
3332
- self.stat_results = pd_result
3333
-
3334
- best_result = pd_result.sort_values('gen_r2',ascending = False).head(2).sort_values('sampl_r2',ascending = False).head(1)
3335
- best_fit_index = best_result.index.values[0]
3336
-
3337
- self.stat_results = self.stat_results.drop(columns = ['gen_r2','sampl_r2'])
3338
-
3339
- if verbose:
3340
- print(best_result)
3341
- if plot_best:
3342
- betex = analyse_index(index = best_fit_index,asset = self.stock_code, n_obs = n_obs, lag = lag)
3343
- betex.plot_betas(sample_size = sample_size, offset = offset, subsample_ts = False)
3278
+
3279
+ index_pct = f'{index}_pct'
3280
+ huber_regr = HuberRegressor(fit_intercept = True)
3281
+ huber_regr.fit(self.merger_df.asset_return.values.reshape(-1,1), self.merger_df[index_pct].values.reshape(-1,1))
3282
+ general_beta, a = huber_regr.coef_[0], huber_regr.intercept_
3283
+ general_r = stats.mstats.pearsonr(self.merger_df.asset_return, self.merger_df[index])[0]
3284
+
3285
+ dict_res = {
3286
+ 'index':index,
3287
+ 'general_beta':general_beta,
3288
+ 'general_r':general_r,
3289
+ }
3290
+
3291
+ if subsample_ts:
3292
+ tmp_df = self.merger_df.iloc[-subsample_ts:,:].dropna()
3293
+ huber_regr = HuberRegressor(fit_intercept = True)
3294
+ huber_regr.fit(tmp_df.asset_return.values.reshape(-1,1), tmp_df[index_pct].values.reshape(-1,1))
3295
+ sample_beta, a = huber_regr.coef_[0], huber_regr.intercept_
3296
+ sample_r = stats.mstats.pearsonr(tmp_df.asset_return, tmp_df[index])[0]
3297
+ dict_res['sample_beta'] = sample_beta
3298
+ dict_res['sample_r'] = sample_r
3299
+
3300
+ result.append(dict_res)
3301
+
3302
+ self.states_result = result
3344
3303
 
3345
- self.best_result = best_result
3346
3304
 
3347
3305
  def get_relevant_beta(data_market, ticket_name, show_plot = True, save_path = False, save_aws = False, aws_credentials = False):
3348
3306
  '''
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: virgo-modules
3
- Version: 0.0.82
3
+ Version: 0.0.84
4
4
  Summary: data processing and statistical modeling using stock market data
5
5
  Home-page: https://github.com/miguelmayhem92/virgo_module
6
6
  Author: Miguel Mayhuire
@@ -3,10 +3,10 @@ virgo_modules/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
3
3
  virgo_modules/src/aws_utils.py,sha256=q0l7D7ofo09Lu1QQjv-esheQ06uiSy1Pdq3xMul8zvk,2571
4
4
  virgo_modules/src/edge_utils.py,sha256=ll5pRs9EE20IsE5A1vA589TKzobkeA-b0d68jNTsu1U,13268
5
5
  virgo_modules/src/pull_artifacts.py,sha256=5OPrgR7pcMSdpbevDRhf0ebk7g7ZRjff4NpTIIWAKjE,1989
6
- virgo_modules/src/re_utils.py,sha256=a7u7ouZ6dKvJZXzgjPLTRARbCapMC_sWf-azhExpMAk,71590
7
- virgo_modules/src/ticketer_source.py,sha256=nDxBs0YIFwInCvk73PoU8D8oiAxElf3ERbRQXRw8k_M,144162
8
- virgo_modules-0.0.82.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
9
- virgo_modules-0.0.82.dist-info/METADATA,sha256=n-F7T6tpDni4jLj24J_g5K0VfTXKcA_RC_rJVYpufDE,1429
10
- virgo_modules-0.0.82.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
11
- virgo_modules-0.0.82.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
12
- virgo_modules-0.0.82.dist-info/RECORD,,
6
+ virgo_modules/src/re_utils.py,sha256=jB5raZFppVl-ZgCzmAF6vB8GCwyVAV6rQjHAKFaCnVw,71652
7
+ virgo_modules/src/ticketer_source.py,sha256=cEGgago1bl3tynRND30jqfiPWxF-KTTgiN9DRTbyB_k,143298
8
+ virgo_modules-0.0.84.dist-info/LICENSE,sha256=pNgFyCYgmimaw0o6V20JupZLROycAnOA_HDDh1tX2V4,1097
9
+ virgo_modules-0.0.84.dist-info/METADATA,sha256=6shZyCX6M6LsMFsxiT-jKAAYtBMd84kjcBDKiGo0JTc,1429
10
+ virgo_modules-0.0.84.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
11
+ virgo_modules-0.0.84.dist-info/top_level.txt,sha256=ZjI-qEkDtT-8mFwGAWnXfqPOKEGlIhWRW1es1VyXc60,14
12
+ virgo_modules-0.0.84.dist-info/RECORD,,