voly 0.0.146__py3-none-any.whl → 0.0.148__py3-none-any.whl

voly/core/hd.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
 import numpy as np
 import datetime as dt
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any
+from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
@@ -27,20 +27,25 @@ def get_historical_data(currency: str,
     Fetch historical OHLCV data for a cryptocurrency.
 
     Parameters:
-    ----------
+    -----------
     currency : str
-        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH').
+        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH')
     lookback_days : str
         The lookback period in days, formatted as '90d', '30d', etc.
     granularity : str
-        The time interval for data points (e.g., '15m', '1h', '1d').
+        The time interval for data points (e.g., '15m', '1h', '1d')
     exchange_name : str
-        The exchange to fetch data from (default: 'binance').
+        The exchange to fetch data from (default: 'binance')
 
     Returns:
-    -------
-    pd.DataFrame: Historical price data with OHLCV columns.
+    --------
+    pd.DataFrame
+        Historical price data with OHLCV columns and datetime index
     """
+    # Validate inputs
+    if not lookback_days.endswith('d'):
+        raise VolyError("lookback_days should be in format '90d', '30d', etc.")
+
     try:
         # Get the exchange class from ccxt
         exchange_class = getattr(ccxt, exchange_name.lower())
@@ -49,28 +54,21 @@ def get_historical_data(currency: str,
         raise VolyError(f"Exchange '{exchange_name}' not found in ccxt. Please check the exchange name.")
 
     # Form the trading pair symbol
-    symbol = currency + '/USDT'
+    symbol = f"{currency}/USDT"
 
     # Convert lookback_days to timestamp
-    if lookback_days.endswith('d'):
-        days_ago = int(lookback_days[:-1])
-        date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
-    else:
-        raise VolyError("lookback_days should be in format '90d', '30d', etc.")
-
+    days_ago = int(lookback_days[:-1])
+    date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
     from_ts = exchange.parse8601(date_start)
+
     ohlcv_list = []
     ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
     ohlcv_list.append(ohlcv)
-
-    # Fetch all available data within the lookback period
-    while len(ohlcv) == 1000:
+    while True:
         from_ts = ohlcv[-1][0]
         new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-        if len(new_ohlcv) <= 1:
-            break
-        ohlcv.extend(new_ohlcv[1:])  # Skip first element to avoid duplication
-        if len(new_ohlcv) < 1000:
+        ohlcv.extend(new_ohlcv)
+        if len(new_ohlcv) != 1000:
             break
 
     # Convert to DataFrame
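The 0.0.148 pagination loop re-requests from the last candle's timestamp and stops at the first page shorter than 1000 rows. Note that, unlike 0.0.146, it no longer drops the first element of each new page, so the boundary candle can appear twice if the exchange echoes the `since` timestamp back. A minimal usage sketch (hypothetical values; assumes ccxt is installed, network access is available, and the function is importable as defined above):

```python
# Sketch: fetch ~90 days of hourly BTC/USDT candles via the 0.0.148 API.
# Values are illustrative; requires network access and ccxt's 'binance' class.
from voly.core.hd import get_historical_data

df_hist = get_historical_data(
    currency='BTC',
    lookback_days='90d',
    granularity='1h',
    exchange_name='binance',
)
print(df_hist.tail())  # OHLCV columns with a datetime index, per the docstring
```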
@@ -87,36 +85,76 @@ def get_historical_data(currency: str,
 @catch_exception
 def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
     """
-    Parse window length from string format (e.g., '7d', '30d') to number of data points.
+    Convert window length string (e.g., '30d') to number of data points.
 
     Parameters:
     -----------
     window_length : str
         Window length in days, formatted as '7d', '30d', etc.
     df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index.
+        Historical data DataFrame with datetime index
 
     Returns:
     --------
     int
-        Number of data points corresponding to the window length.
+        Number of data points corresponding to the window length
     """
+    # Validate inputs
     if not isinstance(window_length, str) or not window_length.endswith('d'):
         raise VolyError("window_length should be in format '7d', '30d', etc.")
 
+    if len(df_hist) < 2:
+        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
+
     # Extract number of days
     days = int(window_length[:-1])
 
-    # Calculate time delta between consecutive data points
-    if len(df_hist) > 1:
-        avg_delta = (df_hist.index[-1] - df_hist.index[0]) / (len(df_hist) - 1)
-        # Convert to days and get points per day
-        days_per_point = avg_delta.total_seconds() / (24 * 60 * 60)
-        # Calculate number of points for the window
-        n_points = int(days / days_per_point)
-        return max(n_points, 10)  # Ensure at least 10 points
-    else:
-        raise VolyError("Not enough data points in df_hist to calculate granularity.")
+    # Calculate average time delta between data points
+    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
+
+    # Convert to days and calculate points per window
+    days_per_point = avg_delta / (24 * 60 * 60)
+    n_points = int(days / days_per_point)
+
+    # Ensure minimum number of points
+    return max(n_points, 10)
+
+
+def get_param_names(model_type: str, distribution: str) -> List[str]:
+    """
+    Get parameter names for a volatility model and distribution.
+
+    Parameters:
+    -----------
+    model_type : str
+        Type of volatility model ('garch' or 'egarch')
+    distribution : str
+        Distribution type ('normal', 'studentst', or 'skewstudent')
+
+    Returns:
+    --------
+    List[str]
+        List of parameter names
+    """
+    # GARCH(1,1) parameters
+    if model_type.lower() == 'garch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
+
+    # EGARCH(1,1,1) parameters
+    elif model_type.lower() == 'egarch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
+
+    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
 
 
 @catch_exception
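The rewritten `parse_window_length` infers granularity from the index span instead of branching, and the relocated `get_param_names` is now a pure table lookup that raises on unknown combinations. A standalone sketch of the window arithmetic (hypothetical hourly index; stand-in data, not package code):

```python
# Sketch: '30d' on hourly data maps to ~720 points, floored at 10.
# Mirrors the arithmetic in parse_window_length() above.
import pandas as pd

idx = pd.date_range('2024-01-01', periods=24 * 120, freq='h')  # 120 days, hourly
avg_delta = (idx[-1] - idx[0]).total_seconds() / (len(idx) - 1)  # ~3600 s
days_per_point = avg_delta / (24 * 60 * 60)                      # ~1/24 day
n_points = max(int(30 / days_per_point), 10)                     # ~720
print(n_points)
```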
@@ -127,85 +165,90 @@ def fit_volatility_model(log_returns: np.ndarray,
                          window_length: str = '30d',
                          n_fits: int = 400) -> Dict[str, Any]:
     """
-    Fit a volatility model (GARCH or EGARCH) to log returns.
+    Fit a volatility model (GARCH or EGARCH) to historical returns.
 
     Parameters:
     -----------
     log_returns : np.ndarray
-        Array of log returns
+        Array of log returns (percent)
     df_hist : pd.DataFrame
-        DataFrame with historical price data
+        Historical price data
     model_type : str
         Type of volatility model ('garch' or 'egarch')
     distribution : str
         Distribution type ('normal', 'studentst', or 'skewstudent')
     window_length : str
-        Length of each window as a string (e.g., '30d')
+        Length of sliding window in days (e.g., '30d')
     n_fits : int
-        Number of sliding windows
+        Number of sliding windows to fit
 
     Returns:
     --------
     Dict[str, Any]
-        Dictionary with model parameters and processes
+        Dictionary with model parameters and fitting results
     """
     # Parse window length
     window_points = parse_window_length(window_length, df_hist)
 
+    # Validate data
     if len(log_returns) < window_points + n_fits:
         raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
 
-    # Adjust window sizes if necessary to avoid over-fitting
+    # Adjust window sizes to avoid overfitting
     n_fits = min(n_fits, max(100, len(log_returns) // 3))
     window_points = min(window_points, max(20, len(log_returns) // 3))
 
-    start = window_points + n_fits
-    end = n_fits
+    # Calculate start and end indices for sliding windows
+    start_idx = window_points + n_fits
+    end_idx = n_fits
 
-    # Different number of parameters based on model type and distribution
+    # Get parameter names for the model
     param_names = get_param_names(model_type, distribution)
     n_params = len(param_names)
 
+    # Initialize arrays for parameters and innovations
    parameters = np.zeros((n_fits, n_params))
     z_process = []
 
-    logger.info(
-        f"Fitting {model_type.upper()} model with {distribution} distribution using {n_fits} windows of {window_length}...")
+    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
+                f"using {n_fits} windows of {window_length}")
 
+    # Fit models with sliding windows
     for i in range(n_fits):
+        # Log progress
         if i % (n_fits // 10) == 0:
             logger.info(f"Fitting progress: {i}/{n_fits}")
 
-        # Skip if we don't have enough data
-        if end - i - 1 < 0 or start - i - 1 > len(log_returns):
+        # Check if we have enough data for this window
+        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
            continue
 
-        window = log_returns[end - i - 1:start - i - 1]
+        # Extract window data
+        window = log_returns[end_idx - i - 1:start_idx - i - 1]
 
-        # Skip windows that are too small or have invalid data
+        # Skip invalid windows
         if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
             continue
 
-        # Mean-center the data to improve numerical stability
+        # Mean-center the data for numerical stability
         data = window - np.mean(window)
 
         try:
-            # Configure model based on type and distribution
+            # Configure and fit model
            if model_type.lower() == 'garch':
                 model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
             else:  # egarch
                 model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
 
+            # Fit with optimization settings
             fit_result = model.fit(disp='off', options={'maxiter': 1000})
 
-            # Extract parameters based on model type and distribution
+            # Extract parameters
             params_dict = fit_result.params.to_dict()
-
-            # Extract parameter values in correct order
             param_values = [params_dict.get(param, 0) for param in param_names]
             parameters[i, :] = param_values
 
-            # Get last innovation (standardized residual)
+            # Extract standardized residuals (innovations)
             residuals = fit_result.resid
             conditional_vol = fit_result.conditional_volatility
 
@@ -217,11 +260,11 @@ def fit_volatility_model(log_returns: np.ndarray,
         except Exception as e:
             logger.warning(f"Model fit failed for window {i}: {str(e)}")
 
-    # Clean up any failed fits
+    # Check if we have enough successful fits
     if len(z_process) < n_fits / 2:
         raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
 
-    # Filter out rows with zeros (failed fits)
+    # Remove failed fits
     valid_rows = ~np.all(parameters == 0, axis=1)
     parameters = parameters[valid_rows]
 
@@ -240,100 +283,95 @@ def fit_volatility_model(log_returns: np.ndarray,
     }
 
 
-def get_param_names(model_type: str, distribution: str) -> List[str]:
+@catch_exception
+def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
     """
-    Get parameter names based on model type and distribution.
+    Create a function to sample innovations based on the volatility model.
 
     Parameters:
     -----------
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
+    vol_model : Dict[str, Any]
+        Volatility model information from fit_volatility_model()
 
     Returns:
     --------
-    List[str]
-        List of parameter names
+    Callable
+        Function that returns random innovations when called
     """
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
-        else:  # skewstudent
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
-    else:  # egarch
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
-        else:  # skewstudent
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
+    distribution = vol_model['distribution']
+    z_process = vol_model['z_process']
+
+    if distribution.lower() == 'normal':
+        # Use standard normal for normal distribution
+        def sample_innovation(size=1):
+            return np.random.normal(0, 1, size=size)
+    else:
+        # Use KDE for non-normal distributions to capture empirical distribution
+        kde = stats.gaussian_kde(z_process, bw_method='silverman')
+        z_range = np.linspace(min(z_process), max(z_process), 1000)
+        z_prob = kde(z_range)
+        z_prob = z_prob / np.sum(z_prob)
+
+        def sample_innovation(size=1):
+            return np.random.choice(z_range, size=size, p=z_prob)
+
+    return sample_innovation
 
 
 @catch_exception
-def simulate_volatility_paths(vol_model: Dict[str, Any],
+def generate_volatility_paths(vol_model: Dict[str, Any],
                               horizon: int,
                               simulations: int = 5000) -> Tuple[np.ndarray, float]:
     """
-    Simulate future paths using a fitted volatility model.
+    Simulate future price paths using a fitted volatility model.
 
     Parameters:
     -----------
     vol_model : Dict[str, Any]
-        Dict with volatility model parameters
+        Volatility model information from fit_volatility_model()
     horizon : int
-        Number of steps to simulate
+        Number of time steps to simulate
     simulations : int
         Number of paths to simulate
 
     Returns:
     --------
     Tuple[np.ndarray, float]
-        Simulated returns and drift
+        Array of simulated returns and the drift term
     """
+    # Extract model information
     parameters = vol_model['parameters']
-    z_process = vol_model['z_process']
     model_type = vol_model['model_type']
     distribution = vol_model['distribution']
     param_names = vol_model['param_names']
 
-    # Use mean parameters as starting point
+    # Get mean parameters
     pars = vol_model['avg_params'].copy()
     bounds = vol_model['std_params'].copy()
 
-    # Create dictionary for easier parameter access
+    # Create parameter dictionary for easier access
     param_dict = {name: value for name, value in zip(param_names, pars)}
 
-    # Log parameters in a structured way
+    # Log parameters
     param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
     logger.info(f"{model_type.upper()} parameters: {param_str}")
 
-    # Create sampling function based on distribution
-    if distribution.lower() == 'normal':
-        # Use standard normal for normal distribution
-        def sample_innovation(size=1):
-            return np.random.normal(0, 1, size=size)
-    else:
-        # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')  # original code didnt have bw_method
-        z_range = np.linspace(min(z_process), max(z_process), 1000)
-        z_prob = kde(z_range)
-        z_prob = z_prob / np.sum(z_prob)
-
-        def sample_innovation(size=1):
-            return np.random.choice(z_range, size=size, p=z_prob)
+    # Create innovation sampler
+    sample_innovation = create_innovation_sampler(vol_model)
 
-    # Simulate paths
+    # Initialize results array
     simulated_returns = np.zeros(simulations)
     mu = param_dict.get('mu', 0)
 
+    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
+
+    # Simulate paths
     for i in range(simulations):
+        # Log progress
         if (i + 1) % (simulations // 10) == 0:
             logger.info(f"Simulation progress: {i + 1}/{simulations}")
 
-        # Optionally vary parameters between simulations
+        # Vary parameters periodically for robustness
        if (i + 1) % (simulations // 20) == 0:
             # Create parameter variations based on their estimated distribution
             sim_params = {}
@@ -356,25 +394,30 @@ def simulate_volatility_paths(vol_model: Dict[str, Any],
 
         # Initialize volatility based on model type
         if model_type.lower() == 'garch':
+            # Extract GARCH parameters
             omega = sim_params.get('omega', 0)
             alpha = sim_params.get('alpha[1]', 0)
             beta = sim_params.get('beta[1]', 0)
 
-            # Initialize GARCH volatility (unconditional variance)
-            sigma2 = omega / (1 - alpha - beta) if alpha + beta < 1 else omega / 0.99
+            # Initialize with unconditional variance
+            persistence = alpha + beta
+            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
+
         else:  # egarch
+            # Extract EGARCH parameters
             omega = sim_params.get('omega', 0)
             beta = sim_params.get('beta[1]', 0)
 
-            # Initialize EGARCH volatility
+            # Initialize log variance
             log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
             sigma2 = np.exp(log_sigma2)
 
+        # Initialize return sum
         returns_sum = 0
 
-        # Simulate path step by step
+        # Simulate path
         for _ in range(horizon):
-            # Sample a random innovation
+            # Sample innovation
             z = sample_innovation()
 
             # Update returns and volatility based on model type
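`create_innovation_sampler` lifts the sampling logic out of the old `simulate_volatility_paths` body unchanged: standard normal draws for the normal distribution, otherwise draws from a discretized Silverman-bandwidth KDE over the fitted residuals. Reproduced standalone with stand-in residuals:

```python
# Sketch: the KDE innovation sampler, standalone. Stand-in residuals.
import numpy as np
from scipy import stats

z_process = np.random.standard_t(df=5, size=2000)   # heavy-tailed stand-in

kde = stats.gaussian_kde(z_process, bw_method='silverman')
z_range = np.linspace(z_process.min(), z_process.max(), 1000)
z_prob = kde(z_range)
z_prob = z_prob / z_prob.sum()                      # normalize to a pmf

def sample_innovation(size=1):
    return np.random.choice(z_range, size=size, p=z_prob)

print(sample_innovation(5))
```

scipy's `gaussian_kde.resample()` would draw from the continuous density directly; the grid-plus-`np.random.choice` variant matches the package's approach and confines draws to the observed residual range.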
@@ -384,29 +427,265 @@ def simulate_volatility_paths(vol_model: Dict[str, Any],
                 returns_sum += e + mu
 
                 # Update GARCH volatility
-                sigma2 = sim_params.get('omega', 0) + sim_params.get('alpha[1]', 0) * e ** 2 + sim_params.get('beta[1]',
-                                                                                                              0) * sigma2
+                sigma2 = (sim_params.get('omega', 0) +
+                          sim_params.get('alpha[1]', 0) * e ** 2 +
+                          sim_params.get('beta[1]', 0) * sigma2)
+
             else:  # egarch
                 # Calculate return
                 e = z * np.sqrt(sigma2)
                 returns_sum += e + mu
 
-                # Update EGARCH volatility
-                abs_z = abs(z)
+                # Extract EGARCH parameters
                 gamma = sim_params.get('gamma[1]', 0)
                 alpha = sim_params.get('alpha[1]', 0)
                 beta = sim_params.get('beta[1]', 0)
                 omega = sim_params.get('omega', 0)
 
-                # EGARCH update equation
+                # Update EGARCH volatility
+                abs_z = abs(z)
                 log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
                 sigma2 = np.exp(log_sigma2)
 
+        # Store final return
         simulated_returns[i] = returns_sum
 
     return simulated_returns, mu * horizon
 
 
+@catch_exception
+def prepare_domains(domain_params: Tuple[float, float, int],
+                    s: float,
+                    return_domain: str) -> Dict[str, np.ndarray]:
+    """
+    Prepare domain arrays for different representations.
+
+    Parameters:
+    -----------
+    domain_params : Tuple[float, float, int]
+        (min_log_moneyness, max_log_moneyness, num_points)
+    s : float
+        Spot price
+    return_domain : str
+        Domain for results
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of domain arrays
+    """
+    # Create log-moneyness grid
+    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
+
+    # Calculate other domains
+    M = np.exp(LM)  # Moneyness
+    R = M - 1  # Returns
+    K = s / M  # Strike prices
+
+    # Calculate grid spacing
+    dx = LM[1] - LM[0]
+
+    return {
+        'log_moneyness': LM,
+        'moneyness': M,
+        'returns': R,
+        'strikes': K,
+        'dx': dx
+    }
+
+
+@catch_exception
+def calculate_basic_density(df_hist: pd.DataFrame,
+                            t: float,
+                            r: float,
+                            n_periods: int,
+                            domains: Dict[str, np.ndarray],
+                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
+    """
+    Calculate historical density using KDE of historical returns.
+
+    Parameters:
+    -----------
+    df_hist : pd.DataFrame
+        Historical price data
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of PDFs in different domains
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Filter historical data for the maturity's lookback period
+    start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
+    maturity_hist = df_hist[df_hist.index >= start_date].copy()
+
+    if len(maturity_hist) < 10:
+        raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
+
+    # Calculate scaled returns
+    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
+    maturity_hist = maturity_hist.dropna()
+    returns = maturity_hist['log_returns'].values
+
+    if len(returns) < 2:
+        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
+
+    # Girsanov adjustment to shift to risk-neutral measure
+    mu_scaled = returns.mean()
+    sigma_scaled = returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    adj_returns = returns - adjustment
+
+    # Create PDF with KDE
+    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
+    pdf_lm = kde(LM)
+
+    # Normalize the PDF
+    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
+
+    # Transform to other domains
+    pdf_m = pdf_lm / M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }
+
+
+@catch_exception
+def calculate_volatility_density(vol_model: Dict[str, Any],
+                                 s: float,
+                                 t: float,
+                                 r: float,
+                                 n_periods: int,
+                                 tau_days: float,
+                                 domains: Dict[str, np.ndarray],
+                                 simulations: int = 5000,
+                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
+    """
+    Calculate historical density using volatility model simulation.
+
+    Parameters:
+    -----------
+    vol_model : Dict[str, Any]
+        Volatility model from fit_volatility_model()
+    s : float
+        Spot price
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    tau_days : float
+        Days to maturity
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    simulations : int
+        Number of Monte Carlo simulations
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Tuple[Dict[str, np.ndarray], Dict[str, Any]]
+        Dictionary of PDFs in different domains and model parameters
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Simulate paths with the volatility model
+    horizon = max(1, int(tau_days))
+    simulated_returns, simulated_mu = generate_volatility_paths(
+        vol_model,
+        horizon,
+        simulations
+    )
+
+    # Scale the simulated returns to match target time horizon
+    scaling_factor = np.sqrt(n_periods / tau_days)
+    scaled_returns = simulated_returns * scaling_factor
+
+    # Risk-neutral adjustment
+    mu_scaled = scaled_returns.mean()
+    sigma_scaled = scaled_returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    risk_neutral_returns = scaled_returns - adjustment
+
+    # Convert to terminal prices
+    simulated_prices = s * np.exp(risk_neutral_returns / 100)
+
+    # Convert to moneyness domain (x-domain)
+    simulated_moneyness = s / simulated_prices
+
+    # Calculate PDF with KDE
+    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
+    pdf_m = kde(M)
+
+    # Normalize the PDF
+    pdf_m = pdf_m / np.trapz(pdf_m, M)
+
+    # Transform to other domains
+    pdf_lm = pdf_m * M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    # Prepare model parameters for moments
+    avg_params = vol_model['avg_params']
+    param_names = vol_model['param_names']
+    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
+    model_params['model_type'] = vol_model['model_type']
+    model_params['distribution'] = vol_model['distribution']
+
+    # Add persistence for GARCH models
+    if vol_model['model_type'] == 'garch':
+        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }, model_params
+
+
+@catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
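The density transforms above are plain change-of-variable Jacobians: with X the log-moneyness variable and pdf_lm its density, moneyness M = exp(X) gets pdf_lm / M, strikes K = s / M get pdf_lm / K, and returns R = M - 1 get pdf_lm / (1 + R). A quick self-check (synthetic density, not package code):

```python
# Sketch: Jacobian check for the log-moneyness -> moneyness transform.
import numpy as np
from scipy import stats

LM = np.linspace(-1.5, 1.5, 1000)          # log-moneyness grid
M = np.exp(LM)                             # moneyness grid
pdf_lm = stats.norm.pdf(LM, loc=0.0, scale=0.3)

pdf_m = pdf_lm / M                         # |d(log m)/dm| = 1/m
print(np.trapz(pdf_lm, LM))                # ~1.0
print(np.trapz(pdf_m, M))                  # ~1.0 as well
```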
@@ -427,29 +706,26 @@ def get_hd_surface(model_results: pd.DataFrame,
     df_hist : pd.DataFrame
         DataFrame with historical price data
     domain_params : Tuple[float, float, int]
-        Tuple of (min, max, num_points) for x-domain
+        (min_log_moneyness, max_log_moneyness, num_points)
     return_domain : str
-        Domain for x-axis values ('log_moneyness', 'moneyness', 'returns', 'strikes')
+        Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
     method : str
-        Method to use for HD estimation:
-        - 'garch': GARCH(1,1) model
-        - 'egarch': EGARCH(1,1,1) model with asymmetry
-        - 'basic': Simple histogram/KDE of historical returns
+        Method for HD estimation ('garch', 'egarch', 'basic')
     distribution : str
-        Distribution to use for volatility models ('normal', 'studentst', or 'skewstudent')
+        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
     window_length : str
-        Length of sliding windows as string (e.g., '30d')
+        Length of sliding windows for model fitting (e.g., '30d')
     n_fits : int
-        Number of sliding windows for volatility model fitting
+        Number of sliding windows for model fitting
     simulations : int
-        Number of Monte Carlo simulations for volatility models
+        Number of Monte Carlo simulations
     bandwidth : str
-        KDE bandwidth method (default: 'silverman')
+        KDE bandwidth method
 
     Returns:
     --------
     Dict[str, Any]
-        Dictionary containing pdf_surface, cdf_surface, x_surface, and moments
+        Dictionary with pdf_surface, cdf_surface, x_surface, and moments
     """
     # Validate inputs
     required_columns = ['s', 't', 'r']
@@ -460,11 +736,11 @@ def get_hd_surface(model_results: pd.DataFrame,
     if len(df_hist) < 2:
         raise VolyError("Not enough data points in df_hist")
 
-    # Determine granularity from df_hist
+    # Determine granularity from data
     minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
     minutes_per_period = max(1, int(minutes_diff))
 
-    # Validate method and model parameters
+    # Validate method and distribution
     valid_methods = ['garch', 'egarch', 'basic']
     valid_distributions = ['normal', 'studentst', 'skewstudent']
 
@@ -477,16 +753,20 @@ def get_hd_surface(model_results: pd.DataFrame,
     if method in ['garch', 'egarch'] and distribution not in valid_distributions:
         raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
 
-    # Calculate log returns from price history
+    # Validate return domain
+    valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
+    if return_domain not in valid_domains:
+        raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
+
+    # Calculate log returns
     log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
     log_returns = log_returns.dropna().values
 
-    # Fit volatility model if using GARCH or EGARCH
+    # Fit volatility model if needed
     vol_model = None
     if method in ['garch', 'egarch']:
-        model_type = method  # Use method as model_type
-        logger.info(
-            f"Using {model_type.upper()} with {distribution} distribution, {n_fits} fits, {simulations} simulations")
+        model_type = method
+        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
 
         vol_model = fit_volatility_model(
             log_returns=log_returns,
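Granularity is inferred from the first two index entries, and log returns are multiplied by 100 before fitting (percent-scale returns generally keep the arch optimizer better conditioned; that rationale is an inference, not stated in the source). A standalone sketch on a synthetic frame:

```python
# Sketch: granularity inference plus percent log returns, synthetic data.
import numpy as np
import pandas as pd

idx = pd.date_range('2024-01-01', periods=200, freq='15min')
close = 100 * np.exp(np.cumsum(np.random.normal(0, 0.001, size=200)))
df_hist = pd.DataFrame({'close': close}, index=idx)

minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
minutes_per_period = max(1, int(minutes_diff))            # -> 15

log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
log_returns = log_returns.dropna().values                 # percent returns
print(minutes_per_period, log_returns[:3])
```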
@@ -496,8 +776,6 @@ def get_hd_surface(model_results: pd.DataFrame,
             window_length=window_length,
             n_fits=n_fits
         )
-    elif method == 'basic':
-        logger.info(f"Using basic returns-based KDE method with bandwidth {bandwidth}")
 
     # Initialize result containers
     pdf_surface = {}
@@ -507,155 +785,75 @@ def get_hd_surface(model_results: pd.DataFrame,
 
     # Process each maturity
     for i in model_results.index:
-        # Get parameters for this maturity
-        s = model_results.loc[i, 's']  # Current spot price
-        r = model_results.loc[i, 'r']  # Risk-free rate
-        t = model_results.loc[i, 't']  # Time to maturity in years
-
-        # Get domain grids
-        LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
-        M = np.exp(LM)  # Moneyness
-        R = M - 1  # Returns
-        K = s / M  # Strike prices
-
-        # For time scaling calculations
-        tau_days_float = t * 365.25  # Exact number of days
-        n_periods = max(1, int(t * 365.25 * 24 * 60 / minutes_per_period))
-
-        logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days_float:.2f} days)")
-
-        if method == 'basic':
-            # Simple returns-based method
-            # Filter historical data for this maturity's lookback period
-            start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
-            maturity_hist = df_hist[df_hist.index >= start_date].copy()
-
-            if len(maturity_hist) < 10:
-                logger.warning(f"Not enough historical data for maturity {i}, skipping.")
-                continue
-
-            # Calculate scaled returns
-            maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(
-                n_periods)
-            maturity_hist = maturity_hist.dropna()
-
-            returns = maturity_hist['log_returns'].values
-            if len(returns) < 2:
-                logger.warning(f"Not enough valid returns for maturity {i}, skipping.")
-                continue
-
-            # Girsanov adjustment to shift to risk-neutral measure
-            mu_scaled = returns.mean()
-            sigma_scaled = returns.std()
-            expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
-            adjustment = mu_scaled - expected_risk_neutral_mean
-            adj_returns = returns - adjustment
-
-            # Create HD and normalize
-            f = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
-            pdf_values = f(LM)
-
-            # Transform according to return domain
-            pdf_lm = pdf_values
-            pdf_m = pdf_lm / M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
-
-            # No model parameters to include
-            model_params = None
-
-        elif method in ['garch', 'egarch']:
-            # Volatility model-based method
-            if vol_model is None:
-                logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
-                continue
-
-            # Simulate paths with the volatility model
-            horizon = max(1, int(tau_days_float))
-            simulated_returns, simulated_mu = simulate_volatility_paths(
-                vol_model,
-                horizon,
-                simulations
-            )
-
-            # Scale the simulated returns to match target time horizon
-            scaling_factor = np.sqrt(n_periods / tau_days_float)
-            scaled_returns = simulated_returns * scaling_factor
-
-            # Risk-neutral adjustment
-            mu_scaled = scaled_returns.mean()
-            sigma_scaled = scaled_returns.std()
-            expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
-            adjustment = mu_scaled - expected_risk_neutral_mean
-            risk_neutral_returns = scaled_returns - adjustment
-
-            # Convert to terminal prices
-            simulated_prices = s * np.exp(risk_neutral_returns / 100)
-
-            # Convert to moneyness domain (x-domain)
-            simulated_moneyness = s / simulated_prices
-
-            # Perform KDE to get PDF
-            kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-            pdf_values = kde(M)
-
-            # Transform according to return domain
-            pdf_m = pdf_values
-            pdf_lm = pdf_m * M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
-
-            # Include volatility model params in moments
-            avg_params = vol_model['avg_params']
-            param_names = vol_model['param_names']
-            model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
-            model_params['model_type'] = method
-            model_params['distribution'] = distribution
-
-            # Add persistence for GARCH models
-            if method == 'garch':
-                model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-        else:
-            continue  # Skip if invalid method
-
-        # Ensure density integrates to 1
-        dx = LM[1] - LM[0]
-        total_area = np.sum(pdf_values * dx)
-        if total_area <= 0:
-            logger.warning(f"Invalid density (area <= 0) for maturity {i}, skipping.")
-            continue
-
-        pdf_values = pdf_values / total_area
-
-        # Calculate CDF
-        cdf = np.cumsum(pdf_lm * dx)
-        cdf = np.minimum(cdf / cdf[-1], 1.0)  # Ensure CDF is between 0 and 1
-
-        # Select appropriate domain and calculate moments
-        if return_domain == 'log_moneyness':
-            x = LM
-            pdf = pdf_lm
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'moneyness':
-            x = M
-            pdf = pdf_m
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'returns':
-            x = R
-            pdf = pdf_r
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'strikes':
-            x = K
-            pdf = pdf_k
+        try:
+            # Get parameters for this maturity
+            s = model_results.loc[i, 's']  # Spot price
+            r = model_results.loc[i, 'r']  # Risk-free rate
+            t = model_results.loc[i, 't']  # Time to maturity in years
+
+            # Calculate time scaling parameters
+            tau_days = t * 365.25  # Days to expiry
+            n_periods = max(1, int(tau_days * 24 * 60 / minutes_per_period))  # Number of periods
+
+            logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+
+            # Prepare domains
+            domains = prepare_domains(domain_params, s, return_domain)
+
+            # Calculate density based on method
+            if method == 'basic':
+                pdfs = calculate_basic_density(
+                    df_hist=df_hist,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    domains=domains,
+                    bandwidth=bandwidth
+                )
+                model_params = None
+
+            else:  # 'garch' or 'egarch'
+                if vol_model is None:
+                    logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
+                    continue
+
+                pdfs, model_params = calculate_volatility_density(
+                    vol_model=vol_model,
+                    s=s,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    tau_days=tau_days,
+                    domains=domains,
+                    simulations=simulations,
+                    bandwidth=bandwidth
+                )
+
+            # Get domain arrays for output
+            if return_domain == 'log_moneyness':
+                x = domains['log_moneyness']
+                pdf = pdfs['log_moneyness']
+            elif return_domain == 'moneyness':
+                x = domains['moneyness']
+                pdf = pdfs['moneyness']
+            elif return_domain == 'returns':
+                x = domains['returns']
+                pdf = pdfs['returns']
+            elif return_domain == 'strikes':
+                x = domains['strikes']
+                pdf = pdfs['strikes']
+
+            # Calculate statistical moments
             moments = get_all_moments(x, pdf, model_params)
-        else:
-            raise VolyError(f"Unsupported return_domain: {return_domain}")
 
-        # Store results
-        pdf_surface[i] = pdf
-        cdf_surface[i] = cdf
-        x_surface[i] = x
-        all_moments[i] = moments
+            # Store results
+            pdf_surface[i] = pdf
+            cdf_surface[i] = pdfs['cdf']
+            x_surface[i] = x
+            all_moments[i] = moments
+
+        except Exception as e:
+            logger.warning(f"Failed to calculate HD for maturity {i}: {str(e)}")
 
     # Check if we have any valid results
     if not pdf_surface:
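With the per-maturity loop now wrapped in try/except, a single bad expiry is logged and skipped instead of aborting the whole surface. The public entry point is unchanged; a minimal call sketch (the `model_results` frame here is a hypothetical stand-in carrying the required 's', 't', 'r' columns):

```python
# Sketch: driving the refactored pipeline end to end. Hypothetical inputs.
import pandas as pd
from voly.core.hd import get_historical_data, get_hd_surface

df_hist = get_historical_data('BTC', '90d', '1h', 'binance')

model_results = pd.DataFrame(
    {'s': [65000.0], 't': [30 / 365.25], 'r': [0.05]},  # spot, years, rate
    index=['30d-expiry'],
)
hd = get_hd_surface(
    model_results=model_results,
    df_hist=df_hist,
    method='garch',
    distribution='studentst',
    return_domain='log_moneyness',
)
# hd carries pdf_surface, cdf_surface, x_surface, and moments per the docstring
```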