voly 0.0.146__py3-none-any.whl → 0.0.147__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voly/core/hd.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
 import numpy as np
 import datetime as dt
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any
+from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
@@ -27,20 +27,25 @@ def get_historical_data(currency: str,
     Fetch historical OHLCV data for a cryptocurrency.
 
     Parameters:
-    ----------
+    -----------
     currency : str
-        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH').
+        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH')
     lookback_days : str
         The lookback period in days, formatted as '90d', '30d', etc.
     granularity : str
-        The time interval for data points (e.g., '15m', '1h', '1d').
+        The time interval for data points (e.g., '15m', '1h', '1d')
     exchange_name : str
-        The exchange to fetch data from (default: 'binance').
+        The exchange to fetch data from (default: 'binance')
 
     Returns:
-    -------
-    pd.DataFrame: Historical price data with OHLCV columns.
+    --------
+    pd.DataFrame
+        Historical price data with OHLCV columns and datetime index
     """
+    # Validate inputs
+    if not lookback_days.endswith('d'):
+        raise VolyError("lookback_days should be in format '90d', '30d', etc.")
+
     try:
         # Get the exchange class from ccxt
         exchange_class = getattr(ccxt, exchange_name.lower())
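
With the format check hoisted above the try block, a malformed lookback now fails before any exchange object is built. A small sketch of the expected behavior, assuming the @catch_exception decorator lets VolyError propagate (not verified here):

    from voly.exceptions import VolyError
    from voly.core.hd import get_historical_data

    try:
        get_historical_data('BTC', '90days', '1h', 'binance')  # wrong suffix
    except VolyError as e:
        print(e)  # lookback_days should be in format '90d', '30d', etc.
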
@@ -49,30 +54,40 @@ def get_historical_data(currency: str,
         raise VolyError(f"Exchange '{exchange_name}' not found in ccxt. Please check the exchange name.")
 
     # Form the trading pair symbol
-    symbol = currency + '/USDT'
+    symbol = f"{currency}/USDT"
 
     # Convert lookback_days to timestamp
-    if lookback_days.endswith('d'):
-        days_ago = int(lookback_days[:-1])
-        date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
-    else:
-        raise VolyError("lookback_days should be in format '90d', '30d', etc.")
-
+    days_ago = int(lookback_days[:-1])
+    date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
     from_ts = exchange.parse8601(date_start)
-    ohlcv_list = []
-    ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-    ohlcv_list.append(ohlcv)
-
-    # Fetch all available data within the lookback period
-    while len(ohlcv) == 1000:
-        from_ts = ohlcv[-1][0]
-        new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-        if len(new_ohlcv) <= 1:
+
+    # Fetch data with pagination
+    ohlcv = []
+    last_ts = from_ts
+
+    logger.info(f"Fetching {currency} historical data from {exchange_name} for past {days_ago} days")
+
+    while True:
+        batch = exchange.fetch_ohlcv(symbol, granularity, since=last_ts, limit=1000)
+
+        if not batch or len(batch) == 0:
             break
-        ohlcv.extend(new_ohlcv[1:])  # Skip first element to avoid duplication
-        if len(new_ohlcv) < 1000:
+
+        if len(ohlcv) > 0 and batch[0][0] == ohlcv[-1][0]:
+            # Skip first element if it's a duplicate of the last from previous batch
+            batch = batch[1:]
+
+        if not batch:
+            break
+
+        ohlcv.extend(batch)
+        last_ts = batch[-1][0]
+
+        if len(batch) < 1000:
             break
 
+        logger.debug(f"Fetched {len(batch)} candles, total now: {len(ohlcv)}")
+
     # Convert to DataFrame
     df_hist = pd.DataFrame(ohlcv, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
     df_hist['date'] = pd.to_datetime(df_hist['date'], unit='ms')
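
The old loop seeded the list with one fetch and paged while each response was exactly 1000 rows; the rewrite starts empty, pages inside while True, and drops a boundary candle only when its timestamp duplicates the last one kept. A minimal standalone sketch of the same pattern with ccxt (symbol, timeframe, and start date are illustrative, not from the diff):

    import ccxt

    exchange = ccxt.binance()
    since = exchange.parse8601('2024-01-01T00:00:00Z')  # hypothetical start

    candles = []
    while True:
        batch = exchange.fetch_ohlcv('BTC/USDT', '1h', since=since, limit=1000)
        if not batch:
            break
        if candles and batch[0][0] == candles[-1][0]:
            batch = batch[1:]  # drop the duplicated boundary candle
            if not batch:
                break
        candles.extend(batch)
        since = batch[-1][0]   # resume from the last timestamp seen
        if len(batch) < 1000:
            break              # final partial page
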
@@ -87,36 +102,76 @@ def get_historical_data(currency: str,
 @catch_exception
 def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
     """
-    Parse window length from string format (e.g., '7d', '30d') to number of data points.
+    Convert window length string (e.g., '30d') to number of data points.
 
     Parameters:
     -----------
     window_length : str
         Window length in days, formatted as '7d', '30d', etc.
     df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index.
+        Historical data DataFrame with datetime index
 
     Returns:
     --------
     int
-        Number of data points corresponding to the window length.
+        Number of data points corresponding to the window length
     """
+    # Validate inputs
     if not isinstance(window_length, str) or not window_length.endswith('d'):
         raise VolyError("window_length should be in format '7d', '30d', etc.")
 
+    if len(df_hist) < 2:
+        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
+
     # Extract number of days
     days = int(window_length[:-1])
 
-    # Calculate time delta between consecutive data points
-    if len(df_hist) > 1:
-        avg_delta = (df_hist.index[-1] - df_hist.index[0]) / (len(df_hist) - 1)
-        # Convert to days and get points per day
-        days_per_point = avg_delta.total_seconds() / (24 * 60 * 60)
-        # Calculate number of points for the window
-        n_points = int(days / days_per_point)
-        return max(n_points, 10)  # Ensure at least 10 points
-    else:
-        raise VolyError("Not enough data points in df_hist to calculate granularity.")
+    # Calculate average time delta between data points
+    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
+
+    # Convert to days and calculate points per window
+    days_per_point = avg_delta / (24 * 60 * 60)
+    n_points = int(days / days_per_point)
+
+    # Ensure minimum number of points
+    return max(n_points, 10)
+
+
+def get_param_names(model_type: str, distribution: str) -> List[str]:
+    """
+    Get parameter names for a volatility model and distribution.
+
+    Parameters:
+    -----------
+    model_type : str
+        Type of volatility model ('garch' or 'egarch')
+    distribution : str
+        Distribution type ('normal', 'studentst', or 'skewstudent')
+
+    Returns:
+    --------
+    List[str]
+        List of parameter names
+    """
+    # GARCH(1,1) parameters
+    if model_type.lower() == 'garch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
+
+    # EGARCH(1,1,1) parameters
+    elif model_type.lower() == 'egarch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
+
+    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
 
 
 @catch_exception
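
A worked instance of the arithmetic above, assuming hourly candles: avg_delta is about 3600 seconds, so days_per_point = 3600 / 86400 = 1/24, and a '30d' window maps to int(30 * 24) = 720 data points:

    import pandas as pd

    idx = pd.date_range('2024-01-01', periods=721, freq='h')         # 720 hourly steps
    avg_delta = (idx[-1] - idx[0]).total_seconds() / (len(idx) - 1)  # 3600.0
    days_per_point = avg_delta / (24 * 60 * 60)                      # 1/24
    n_points = int(30 / days_per_point)                              # 720
    print(max(n_points, 10))
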
@@ -127,85 +182,90 @@ def fit_volatility_model(log_returns: np.ndarray,
                          window_length: str = '30d',
                          n_fits: int = 400) -> Dict[str, Any]:
     """
-    Fit a volatility model (GARCH or EGARCH) to log returns.
+    Fit a volatility model (GARCH or EGARCH) to historical returns.
 
     Parameters:
     -----------
     log_returns : np.ndarray
-        Array of log returns
+        Array of log returns (percent)
     df_hist : pd.DataFrame
-        DataFrame with historical price data
+        Historical price data
     model_type : str
         Type of volatility model ('garch' or 'egarch')
     distribution : str
         Distribution type ('normal', 'studentst', or 'skewstudent')
     window_length : str
-        Length of each window as a string (e.g., '30d')
+        Length of sliding window in days (e.g., '30d')
     n_fits : int
-        Number of sliding windows
+        Number of sliding windows to fit
 
     Returns:
     --------
     Dict[str, Any]
-        Dictionary with model parameters and processes
+        Dictionary with model parameters and fitting results
     """
     # Parse window length
     window_points = parse_window_length(window_length, df_hist)
 
+    # Validate data
     if len(log_returns) < window_points + n_fits:
         raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
 
-    # Adjust window sizes if necessary to avoid over-fitting
+    # Adjust window sizes to avoid overfitting
     n_fits = min(n_fits, max(100, len(log_returns) // 3))
     window_points = min(window_points, max(20, len(log_returns) // 3))
 
-    start = window_points + n_fits
-    end = n_fits
+    # Calculate start and end indices for sliding windows
+    start_idx = window_points + n_fits
+    end_idx = n_fits
 
-    # Different number of parameters based on model type and distribution
+    # Get parameter names for the model
     param_names = get_param_names(model_type, distribution)
     n_params = len(param_names)
 
+    # Initialize arrays for parameters and innovations
     parameters = np.zeros((n_fits, n_params))
     z_process = []
 
-    logger.info(
-        f"Fitting {model_type.upper()} model with {distribution} distribution using {n_fits} windows of {window_length}...")
+    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
+                f"using {n_fits} windows of {window_length}")
 
+    # Fit models with sliding windows
     for i in range(n_fits):
+        # Log progress
         if i % (n_fits // 10) == 0:
             logger.info(f"Fitting progress: {i}/{n_fits}")
 
-        # Skip if we don't have enough data
-        if end - i - 1 < 0 or start - i - 1 > len(log_returns):
+        # Check if we have enough data for this window
+        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
            continue
 
-        window = log_returns[end - i - 1:start - i - 1]
+        # Extract window data
+        window = log_returns[end_idx - i - 1:start_idx - i - 1]
 
-        # Skip windows that are too small or have invalid data
+        # Skip invalid windows
        if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
            continue
 
-        # Mean-center the data to improve numerical stability
+        # Mean-center the data for numerical stability
        data = window - np.mean(window)
 
        try:
-            # Configure model based on type and distribution
+            # Configure and fit model
            if model_type.lower() == 'garch':
                model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
            else:  # egarch
                model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
 
+            # Fit with optimization settings
            fit_result = model.fit(disp='off', options={'maxiter': 1000})
 
-            # Extract parameters based on model type and distribution
+            # Extract parameters
            params_dict = fit_result.params.to_dict()
-
-            # Extract parameter values in correct order
            param_values = [params_dict.get(param, 0) for param in param_names]
            parameters[i, :] = param_values
 
-            # Get last innovation (standardized residual)
+            # Extract standardized residuals (innovations)
            residuals = fit_result.resid
            conditional_vol = fit_result.conditional_volatility
 
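
Each sliding window repeats the same fit; this is what one iteration does, shown standalone with the arch package on synthetic percent returns (a sketch, not package code):

    import numpy as np
    from arch import arch_model

    rng = np.random.default_rng(0)
    window = rng.normal(0, 1.5, size=500)   # stand-in for one window of returns
    data = window - np.mean(window)         # mean-centered, as in the diff

    model = arch_model(data, vol='GARCH', p=1, q=1, dist='studentst')
    fit_result = model.fit(disp='off', options={'maxiter': 1000})

    params_dict = fit_result.params.to_dict()  # keys like 'omega', 'alpha[1]', 'beta[1]', 'nu'
    z = fit_result.resid / fit_result.conditional_volatility  # standardized residuals
    print(params_dict, z[-1])
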
@@ -217,11 +277,11 @@ def fit_volatility_model(log_returns: np.ndarray,
         except Exception as e:
             logger.warning(f"Model fit failed for window {i}: {str(e)}")
 
-    # Clean up any failed fits
+    # Check if we have enough successful fits
     if len(z_process) < n_fits / 2:
         raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
 
-    # Filter out rows with zeros (failed fits)
+    # Remove failed fits
     valid_rows = ~np.all(parameters == 0, axis=1)
     parameters = parameters[valid_rows]
 
@@ -240,100 +300,95 @@ def fit_volatility_model(log_returns: np.ndarray,
     }
 
 
-def get_param_names(model_type: str, distribution: str) -> List[str]:
+@catch_exception
+def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
     """
-    Get parameter names based on model type and distribution.
+    Create a function to sample innovations based on the volatility model.
 
     Parameters:
     -----------
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
+    vol_model : Dict[str, Any]
+        Volatility model information from fit_volatility_model()
 
     Returns:
     --------
-    List[str]
-        List of parameter names
+    Callable
+        Function that returns random innovations when called
     """
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
-        else:  # skewstudent
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
-    else:  # egarch
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
-        else:  # skewstudent
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
+    distribution = vol_model['distribution']
+    z_process = vol_model['z_process']
+
+    if distribution.lower() == 'normal':
+        # Use standard normal for normal distribution
+        def sample_innovation(size=1):
+            return np.random.normal(0, 1, size=size)
+    else:
+        # Use KDE for non-normal distributions to capture empirical distribution
+        kde = stats.gaussian_kde(z_process, bw_method='silverman')
+        z_range = np.linspace(min(z_process), max(z_process), 1000)
+        z_prob = kde(z_range)
+        z_prob = z_prob / np.sum(z_prob)
+
+        def sample_innovation(size=1):
+            return np.random.choice(z_range, size=size, p=z_prob)
+
+    return sample_innovation
 
 
 @catch_exception
-def simulate_volatility_paths(vol_model: Dict[str, Any],
+def generate_volatility_paths(vol_model: Dict[str, Any],
                               horizon: int,
                               simulations: int = 5000) -> Tuple[np.ndarray, float]:
     """
-    Simulate future paths using a fitted volatility model.
+    Simulate future price paths using a fitted volatility model.
 
     Parameters:
     -----------
     vol_model : Dict[str, Any]
-        Dict with volatility model parameters
+        Volatility model information from fit_volatility_model()
     horizon : int
-        Number of steps to simulate
+        Number of time steps to simulate
     simulations : int
         Number of paths to simulate
 
     Returns:
     --------
     Tuple[np.ndarray, float]
-        Simulated returns and drift
+        Array of simulated returns and the drift term
     """
+    # Extract model information
     parameters = vol_model['parameters']
-    z_process = vol_model['z_process']
     model_type = vol_model['model_type']
     distribution = vol_model['distribution']
     param_names = vol_model['param_names']
 
-    # Use mean parameters as starting point
+    # Get mean parameters
     pars = vol_model['avg_params'].copy()
     bounds = vol_model['std_params'].copy()
 
-    # Create dictionary for easier parameter access
+    # Create parameter dictionary for easier access
     param_dict = {name: value for name, value in zip(param_names, pars)}
 
-    # Log parameters in a structured way
+    # Log parameters
     param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
     logger.info(f"{model_type.upper()} parameters: {param_str}")
 
-    # Create sampling function based on distribution
-    if distribution.lower() == 'normal':
-        # Use standard normal for normal distribution
-        def sample_innovation(size=1):
-            return np.random.normal(0, 1, size=size)
-    else:
-        # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')  # original code didnt have bw_method
-        z_range = np.linspace(min(z_process), max(z_process), 1000)
-        z_prob = kde(z_range)
-        z_prob = z_prob / np.sum(z_prob)
+    # Create innovation sampler
+    sample_innovation = create_innovation_sampler(vol_model)
 
-        def sample_innovation(size=1):
-            return np.random.choice(z_range, size=size, p=z_prob)
-
-    # Simulate paths
+    # Initialize results array
     simulated_returns = np.zeros(simulations)
     mu = param_dict.get('mu', 0)
 
+    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
+
+    # Simulate paths
     for i in range(simulations):
+        # Log progress
         if (i + 1) % (simulations // 10) == 0:
             logger.info(f"Simulation progress: {i + 1}/{simulations}")
 
-        # Optionally vary parameters between simulations
+        # Vary parameters periodically for robustness
        if (i + 1) % (simulations // 20) == 0:
            # Create parameter variations based on their estimated distribution
            sim_params = {}
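
The sampler factory above returns a closure; for non-normal fits it discretizes a Gaussian KDE of the pooled standardized residuals onto a 1000-point grid and samples from those weights. The same idea standalone, on synthetic innovations (a sketch, not package code):

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(1)
    z_process = rng.standard_t(df=5, size=2000)  # stand-in for pooled innovations

    kde = stats.gaussian_kde(z_process, bw_method='silverman')
    z_range = np.linspace(z_process.min(), z_process.max(), 1000)
    z_prob = kde(z_range)
    z_prob = z_prob / np.sum(z_prob)  # discretize the KDE into sampling weights

    def sample_innovation(size=1):
        return np.random.choice(z_range, size=size, p=z_prob)

    print(sample_innovation(5))
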
@@ -356,25 +411,30 @@ def simulate_volatility_paths(vol_model: Dict[str, Any],
 
         # Initialize volatility based on model type
         if model_type.lower() == 'garch':
+            # Extract GARCH parameters
             omega = sim_params.get('omega', 0)
             alpha = sim_params.get('alpha[1]', 0)
             beta = sim_params.get('beta[1]', 0)
 
-            # Initialize GARCH volatility (unconditional variance)
-            sigma2 = omega / (1 - alpha - beta) if alpha + beta < 1 else omega / 0.99
+            # Initialize with unconditional variance
+            persistence = alpha + beta
+            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
+
         else:  # egarch
+            # Extract EGARCH parameters
             omega = sim_params.get('omega', 0)
             beta = sim_params.get('beta[1]', 0)
 
-            # Initialize EGARCH volatility
+            # Initialize log variance
             log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
             sigma2 = np.exp(log_sigma2)
 
+        # Initialize return sum
         returns_sum = 0
 
-        # Simulate path step by step
+        # Simulate path
         for _ in range(horizon):
-            # Sample a random innovation
+            # Sample innovation
             z = sample_innovation()
 
             # Update returns and volatility based on model type
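
For reference, the GARCH(1,1) seed value above is the model's unconditional variance, omega / (1 - alpha - beta), valid while the persistence stays below 1 (illustrative numbers):

    omega, alpha, beta = 0.05, 0.08, 0.90
    persistence = alpha + beta          # 0.98, stationary since < 1
    sigma2 = omega / (1 - persistence)  # 0.05 / 0.02 = 2.5
    print(persistence, sigma2)
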
@@ -384,29 +444,265 @@ def simulate_volatility_paths(vol_model: Dict[str, Any],
                 returns_sum += e + mu
 
                 # Update GARCH volatility
-                sigma2 = sim_params.get('omega', 0) + sim_params.get('alpha[1]', 0) * e ** 2 + sim_params.get('beta[1]',
-                                                                                                              0) * sigma2
+                sigma2 = (sim_params.get('omega', 0) +
+                          sim_params.get('alpha[1]', 0) * e ** 2 +
+                          sim_params.get('beta[1]', 0) * sigma2)
+
             else:  # egarch
                 # Calculate return
                 e = z * np.sqrt(sigma2)
                 returns_sum += e + mu
 
-                # Update EGARCH volatility
-                abs_z = abs(z)
+                # Extract EGARCH parameters
                 gamma = sim_params.get('gamma[1]', 0)
                 alpha = sim_params.get('alpha[1]', 0)
                 beta = sim_params.get('beta[1]', 0)
                 omega = sim_params.get('omega', 0)
 
-                # EGARCH update equation
+                # Update EGARCH volatility
+                abs_z = abs(z)
                 log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
                 sigma2 = np.exp(log_sigma2)
 
+        # Store final return
         simulated_returns[i] = returns_sum
 
     return simulated_returns, mu * horizon
 
 
+@catch_exception
+def prepare_domains(domain_params: Tuple[float, float, int],
+                    s: float,
+                    return_domain: str) -> Dict[str, np.ndarray]:
+    """
+    Prepare domain arrays for different representations.
+
+    Parameters:
+    -----------
+    domain_params : Tuple[float, float, int]
+        (min_log_moneyness, max_log_moneyness, num_points)
+    s : float
+        Spot price
+    return_domain : str
+        Domain for results
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of domain arrays
+    """
+    # Create log-moneyness grid
+    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
+
+    # Calculate other domains
+    M = np.exp(LM)  # Moneyness
+    R = M - 1  # Returns
+    K = s / M  # Strike prices
+
+    # Calculate grid spacing
+    dx = LM[1] - LM[0]
+
+    return {
+        'log_moneyness': LM,
+        'moneyness': M,
+        'returns': R,
+        'strikes': K,
+        'dx': dx
+    }
+
+
+@catch_exception
+def calculate_basic_density(df_hist: pd.DataFrame,
+                            t: float,
+                            r: float,
+                            n_periods: int,
+                            domains: Dict[str, np.ndarray],
+                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
+    """
+    Calculate historical density using KDE of historical returns.
+
+    Parameters:
+    -----------
+    df_hist : pd.DataFrame
+        Historical price data
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of PDFs in different domains
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Filter historical data for the maturity's lookback period
+    start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
+    maturity_hist = df_hist[df_hist.index >= start_date].copy()
+
+    if len(maturity_hist) < 10:
+        raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
+
+    # Calculate scaled returns
+    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
+    maturity_hist = maturity_hist.dropna()
+    returns = maturity_hist['log_returns'].values
+
+    if len(returns) < 2:
+        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
+
+    # Girsanov adjustment to shift to risk-neutral measure
+    mu_scaled = returns.mean()
+    sigma_scaled = returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    adj_returns = returns - adjustment
+
+    # Create PDF with KDE
+    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
+    pdf_lm = kde(LM)
+
+    # Normalize the PDF
+    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
+
+    # Transform to other domains
+    pdf_m = pdf_lm / M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }
+
+
+@catch_exception
+def calculate_volatility_density(vol_model: Dict[str, Any],
+                                 s: float,
+                                 t: float,
+                                 r: float,
+                                 n_periods: int,
+                                 tau_days: float,
+                                 domains: Dict[str, np.ndarray],
+                                 simulations: int = 5000,
+                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
+    """
+    Calculate historical density using volatility model simulation.
+
+    Parameters:
+    -----------
+    vol_model : Dict[str, Any]
+        Volatility model from fit_volatility_model()
+    s : float
+        Spot price
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    tau_days : float
+        Days to maturity
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    simulations : int
+        Number of Monte Carlo simulations
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Tuple[Dict[str, np.ndarray], Dict[str, Any]]
+        Dictionary of PDFs in different domains and model parameters
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Simulate paths with the volatility model
+    horizon = max(1, int(tau_days))
+    simulated_returns, simulated_mu = generate_volatility_paths(
+        vol_model,
+        horizon,
+        simulations
+    )
+
+    # Scale the simulated returns to match target time horizon
+    scaling_factor = np.sqrt(n_periods / tau_days)
+    scaled_returns = simulated_returns * scaling_factor
+
+    # Risk-neutral adjustment
+    mu_scaled = scaled_returns.mean()
+    sigma_scaled = scaled_returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    risk_neutral_returns = scaled_returns - adjustment
+
+    # Convert to terminal prices
+    simulated_prices = s * np.exp(risk_neutral_returns / 100)
+
+    # Convert to moneyness domain (x-domain)
+    simulated_moneyness = s / simulated_prices
+
+    # Calculate PDF with KDE
+    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
+    pdf_m = kde(M)
+
+    # Normalize the PDF
+    pdf_m = pdf_m / np.trapz(pdf_m, M)
+
+    # Transform to other domains
+    pdf_lm = pdf_m * M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    # Prepare model parameters for moments
+    avg_params = vol_model['avg_params']
+    param_names = vol_model['param_names']
+    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
+    model_params['model_type'] = vol_model['model_type']
+    model_params['distribution'] = vol_model['distribution']
+
+    # Add persistence for GARCH models
+    if vol_model['model_type'] == 'garch':
+        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }, model_params
+
+
+@catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
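
Both density helpers above convert a PDF between domains with the change-of-variables rule: for m = exp(x), f_M(m) = f_X(log m) / m, which is the pdf_m = pdf_lm / M line (and analogously for strikes and returns). A quick self-contained check that probability mass is preserved:

    import numpy as np
    from scipy import stats

    LM = np.linspace(-1.5, 1.5, 1000)
    M = np.exp(LM)
    pdf_lm = stats.norm.pdf(LM, loc=0, scale=0.3)  # toy density on log-moneyness
    pdf_m = pdf_lm / M                             # Jacobian of m = exp(x)

    print(np.trapz(pdf_lm, LM))  # ~1.0
    print(np.trapz(pdf_m, M))    # ~1.0, same mass in the moneyness domain
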
@@ -427,29 +723,26 @@ def get_hd_surface(model_results: pd.DataFrame,
     df_hist : pd.DataFrame
         DataFrame with historical price data
     domain_params : Tuple[float, float, int]
-        Tuple of (min, max, num_points) for x-domain
+        (min_log_moneyness, max_log_moneyness, num_points)
     return_domain : str
-        Domain for x-axis values ('log_moneyness', 'moneyness', 'returns', 'strikes')
+        Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
     method : str
-        Method to use for HD estimation:
-        - 'garch': GARCH(1,1) model
-        - 'egarch': EGARCH(1,1,1) model with asymmetry
-        - 'basic': Simple histogram/KDE of historical returns
+        Method for HD estimation ('garch', 'egarch', 'basic')
     distribution : str
-        Distribution to use for volatility models ('normal', 'studentst', or 'skewstudent')
+        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
     window_length : str
-        Length of sliding windows as string (e.g., '30d')
+        Length of sliding windows for model fitting (e.g., '30d')
     n_fits : int
-        Number of sliding windows for volatility model fitting
+        Number of sliding windows for model fitting
     simulations : int
-        Number of Monte Carlo simulations for volatility models
+        Number of Monte Carlo simulations
     bandwidth : str
-        KDE bandwidth method (default: 'silverman')
+        KDE bandwidth method
 
     Returns:
     --------
     Dict[str, Any]
-        Dictionary containing pdf_surface, cdf_surface, x_surface, and moments
+        Dictionary with pdf_surface, cdf_surface, x_surface, and moments
     """
     # Validate inputs
     required_columns = ['s', 't', 'r']
@@ -460,11 +753,11 @@ def get_hd_surface(model_results: pd.DataFrame,
     if len(df_hist) < 2:
         raise VolyError("Not enough data points in df_hist")
 
-    # Determine granularity from df_hist
+    # Determine granularity from data
     minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
     minutes_per_period = max(1, int(minutes_diff))
 
-    # Validate method and model parameters
+    # Validate method and distribution
     valid_methods = ['garch', 'egarch', 'basic']
     valid_distributions = ['normal', 'studentst', 'skewstudent']
 
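
The granularity inference takes the gap between the first two index entries; with 15-minute candles that is 900 seconds, so minutes_per_period comes out to 15:

    import pandas as pd

    idx = pd.date_range('2024-01-01', periods=2, freq='15min')
    minutes_diff = (idx[1] - idx[0]).total_seconds() / 60
    minutes_per_period = max(1, int(minutes_diff))  # 15
    print(minutes_per_period)
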
@@ -477,16 +770,20 @@ def get_hd_surface(model_results: pd.DataFrame,
     if method in ['garch', 'egarch'] and distribution not in valid_distributions:
         raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
 
-    # Calculate log returns from price history
+    # Validate return domain
+    valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
+    if return_domain not in valid_domains:
+        raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
+
+    # Calculate log returns
     log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
     log_returns = log_returns.dropna().values
 
-    # Fit volatility model if using GARCH or EGARCH
+    # Fit volatility model if needed
     vol_model = None
     if method in ['garch', 'egarch']:
-        model_type = method  # Use method as model_type
-        logger.info(
-            f"Using {model_type.upper()} with {distribution} distribution, {n_fits} fits, {simulations} simulations")
+        model_type = method
+        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
 
         vol_model = fit_volatility_model(
             log_returns=log_returns,
@@ -496,8 +793,6 @@ def get_hd_surface(model_results: pd.DataFrame,
             window_length=window_length,
             n_fits=n_fits
         )
-    elif method == 'basic':
-        logger.info(f"Using basic returns-based KDE method with bandwidth {bandwidth}")
 
     # Initialize result containers
     pdf_surface = {}
@@ -507,155 +802,75 @@ def get_hd_surface(model_results: pd.DataFrame,
 
     # Process each maturity
     for i in model_results.index:
-        # Get parameters for this maturity
-        s = model_results.loc[i, 's']  # Current spot price
-        r = model_results.loc[i, 'r']  # Risk-free rate
-        t = model_results.loc[i, 't']  # Time to maturity in years
-
-        # Get domain grids
-        LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
-        M = np.exp(LM)  # Moneyness
-        R = M - 1  # Returns
-        K = s / M  # Strike prices
-
-        # For time scaling calculations
-        tau_days_float = t * 365.25  # Exact number of days
-        n_periods = max(1, int(t * 365.25 * 24 * 60 / minutes_per_period))
-
-        logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days_float:.2f} days)")
-
-        if method == 'basic':
-            # Simple returns-based method
-            # Filter historical data for this maturity's lookback period
-            start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
-            maturity_hist = df_hist[df_hist.index >= start_date].copy()
-
-            if len(maturity_hist) < 10:
-                logger.warning(f"Not enough historical data for maturity {i}, skipping.")
-                continue
-
-            # Calculate scaled returns
-            maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(
-                n_periods)
-            maturity_hist = maturity_hist.dropna()
-
-            returns = maturity_hist['log_returns'].values
-            if len(returns) < 2:
-                logger.warning(f"Not enough valid returns for maturity {i}, skipping.")
-                continue
-
-            # Girsanov adjustment to shift to risk-neutral measure
-            mu_scaled = returns.mean()
-            sigma_scaled = returns.std()
-            expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
-            adjustment = mu_scaled - expected_risk_neutral_mean
-            adj_returns = returns - adjustment
-
-            # Create HD and normalize
-            f = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
-            pdf_values = f(LM)
-
-            # Transform according to return domain
-            pdf_lm = pdf_values
-            pdf_m = pdf_lm / M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
-
-            # No model parameters to include
-            model_params = None
-
-        elif method in ['garch', 'egarch']:
-            # Volatility model-based method
-            if vol_model is None:
-                logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
-                continue
-
-            # Simulate paths with the volatility model
-            horizon = max(1, int(tau_days_float))
-            simulated_returns, simulated_mu = simulate_volatility_paths(
-                vol_model,
-                horizon,
-                simulations
-            )
-
-            # Scale the simulated returns to match target time horizon
-            scaling_factor = np.sqrt(n_periods / tau_days_float)
-            scaled_returns = simulated_returns * scaling_factor
-
-            # Risk-neutral adjustment
-            mu_scaled = scaled_returns.mean()
-            sigma_scaled = scaled_returns.std()
-            expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
-            adjustment = mu_scaled - expected_risk_neutral_mean
-            risk_neutral_returns = scaled_returns - adjustment
-
-            # Convert to terminal prices
-            simulated_prices = s * np.exp(risk_neutral_returns / 100)
-
-            # Convert to moneyness domain (x-domain)
-            simulated_moneyness = s / simulated_prices
-
-            # Perform KDE to get PDF
-            kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-            pdf_values = kde(M)
-
-            # Transform according to return domain
-            pdf_m = pdf_values
-            pdf_lm = pdf_m * M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
-
-            # Include volatility model params in moments
-            avg_params = vol_model['avg_params']
-            param_names = vol_model['param_names']
-            model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
-            model_params['model_type'] = method
-            model_params['distribution'] = distribution
-
-            # Add persistence for GARCH models
-            if method == 'garch':
-                model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-        else:
-            continue  # Skip if invalid method
-
-        # Ensure density integrates to 1
-        dx = LM[1] - LM[0]
-        total_area = np.sum(pdf_values * dx)
-        if total_area <= 0:
-            logger.warning(f"Invalid density (area <= 0) for maturity {i}, skipping.")
-            continue
-
-        pdf_values = pdf_values / total_area
-
-        # Calculate CDF
-        cdf = np.cumsum(pdf_lm * dx)
-        cdf = np.minimum(cdf / cdf[-1], 1.0)  # Ensure CDF is between 0 and 1
-
-        # Select appropriate domain and calculate moments
-        if return_domain == 'log_moneyness':
-            x = LM
-            pdf = pdf_lm
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'moneyness':
-            x = M
-            pdf = pdf_m
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'returns':
-            x = R
-            pdf = pdf_r
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'strikes':
-            x = K
-            pdf = pdf_k
+        try:
+            # Get parameters for this maturity
+            s = model_results.loc[i, 's']  # Spot price
+            r = model_results.loc[i, 'r']  # Risk-free rate
+            t = model_results.loc[i, 't']  # Time to maturity in years
+
+            # Calculate time scaling parameters
+            tau_days = t * 365.25  # Days to expiry
+            n_periods = max(1, int(tau_days * 24 * 60 / minutes_per_period))  # Number of periods
+
+            logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+
+            # Prepare domains
+            domains = prepare_domains(domain_params, s, return_domain)
+
+            # Calculate density based on method
+            if method == 'basic':
+                pdfs = calculate_basic_density(
+                    df_hist=df_hist,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    domains=domains,
+                    bandwidth=bandwidth
+                )
+                model_params = None
+
+            else:  # 'garch' or 'egarch'
+                if vol_model is None:
+                    logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
+                    continue
+
+                pdfs, model_params = calculate_volatility_density(
+                    vol_model=vol_model,
+                    s=s,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    tau_days=tau_days,
+                    domains=domains,
+                    simulations=simulations,
+                    bandwidth=bandwidth
+                )
+
+            # Get domain arrays for output
+            if return_domain == 'log_moneyness':
+                x = domains['log_moneyness']
+                pdf = pdfs['log_moneyness']
+            elif return_domain == 'moneyness':
+                x = domains['moneyness']
+                pdf = pdfs['moneyness']
+            elif return_domain == 'returns':
+                x = domains['returns']
+                pdf = pdfs['returns']
+            elif return_domain == 'strikes':
+                x = domains['strikes']
+                pdf = pdfs['strikes']
+
+            # Calculate statistical moments
             moments = get_all_moments(x, pdf, model_params)
-        else:
-            raise VolyError(f"Unsupported return_domain: {return_domain}")
 
-        # Store results
-        pdf_surface[i] = pdf
-        cdf_surface[i] = cdf
-        x_surface[i] = x
-        all_moments[i] = moments
+            # Store results
+            pdf_surface[i] = pdf
+            cdf_surface[i] = pdfs['cdf']
+            x_surface[i] = x
+            all_moments[i] = moments
+
+        except Exception as e:
+            logger.warning(f"Failed to calculate HD for maturity {i}: {str(e)}")
 
     # Check if we have any valid results
     if not pdf_surface:
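
Taken together, the refactor moves the per-maturity work into prepare_domains, calculate_basic_density, and calculate_volatility_density, leaving get_hd_surface as validation and dispatch. A hedged end-to-end sketch, assuming the documented dict return shape; the model_results frame below is a stand-in with the required 's', 't', 'r' columns:

    import pandas as pd
    from voly.core.hd import get_historical_data, get_hd_surface

    model_results = pd.DataFrame(
        {'s': [65000.0], 't': [30 / 365.25], 'r': [0.03]},  # hypothetical maturity row
        index=['30d-expiry'],
    )
    df_hist = get_historical_data('BTC', '90d', '1h', 'binance')
    hd = get_hd_surface(
        model_results,
        df_hist,
        domain_params=(-1.5, 1.5, 1000),
        return_domain='log_moneyness',
        method='garch',
        distribution='studentst',
    )
    pdf_surface, moments = hd['pdf_surface'], hd['moments']
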