voly 0.0.145__py3-none-any.whl → 0.0.147__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voly/core/hd.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
  import numpy as np
  import datetime as dt
  from scipy import stats
- from typing import Dict, List, Tuple, Optional, Union, Any
+ from typing import Dict, List, Tuple, Optional, Union, Any, Callable
  from voly.utils.logger import logger, catch_exception
  from voly.exceptions import VolyError
  from voly.core.rnd import get_all_moments
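
The only change in the hunk above is the addition of Callable to the typing imports; it is used later in this file as the return annotation of the new create_innovation_sampler() factory. A minimal sketch of that factory pattern, assuming nothing beyond numpy (make_sampler is an illustrative name, not part of the package):

from typing import Callable
import numpy as np

def make_sampler() -> Callable:
    # Return a closure that draws standard-normal innovations,
    # mirroring the normal-distribution branch of create_innovation_sampler.
    def sample_innovation(size=1):
        return np.random.normal(0, 1, size=size)
    return sample_innovation

sample_innovation = make_sampler()
z = sample_innovation(size=10)  # ten independent N(0, 1) draws
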
@@ -16,29 +16,35 @@ from voly.formulas import iv, get_domain
  from voly.models import SVIModel
  from voly.core.fit import fit_model
  from arch import arch_model
- from arch.univariate import GARCH, EGARCH


  @catch_exception
- def get_historical_data(currency, lookback_days, granularity, exchange_name):
+ def get_historical_data(currency: str,
+ lookback_days: str,
+ granularity: str,
+ exchange_name: str) -> pd.DataFrame:
  """
  Fetch historical OHLCV data for a cryptocurrency.

  Parameters:
- ----------
+ -----------
  currency : str
- The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH').
+ The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH')
  lookback_days : str
  The lookback period in days, formatted as '90d', '30d', etc.
  granularity : str
- The time interval for data points (e.g., '15m', '1h', '1d').
+ The time interval for data points (e.g., '15m', '1h', '1d')
  exchange_name : str
- The exchange to fetch data from (default: 'binance').
+ The exchange to fetch data from (default: 'binance')

  Returns:
- -------
- df_hist : pandas.DataFrame containing the historical price data with OHLCV columns.
+ --------
+ pd.DataFrame
+ Historical price data with OHLCV columns and datetime index
  """
+ # Validate inputs
+ if not lookback_days.endswith('d'):
+ raise VolyError("lookback_days should be in format '90d', '30d', etc.")

  try:
  # Get the exchange class from ccxt
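
For reference, a minimal usage sketch of the updated get_historical_data() signature shown above. The import path follows the file path of this diff (voly/core/hd.py); fetching requires ccxt, network access, and an exchange that lists the USDT pair:

from voly.core.hd import get_historical_data

# Fetch 90 days of hourly BTC/USDT candles from Binance
df_hist = get_historical_data(currency='BTC',
                              lookback_days='90d',
                              granularity='1h',
                              exchange_name='binance')

print(df_hist[['open', 'high', 'low', 'close', 'volume']].tail())
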
@@ -48,252 +54,277 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
48
54
  raise VolyError(f"Exchange '{exchange_name}' not found in ccxt. Please check the exchange name.")
49
55
 
50
56
  # Form the trading pair symbol
51
- symbol = currency + '/USDT'
57
+ symbol = f"{currency}/USDT"
52
58
 
53
59
  # Convert lookback_days to timestamp
54
- if lookback_days.endswith('d'):
55
- days_ago = int(lookback_days[:-1])
56
- date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
57
- else:
58
- raise VolyError("lookback_days should be in format '90d', '30d', etc.")
59
-
60
+ days_ago = int(lookback_days[:-1])
61
+ date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
60
62
  from_ts = exchange.parse8601(date_start)
61
- ohlcv_list = []
62
- ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
63
- ohlcv_list.append(ohlcv)
63
+
64
+ # Fetch data with pagination
65
+ ohlcv = []
66
+ last_ts = from_ts
67
+
68
+ logger.info(f"Fetching {currency} historical data from {exchange_name} for past {days_ago} days")
69
+
64
70
  while True:
65
- from_ts = ohlcv[-1][0]
66
- new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
67
- ohlcv.extend(new_ohlcv)
68
- if len(new_ohlcv) != 1000:
71
+ batch = exchange.fetch_ohlcv(symbol, granularity, since=last_ts, limit=1000)
72
+
73
+ if not batch or len(batch) == 0:
69
74
  break
70
75
 
76
+ if len(ohlcv) > 0 and batch[0][0] == ohlcv[-1][0]:
77
+ # Skip first element if it's a duplicate of the last from previous batch
78
+ batch = batch[1:]
79
+
80
+ if not batch:
81
+ break
82
+
83
+ ohlcv.extend(batch)
84
+ last_ts = batch[-1][0]
85
+
86
+ if len(batch) < 1000:
87
+ break
88
+
89
+ logger.debug(f"Fetched {len(batch)} candles, total now: {len(ohlcv)}")
90
+
71
91
  # Convert to DataFrame
72
92
  df_hist = pd.DataFrame(ohlcv, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
73
93
  df_hist['date'] = pd.to_datetime(df_hist['date'], unit='ms')
74
94
  df_hist.set_index('date', inplace=True)
75
95
  df_hist = df_hist.sort_index(ascending=True)
76
96
 
77
- print(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")
97
+ logger.info(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")
78
98
 
79
99
  return df_hist
80
100
 
81
101
 
82
102
  @catch_exception
83
- def parse_window_length(window_length, df_hist):
103
+ def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
84
104
  """
85
- Parse window length from string format (e.g., '7d', '30d') to number of data points.
105
+ Convert window length string (e.g., '30d') to number of data points.
86
106
 
87
107
  Parameters:
88
108
  -----------
89
109
  window_length : str
90
110
  Window length in days, formatted as '7d', '30d', etc.
91
111
  df_hist : pd.DataFrame
92
- Historical data DataFrame with datetime index.
112
+ Historical data DataFrame with datetime index
93
113
 
94
114
  Returns:
95
115
  --------
96
116
  int
97
- Number of data points corresponding to the window length.
117
+ Number of data points corresponding to the window length
98
118
  """
99
- if not window_length.endswith('d'):
119
+ # Validate inputs
120
+ if not isinstance(window_length, str) or not window_length.endswith('d'):
100
121
  raise VolyError("window_length should be in format '7d', '30d', etc.")
101
122
 
123
+ if len(df_hist) < 2:
124
+ raise VolyError("Historical data must contain at least 2 points to calculate granularity")
125
+
102
126
  # Extract number of days
103
127
  days = int(window_length[:-1])
104
128
 
105
- # Calculate time delta between consecutive data points
106
- if len(df_hist) > 1:
107
- avg_delta = (df_hist.index[-1] - df_hist.index[0]) / (len(df_hist) - 1)
108
- # Convert to days and get points per day
109
- days_per_point = avg_delta.total_seconds() / (24 * 60 * 60)
110
- # Calculate number of points for the window
111
- n_points = int(days / days_per_point)
112
- return max(n_points, 10) # Ensure at least 10 points
113
- else:
114
- raise VolyError("Not enough data points in df_hist to calculate granularity.")
129
+ # Calculate average time delta between data points
130
+ avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
131
+
132
+ # Convert to days and calculate points per window
133
+ days_per_point = avg_delta / (24 * 60 * 60)
134
+ n_points = int(days / days_per_point)
135
+
136
+ # Ensure minimum number of points
137
+ return max(n_points, 10)
138
+
139
+
140
+ def get_param_names(model_type: str, distribution: str) -> List[str]:
141
+ """
142
+ Get parameter names for a volatility model and distribution.
143
+
144
+ Parameters:
145
+ -----------
146
+ model_type : str
147
+ Type of volatility model ('garch' or 'egarch')
148
+ distribution : str
149
+ Distribution type ('normal', 'studentst', or 'skewstudent')
150
+
151
+ Returns:
152
+ --------
153
+ List[str]
154
+ List of parameter names
155
+ """
156
+ # GARCH(1,1) parameters
157
+ if model_type.lower() == 'garch':
158
+ if distribution.lower() == 'normal':
159
+ return ['mu', 'omega', 'alpha[1]', 'beta[1]']
160
+ elif distribution.lower() == 'studentst':
161
+ return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
162
+ elif distribution.lower() == 'skewstudent':
163
+ return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
164
+
165
+ # EGARCH(1,1,1) parameters
166
+ elif model_type.lower() == 'egarch':
167
+ if distribution.lower() == 'normal':
168
+ return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
169
+ elif distribution.lower() == 'studentst':
170
+ return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
171
+ elif distribution.lower() == 'skewstudent':
172
+ return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
173
+
174
+ raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
115
175
 
116
176
 
117
177
  @catch_exception
118
- def fit_volatility_model(log_returns, df_hist, model_type='garch', distribution='normal', window_length='30d',
119
- n_fits=400):
178
+ def fit_volatility_model(log_returns: np.ndarray,
179
+ df_hist: pd.DataFrame,
180
+ model_type: str = 'garch',
181
+ distribution: str = 'normal',
182
+ window_length: str = '30d',
183
+ n_fits: int = 400) -> Dict[str, Any]:
120
184
  """
121
- Fit a volatility model (GARCH or EGARCH) to log returns.
185
+ Fit a volatility model (GARCH or EGARCH) to historical returns.
122
186
 
123
- Args:
124
- log_returns: Array of log returns
125
- df_hist: DataFrame with historical price data
126
- model_type: Type of volatility model ('garch' or 'egarch')
127
- distribution: Distribution type ('normal', 'studentst', or 'skewstudent')
128
- window_length: Length of each window as a string (e.g., '30d')
129
- n_fits: Number of sliding windows
187
+ Parameters:
188
+ -----------
189
+ log_returns : np.ndarray
190
+ Array of log returns (percent)
191
+ df_hist : pd.DataFrame
192
+ Historical price data
193
+ model_type : str
194
+ Type of volatility model ('garch' or 'egarch')
195
+ distribution : str
196
+ Distribution type ('normal', 'studentst', or 'skewstudent')
197
+ window_length : str
198
+ Length of sliding window in days (e.g., '30d')
199
+ n_fits : int
200
+ Number of sliding windows to fit
130
201
 
131
202
  Returns:
132
- Dict with model parameters and processes
203
+ --------
204
+ Dict[str, Any]
205
+ Dictionary with model parameters and fitting results
133
206
  """
134
207
  # Parse window length
135
208
  window_points = parse_window_length(window_length, df_hist)
136
209
 
210
+ # Validate data
137
211
  if len(log_returns) < window_points + n_fits:
138
212
  raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
139
213
 
140
- # Adjust window sizes if necessary
141
- n_fits = min(n_fits, len(log_returns) // 3)
142
- window_points = min(window_points, len(log_returns) // 3)
214
+ # Adjust window sizes to avoid overfitting
215
+ n_fits = min(n_fits, max(100, len(log_returns) // 3))
216
+ window_points = min(window_points, max(20, len(log_returns) // 3))
143
217
 
144
- start = window_points + n_fits
145
- end = n_fits
218
+ # Calculate start and end indices for sliding windows
219
+ start_idx = window_points + n_fits
220
+ end_idx = n_fits
146
221
 
147
- # Different number of parameters based on model type and distribution
148
- if model_type.lower() == 'garch':
149
- if distribution.lower() == 'normal':
150
- n_params = 4 # mu, omega, alpha, beta
151
- elif distribution.lower() == 'studentst':
152
- n_params = 5 # mu, omega, alpha, beta, nu
153
- else: # skewstudent
154
- n_params = 6 # mu, omega, alpha, beta, nu, lambda (skew)
155
- else: # egarch
156
- if distribution.lower() == 'normal':
157
- n_params = 5 # mu, omega, alpha, gamma, beta
158
- elif distribution.lower() == 'studentst':
159
- n_params = 6 # mu, omega, alpha, gamma, beta, nu
160
- else: # skewstudent
161
- n_params = 7 # mu, omega, alpha, gamma, beta, nu, lambda (skew)
222
+ # Get parameter names for the model
223
+ param_names = get_param_names(model_type, distribution)
224
+ n_params = len(param_names)
162
225
 
226
+ # Initialize arrays for parameters and innovations
163
227
  parameters = np.zeros((n_fits, n_params))
164
228
  z_process = []
165
229
 
166
- logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution using {n_fits} windows...")
230
+ logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
231
+ f"using {n_fits} windows of {window_length}")
167
232
 
233
+ # Fit models with sliding windows
168
234
  for i in range(n_fits):
169
- window = log_returns[end - i - 1:start - i - 1]
235
+ # Log progress
236
+ if i % (n_fits // 10) == 0:
237
+ logger.info(f"Fitting progress: {i}/{n_fits}")
238
+
239
+ # Check if we have enough data for this window
240
+ if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
241
+ continue
242
+
243
+ # Extract window data
244
+ window = log_returns[end_idx - i - 1:start_idx - i - 1]
245
+
246
+ # Skip invalid windows
247
+ if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
248
+ continue
249
+
250
+ # Mean-center the data for numerical stability
170
251
  data = window - np.mean(window)
171
252
 
172
253
  try:
173
- # Configure model based on type and distribution
254
+ # Configure and fit model
174
255
  if model_type.lower() == 'garch':
175
256
  model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
176
257
  else: # egarch
177
258
  model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
178
259
 
179
- fit_result = model.fit(disp='off')
260
+ # Fit with optimization settings
261
+ fit_result = model.fit(disp='off', options={'maxiter': 1000})
180
262
 
181
- # Extract parameters based on model type and distribution
263
+ # Extract parameters
182
264
  params_dict = fit_result.params.to_dict()
265
+ param_values = [params_dict.get(param, 0) for param in param_names]
266
+ parameters[i, :] = param_values
183
267
 
184
- if model_type.lower() == 'garch':
185
- mu = params_dict.get("mu", 0)
186
- omega = params_dict.get("omega", 0)
187
- alpha = params_dict.get("alpha[1]", 0)
188
- beta = params_dict.get("beta[1]", 0)
189
-
190
- if distribution.lower() == 'normal':
191
- parameters[i, :] = [mu, omega, alpha, beta]
192
- elif distribution.lower() == 'studentst':
193
- nu = params_dict.get("nu", 0)
194
- parameters[i, :] = [mu, omega, alpha, beta, nu]
195
- else: # skewstudent
196
- nu = params_dict.get("nu", 0)
197
- lam = params_dict.get("lambda", 0)
198
- parameters[i, :] = [mu, omega, alpha, beta, nu, lam]
199
- else: # egarch
200
- mu = params_dict.get("mu", 0)
201
- omega = params_dict.get("omega", 0)
202
- alpha = params_dict.get("alpha[1]", 0)
203
- gamma = params_dict.get("gamma[1]", 0)
204
- beta = params_dict.get("beta[1]", 0)
205
-
206
- if distribution.lower() == 'normal':
207
- parameters[i, :] = [mu, omega, alpha, gamma, beta]
208
- elif distribution.lower() == 'studentst':
209
- nu = params_dict.get("nu", 0)
210
- parameters[i, :] = [mu, omega, alpha, gamma, beta, nu]
211
- else: # skewstudent
212
- nu = params_dict.get("nu", 0)
213
- lam = params_dict.get("lambda", 0)
214
- parameters[i, :] = [mu, omega, alpha, gamma, beta, nu, lam]
215
-
216
- # Get last innovation
268
+ # Extract standardized residuals (innovations)
217
269
  residuals = fit_result.resid
218
270
  conditional_vol = fit_result.conditional_volatility
219
- z_t = residuals[-1] / conditional_vol[-1]
220
- z_process.append(z_t)
271
+
272
+ if len(residuals) > 0 and len(conditional_vol) > 0:
273
+ z_t = residuals[-1] / conditional_vol[-1]
274
+ if not np.isnan(z_t) and not np.isinf(z_t):
275
+ z_process.append(z_t)
221
276
 
222
277
  except Exception as e:
223
278
  logger.warning(f"Model fit failed for window {i}: {str(e)}")
224
279
 
225
- # Clean up any failed fits
280
+ # Check if we have enough successful fits
226
281
  if len(z_process) < n_fits / 2:
227
- raise VolyError("Too many model fits failed. Check your data.")
282
+ raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
283
+
284
+ # Remove failed fits
285
+ valid_rows = ~np.all(parameters == 0, axis=1)
286
+ parameters = parameters[valid_rows]
228
287
 
288
+ # Calculate average parameters and standard deviations
229
289
  avg_params = np.mean(parameters, axis=0)
230
290
  std_params = np.std(parameters, axis=0)
231
291
 
232
292
  return {
293
+ 'model_type': model_type,
294
+ 'distribution': distribution,
233
295
  'parameters': parameters,
234
296
  'avg_params': avg_params,
235
297
  'std_params': std_params,
236
298
  'z_process': np.array(z_process),
237
- 'model_type': model_type,
238
- 'distribution': distribution,
239
- 'param_names': get_param_names(model_type, distribution)
299
+ 'param_names': param_names
240
300
  }
241
301
 
242
302
 
243
- def get_param_names(model_type, distribution):
244
- """Get parameter names based on model type and distribution."""
245
- if model_type.lower() == 'garch':
246
- if distribution.lower() == 'normal':
247
- return ['mu', 'omega', 'alpha', 'beta']
248
- elif distribution.lower() == 'studentst':
249
- return ['mu', 'omega', 'alpha', 'beta', 'nu']
250
- else: # skewstudent
251
- return ['mu', 'omega', 'alpha', 'beta', 'nu', 'lambda']
252
- else: # egarch
253
- if distribution.lower() == 'normal':
254
- return ['mu', 'omega', 'alpha', 'gamma', 'beta']
255
- elif distribution.lower() == 'studentst':
256
- return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu']
257
- else: # skewstudent
258
- return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu', 'lambda']
259
-
260
-
261
303
  @catch_exception
262
- def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_parameters=True):
304
+ def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
263
305
  """
264
- Simulate future paths using a fitted volatility model.
306
+ Create a function to sample innovations based on the volatility model.
265
307
 
266
- Args:
267
- vol_model: Dict with volatility model parameters
268
- horizon: Number of steps to simulate
269
- simulations: Number of paths to simulate
270
- variate_parameters: Whether to vary parameters between simulations
308
+ Parameters:
309
+ -----------
310
+ vol_model : Dict[str, Any]
311
+ Volatility model information from fit_volatility_model()
271
312
 
272
313
  Returns:
273
- Array of simulated log returns
314
+ --------
315
+ Callable
316
+ Function that returns random innovations when called
274
317
  """
275
- parameters = vol_model['parameters']
276
- z_process = vol_model['z_process']
277
- model_type = vol_model['model_type']
278
318
  distribution = vol_model['distribution']
279
- param_names = vol_model['param_names']
280
-
281
- # Use mean parameters as starting point
282
- pars = vol_model['avg_params'].copy()
283
- bounds = vol_model['std_params'].copy()
284
-
285
- # Log parameters
286
- param_str = ", ".join([f"{name}={par:.6f}" for name, par in zip(param_names, pars)])
287
- logger.info(f"{model_type.upper()} parameters: {param_str}")
319
+ z_process = vol_model['z_process']
288
320
 
289
- # Create KDE for innovations based on distribution
290
321
  if distribution.lower() == 'normal':
291
322
  # Use standard normal for normal distribution
292
323
  def sample_innovation(size=1):
293
324
  return np.random.normal(0, 1, size=size)
294
325
  else:
295
326
  # Use KDE for non-normal distributions to capture empirical distribution
296
- kde = stats.gaussian_kde(z_process, bw_method='silverman') # original code doesn't include bw_method
327
+ kde = stats.gaussian_kde(z_process, bw_method='silverman')
297
328
  z_range = np.linspace(min(z_process), max(z_process), 1000)
298
329
  z_prob = kde(z_range)
299
330
  z_prob = z_prob / np.sum(z_prob)
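
The hunk above adds get_param_names() and rebuilds fit_volatility_model() around it. A hedged sketch of how the two compose, reusing df_hist from the earlier example and assuming it holds enough history for the requested windows (log returns are passed in percent, as the new docstring states):

import numpy as np
from voly.core.hd import fit_volatility_model

log_returns = (np.log(df_hist['close'] / df_hist['close'].shift(1)).dropna() * 100).values

vol_model = fit_volatility_model(log_returns=log_returns,
                                 df_hist=df_hist,
                                 model_type='garch',
                                 distribution='normal',
                                 window_length='30d',
                                 n_fits=400)

print(vol_model['param_names'])  # ['mu', 'omega', 'alpha[1]', 'beta[1]'] for garch/normal
print(vol_model['avg_params'])   # parameter estimates averaged over the sliding windows
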
@@ -301,52 +332,104 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
301
332
  def sample_innovation(size=1):
302
333
  return np.random.choice(z_range, size=size, p=z_prob)
303
334
 
304
- # Simulate paths
335
+ return sample_innovation
336
+
337
+
338
+ @catch_exception
339
+ def generate_volatility_paths(vol_model: Dict[str, Any],
340
+ horizon: int,
341
+ simulations: int = 5000) -> Tuple[np.ndarray, float]:
342
+ """
343
+ Simulate future price paths using a fitted volatility model.
344
+
345
+ Parameters:
346
+ -----------
347
+ vol_model : Dict[str, Any]
348
+ Volatility model information from fit_volatility_model()
349
+ horizon : int
350
+ Number of time steps to simulate
351
+ simulations : int
352
+ Number of paths to simulate
353
+
354
+ Returns:
355
+ --------
356
+ Tuple[np.ndarray, float]
357
+ Array of simulated returns and the drift term
358
+ """
359
+ # Extract model information
360
+ parameters = vol_model['parameters']
361
+ model_type = vol_model['model_type']
362
+ distribution = vol_model['distribution']
363
+ param_names = vol_model['param_names']
364
+
365
+ # Get mean parameters
366
+ pars = vol_model['avg_params'].copy()
367
+ bounds = vol_model['std_params'].copy()
368
+
369
+ # Create parameter dictionary for easier access
370
+ param_dict = {name: value for name, value in zip(param_names, pars)}
371
+
372
+ # Log parameters
373
+ param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
374
+ logger.info(f"{model_type.upper()} parameters: {param_str}")
375
+
376
+ # Create innovation sampler
377
+ sample_innovation = create_innovation_sampler(vol_model)
378
+
379
+ # Initialize results array
305
380
  simulated_returns = np.zeros(simulations)
381
+ mu = param_dict.get('mu', 0)
382
+
383
+ logger.info(f"Simulating {simulations} paths for horizon {horizon}")
306
384
 
385
+ # Simulate paths
307
386
  for i in range(simulations):
387
+ # Log progress
308
388
  if (i + 1) % (simulations // 10) == 0:
309
389
  logger.info(f"Simulation progress: {i + 1}/{simulations}")
310
390
 
311
- # Optionally vary parameters
312
- if variate_parameters and (i + 1) % (simulations // 20) == 0:
313
- new_pars = []
314
- for j, (par, bound) in enumerate(zip(pars, bounds)):
315
- var = bound ** 2 / len(parameters)
316
- new_par = np.random.normal(par, var)
317
- # Ensure omega is positive, betas are between 0 and 1, etc.
318
- if j >= 1 and new_par <= 0:
319
- new_par = 0.01
320
- new_pars.append(new_par)
321
- sim_pars = new_pars
391
+ # Vary parameters periodically for robustness
392
+ if (i + 1) % (simulations // 20) == 0:
393
+ # Create parameter variations based on their estimated distribution
394
+ sim_params = {}
395
+ for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
396
+ var = bound ** 2 / max(len(parameters), 1)
397
+ # Generate new parameter from normal distribution around the mean
398
+ new_par = np.random.normal(par, np.sqrt(var))
399
+
400
+ # Apply constraints to ensure valid parameters
401
+ if name == 'omega':
402
+ new_par = max(new_par, 1e-6) # Must be positive
403
+ elif name in ['alpha[1]', 'beta[1]']:
404
+ new_par = max(min(new_par, 0.999), 0.001) # Between 0 and 1
405
+ elif name == 'nu':
406
+ new_par = max(new_par, 2.1) # Degrees of freedom > 2
407
+
408
+ sim_params[name] = new_par
322
409
  else:
323
- sim_pars = pars.copy()
410
+ sim_params = param_dict.copy()
324
411
 
325
- # Initialize variables based on model type
412
+ # Initialize volatility based on model type
326
413
  if model_type.lower() == 'garch':
327
- if distribution.lower() == 'normal':
328
- mu, omega, alpha, beta = sim_pars
329
- sigma2 = omega / (1 - alpha - beta)
330
- elif distribution.lower() == 'studentst':
331
- mu, omega, alpha, beta, nu = sim_pars
332
- sigma2 = omega / (1 - alpha - beta)
333
- else: # skewstudent
334
- mu, omega, alpha, beta, nu, lam = sim_pars
335
- sigma2 = omega / (1 - alpha - beta)
414
+ # Extract GARCH parameters
415
+ omega = sim_params.get('omega', 0)
416
+ alpha = sim_params.get('alpha[1]', 0)
417
+ beta = sim_params.get('beta[1]', 0)
418
+
419
+ # Initialize with unconditional variance
420
+ persistence = alpha + beta
421
+ sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
422
+
336
423
  else: # egarch
337
- if distribution.lower() == 'normal':
338
- mu, omega, alpha, gamma, beta = sim_pars
339
- log_sigma2 = omega / (1 - beta)
340
- sigma2 = np.exp(log_sigma2)
341
- elif distribution.lower() == 'studentst':
342
- mu, omega, alpha, gamma, beta, nu = sim_pars
343
- log_sigma2 = omega / (1 - beta)
344
- sigma2 = np.exp(log_sigma2)
345
- else: # skewstudent
346
- mu, omega, alpha, gamma, beta, nu, lam = sim_pars
347
- log_sigma2 = omega / (1 - beta)
348
- sigma2 = np.exp(log_sigma2)
424
+ # Extract EGARCH parameters
425
+ omega = sim_params.get('omega', 0)
426
+ beta = sim_params.get('beta[1]', 0)
427
+
428
+ # Initialize log variance
429
+ log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
430
+ sigma2 = np.exp(log_sigma2)
349
431
 
432
+ # Initialize return sum
350
433
  returns_sum = 0
351
434
 
352
435
  # Simulate path
@@ -354,116 +437,364 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
354
437
  # Sample innovation
355
438
  z = sample_innovation()
356
439
 
357
- # Update volatility and returns based on model type
440
+ # Update returns and volatility based on model type
358
441
  if model_type.lower() == 'garch':
359
442
  # Calculate return
360
443
  e = z * np.sqrt(sigma2)
361
444
  returns_sum += e + mu
362
445
 
363
446
  # Update GARCH volatility
364
- sigma2 = omega + alpha * e ** 2 + beta * sigma2
447
+ sigma2 = (sim_params.get('omega', 0) +
448
+ sim_params.get('alpha[1]', 0) * e ** 2 +
449
+ sim_params.get('beta[1]', 0) * sigma2)
450
+
365
451
  else: # egarch
366
452
  # Calculate return
367
453
  e = z * np.sqrt(sigma2)
368
454
  returns_sum += e + mu
369
455
 
456
+ # Extract EGARCH parameters
457
+ gamma = sim_params.get('gamma[1]', 0)
458
+ alpha = sim_params.get('alpha[1]', 0)
459
+ beta = sim_params.get('beta[1]', 0)
460
+ omega = sim_params.get('omega', 0)
461
+
370
462
  # Update EGARCH volatility
371
463
  abs_z = abs(z)
372
464
  log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
373
465
  sigma2 = np.exp(log_sigma2)
374
466
 
467
+ # Store final return
375
468
  simulated_returns[i] = returns_sum
376
469
 
377
470
  return simulated_returns, mu * horizon
378
471
 
379
472
 
473
+ @catch_exception
474
+ def prepare_domains(domain_params: Tuple[float, float, int],
475
+ s: float,
476
+ return_domain: str) -> Dict[str, np.ndarray]:
477
+ """
478
+ Prepare domain arrays for different representations.
479
+
480
+ Parameters:
481
+ -----------
482
+ domain_params : Tuple[float, float, int]
483
+ (min_log_moneyness, max_log_moneyness, num_points)
484
+ s : float
485
+ Spot price
486
+ return_domain : str
487
+ Domain for results
488
+
489
+ Returns:
490
+ --------
491
+ Dict[str, np.ndarray]
492
+ Dictionary of domain arrays
493
+ """
494
+ # Create log-moneyness grid
495
+ LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
496
+
497
+ # Calculate other domains
498
+ M = np.exp(LM) # Moneyness
499
+ R = M - 1 # Returns
500
+ K = s / M # Strike prices
501
+
502
+ # Calculate grid spacing
503
+ dx = LM[1] - LM[0]
504
+
505
+ return {
506
+ 'log_moneyness': LM,
507
+ 'moneyness': M,
508
+ 'returns': R,
509
+ 'strikes': K,
510
+ 'dx': dx
511
+ }
512
+
513
+
514
+ @catch_exception
515
+ def calculate_basic_density(df_hist: pd.DataFrame,
516
+ t: float,
517
+ r: float,
518
+ n_periods: int,
519
+ domains: Dict[str, np.ndarray],
520
+ bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
521
+ """
522
+ Calculate historical density using KDE of historical returns.
523
+
524
+ Parameters:
525
+ -----------
526
+ df_hist : pd.DataFrame
527
+ Historical price data
528
+ t : float
529
+ Time to maturity in years
530
+ r : float
531
+ Risk-free rate
532
+ n_periods : int
533
+ Number of periods to scale returns
534
+ domains : Dict[str, np.ndarray]
535
+ Domain arrays
536
+ bandwidth : str
537
+ KDE bandwidth method
538
+
539
+ Returns:
540
+ --------
541
+ Dict[str, np.ndarray]
542
+ Dictionary of PDFs in different domains
543
+ """
544
+ # Extract domains
545
+ LM = domains['log_moneyness']
546
+ M = domains['moneyness']
547
+ R = domains['returns']
548
+ K = domains['strikes']
549
+ dx = domains['dx']
550
+
551
+ # Filter historical data for the maturity's lookback period
552
+ start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
553
+ maturity_hist = df_hist[df_hist.index >= start_date].copy()
554
+
555
+ if len(maturity_hist) < 10:
556
+ raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
557
+
558
+ # Calculate scaled returns
559
+ maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
560
+ maturity_hist = maturity_hist.dropna()
561
+ returns = maturity_hist['log_returns'].values
562
+
563
+ if len(returns) < 2:
564
+ raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
565
+
566
+ # Girsanov adjustment to shift to risk-neutral measure
567
+ mu_scaled = returns.mean()
568
+ sigma_scaled = returns.std()
569
+ expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
570
+ adjustment = mu_scaled - expected_risk_neutral_mean
571
+ adj_returns = returns - adjustment
572
+
573
+ # Create PDF with KDE
574
+ kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
575
+ pdf_lm = kde(LM)
576
+
577
+ # Normalize the PDF
578
+ pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
579
+
580
+ # Transform to other domains
581
+ pdf_m = pdf_lm / M
582
+ pdf_k = pdf_lm / K
583
+ pdf_r = pdf_lm / (1 + R)
584
+
585
+ # Calculate CDF
586
+ cdf = np.cumsum(pdf_lm * dx)
587
+ cdf = cdf / cdf[-1]
588
+
589
+ return {
590
+ 'log_moneyness': pdf_lm,
591
+ 'moneyness': pdf_m,
592
+ 'returns': pdf_r,
593
+ 'strikes': pdf_k,
594
+ 'cdf': cdf
595
+ }
596
+
597
+
598
+ @catch_exception
599
+ def calculate_volatility_density(vol_model: Dict[str, Any],
600
+ s: float,
601
+ t: float,
602
+ r: float,
603
+ n_periods: int,
604
+ tau_days: float,
605
+ domains: Dict[str, np.ndarray],
606
+ simulations: int = 5000,
607
+ bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
608
+ """
609
+ Calculate historical density using volatility model simulation.
610
+
611
+ Parameters:
612
+ -----------
613
+ vol_model : Dict[str, Any]
614
+ Volatility model from fit_volatility_model()
615
+ s : float
616
+ Spot price
617
+ t : float
618
+ Time to maturity in years
619
+ r : float
620
+ Risk-free rate
621
+ n_periods : int
622
+ Number of periods to scale returns
623
+ tau_days : float
624
+ Days to maturity
625
+ domains : Dict[str, np.ndarray]
626
+ Domain arrays
627
+ simulations : int
628
+ Number of Monte Carlo simulations
629
+ bandwidth : str
630
+ KDE bandwidth method
631
+
632
+ Returns:
633
+ --------
634
+ Tuple[Dict[str, np.ndarray], Dict[str, Any]]
635
+ Dictionary of PDFs in different domains and model parameters
636
+ """
637
+ # Extract domains
638
+ LM = domains['log_moneyness']
639
+ M = domains['moneyness']
640
+ R = domains['returns']
641
+ K = domains['strikes']
642
+ dx = domains['dx']
643
+
644
+ # Simulate paths with the volatility model
645
+ horizon = max(1, int(tau_days))
646
+ simulated_returns, simulated_mu = generate_volatility_paths(
647
+ vol_model,
648
+ horizon,
649
+ simulations
650
+ )
651
+
652
+ # Scale the simulated returns to match target time horizon
653
+ scaling_factor = np.sqrt(n_periods / tau_days)
654
+ scaled_returns = simulated_returns * scaling_factor
655
+
656
+ # Risk-neutral adjustment
657
+ mu_scaled = scaled_returns.mean()
658
+ sigma_scaled = scaled_returns.std()
659
+ expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
660
+ adjustment = mu_scaled - expected_risk_neutral_mean
661
+ risk_neutral_returns = scaled_returns - adjustment
662
+
663
+ # Convert to terminal prices
664
+ simulated_prices = s * np.exp(risk_neutral_returns / 100)
665
+
666
+ # Convert to moneyness domain (x-domain)
667
+ simulated_moneyness = s / simulated_prices
668
+
669
+ # Calculate PDF with KDE
670
+ kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
671
+ pdf_m = kde(M)
672
+
673
+ # Normalize the PDF
674
+ pdf_m = pdf_m / np.trapz(pdf_m, M)
675
+
676
+ # Transform to other domains
677
+ pdf_lm = pdf_m * M
678
+ pdf_k = pdf_lm / K
679
+ pdf_r = pdf_lm / (1 + R)
680
+
681
+ # Calculate CDF
682
+ cdf = np.cumsum(pdf_lm * dx)
683
+ cdf = cdf / cdf[-1]
684
+
685
+ # Prepare model parameters for moments
686
+ avg_params = vol_model['avg_params']
687
+ param_names = vol_model['param_names']
688
+ model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
689
+ model_params['model_type'] = vol_model['model_type']
690
+ model_params['distribution'] = vol_model['distribution']
691
+
692
+ # Add persistence for GARCH models
693
+ if vol_model['model_type'] == 'garch':
694
+ model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
695
+
696
+ return {
697
+ 'log_moneyness': pdf_lm,
698
+ 'moneyness': pdf_m,
699
+ 'returns': pdf_r,
700
+ 'strikes': pdf_k,
701
+ 'cdf': cdf
702
+ }, model_params
703
+
704
+
705
+ @catch_exception
380
706
  def get_hd_surface(model_results: pd.DataFrame,
381
707
  df_hist: pd.DataFrame,
382
708
  domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
383
709
  return_domain: str = 'log_moneyness',
384
- method: str = 'arch_returns',
385
- model_type: str = 'garch',
710
+ method: str = 'garch',
386
711
  distribution: str = 'normal',
387
- **kwargs) -> Dict[str, Any]:
712
+ window_length: str = '30d',
713
+ n_fits: int = 400,
714
+ simulations: int = 5000,
715
+ bandwidth: str = 'silverman') -> Dict[str, Any]:
388
716
  """
389
717
  Generate historical density surface from historical price data.
390
718
 
391
719
  Parameters:
392
- model_results: DataFrame with model parameters and maturities
393
- df_hist: DataFrame with historical price data
394
- domain_params: Tuple of (min, max, num_points) for x-domain
395
- return_domain: Domain for x-axis values ('log_moneyness', 'moneyness', 'returns', 'strikes')
396
- method: Method to use for HD estimation ('hist_returns' or 'arch_returns')
397
- model_type: Type of volatility model to use ('garch' or 'egarch')
398
- distribution: Distribution to use ('normal', 'studentst', or 'skewstudent')
399
- **kwargs: Additional parameters for specific methods:
400
- For volatility models ('garch'/'egarch' method):
401
- n_fits: Number of sliding windows (default: 400)
402
- simulations: Number of Monte Carlo simulations (default: 5000)
403
- window_length: Length of sliding windows as string (default: '30d')
404
- variate_parameters: Whether to vary parameters (default: True)
405
- bandwidth: KDE bandwidth (default: 'silverman')
406
- For 'hist_returns' method:
407
- bandwidth: KDE bandwidth (default: 'silverman')
720
+ -----------
721
+ model_results : pd.DataFrame
722
+ DataFrame with model parameters and maturities
723
+ df_hist : pd.DataFrame
724
+ DataFrame with historical price data
725
+ domain_params : Tuple[float, float, int]
726
+ (min_log_moneyness, max_log_moneyness, num_points)
727
+ return_domain : str
728
+ Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
729
+ method : str
730
+ Method for HD estimation ('garch', 'egarch', 'basic')
731
+ distribution : str
732
+ Distribution for volatility models ('normal', 'studentst', 'skewstudent')
733
+ window_length : str
734
+ Length of sliding windows for model fitting (e.g., '30d')
735
+ n_fits : int
736
+ Number of sliding windows for model fitting
737
+ simulations : int
738
+ Number of Monte Carlo simulations
739
+ bandwidth : str
740
+ KDE bandwidth method
408
741
 
409
742
  Returns:
410
- Dictionary containing pdf_surface, cdf_surface, x_surface, and moments
743
+ --------
744
+ Dict[str, Any]
745
+ Dictionary with pdf_surface, cdf_surface, x_surface, and moments
411
746
  """
412
- # Check if required columns are present
747
+ # Validate inputs
413
748
  required_columns = ['s', 't', 'r']
414
749
  missing_columns = [col for col in required_columns if col not in model_results.columns]
415
750
  if missing_columns:
416
751
  raise VolyError(f"Required columns missing in model_results: {missing_columns}")
417
752
 
418
- # Determine granularity from df_hist
419
- if len(df_hist) > 1:
420
- # Calculate minutes between consecutive timestamps
421
- minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
422
- minutes_per_period = int(minutes_diff)
423
- else:
424
- raise VolyError("Cannot determine granularity from df_hist.")
753
+ if len(df_hist) < 2:
754
+ raise VolyError("Not enough data points in df_hist")
425
755
 
426
- # Validate model_type and distribution
427
- valid_model_types = ['garch', 'egarch']
756
+ # Determine granularity from data
757
+ minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
758
+ minutes_per_period = max(1, int(minutes_diff))
759
+
760
+ # Validate method and distribution
761
+ valid_methods = ['garch', 'egarch', 'basic']
428
762
  valid_distributions = ['normal', 'studentst', 'skewstudent']
429
763
 
430
- if model_type.lower() not in valid_model_types:
431
- raise VolyError(f"Invalid model_type: {model_type}. Must be one of {valid_model_types}")
764
+ method = method.lower()
765
+ distribution = distribution.lower()
766
+
767
+ if method not in valid_methods:
768
+ raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
432
769
 
433
- if distribution.lower() not in valid_distributions:
770
+ if method in ['garch', 'egarch'] and distribution not in valid_distributions:
434
771
  raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
435
772
 
436
- # Get method-specific parameters
437
- if method == 'arch_returns':
438
- n_fits = kwargs.get('n_fits', 400)
439
- simulations = kwargs.get('simulations', 5000)
440
- window_length = kwargs.get('window_length', '30d')
441
- variate_parameters = kwargs.get('variate_parameters', True)
442
- bandwidth = kwargs.get('bandwidth', 'silverman')
443
- logger.info(
444
- f"Using {model_type.upper()} method with {distribution} distribution, {n_fits} fits, {simulations} simulations")
445
- elif method == 'hist_returns':
446
- bandwidth = kwargs.get('bandwidth', 'silverman')
447
- logger.info(f"Using returns-based KDE method with bandwidth {bandwidth}")
448
- else:
449
- raise VolyError(f"Unknown method: {method}. Use 'hist_returns', 'arch_returns'.")
773
+ # Validate return domain
774
+ valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
775
+ if return_domain not in valid_domains:
776
+ raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
450
777
 
451
- # Calculate log returns from price history
778
+ # Calculate log returns
452
779
  log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
453
780
  log_returns = log_returns.dropna().values
454
781
 
455
- # Fit volatility model once if using garch/egarch method
782
+ # Fit volatility model if needed
456
783
  vol_model = None
457
- if method == 'arch_returns':
784
+ if method in ['garch', 'egarch']:
785
+ model_type = method
786
+ logger.info(f"Using {model_type.upper()} with {distribution} distribution")
787
+
458
788
  vol_model = fit_volatility_model(
459
- log_returns,
460
- df_hist,
789
+ log_returns=log_returns,
790
+ df_hist=df_hist,
461
791
  model_type=model_type,
462
792
  distribution=distribution,
463
793
  window_length=window_length,
464
794
  n_fits=n_fits
465
795
  )
466
796
 
797
+ # Initialize result containers
467
798
  pdf_surface = {}
468
799
  cdf_surface = {}
469
800
  x_surface = {}
@@ -471,161 +802,84 @@ def get_hd_surface(model_results: pd.DataFrame,
471
802
 
472
803
  # Process each maturity
473
804
  for i in model_results.index:
474
- # Get parameters for this maturity
475
- s = model_results.loc[i, 's'] # Current spot price
476
- r = model_results.loc[i, 'r'] # Risk-free rate
477
- t = model_results.loc[i, 't'] # Time to maturity in years
478
-
479
- # Get domain grids
480
- LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
481
- M = np.exp(LM) # Moneyness
482
- R = M - 1 # Returns
483
- K = s / M # Strike prices
484
-
485
- # For time scaling calculations
486
- tau_days_float = t * 365.25 # Exact number of days
487
- n_periods = max(1, int(t * 365.25 * 24 * 60 / minutes_per_period))
488
-
489
- logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days_float:.2f} days)")
490
-
491
- if method == 'hist_returns':
492
- # Standard returns-based method
493
- # Filter historical data for this maturity's lookback period
494
- start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
495
- maturity_hist = df_hist[df_hist.index >= start_date].copy()
496
-
497
- if len(maturity_hist) < 10:
498
- logger.warning(f"Not enough historical data for maturity {i}, skipping.")
499
- continue
500
-
501
- # Calculate scaled returns
502
- maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(
503
- n_periods)
504
- maturity_hist = maturity_hist.dropna()
505
-
506
- returns = maturity_hist['log_returns'].values
507
- if len(returns) < 2:
508
- logger.warning(f"Not enough valid returns for maturity {i}, skipping.")
509
- continue
510
-
511
- # Girsanov adjustment to shift to risk-neutral measure
512
- mu_scaled = returns.mean()
513
- sigma_scaled = returns.std()
514
- expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
515
- adjustment = mu_scaled - expected_risk_neutral_mean
516
- adj_returns = returns - adjustment
517
-
518
- # Create HD and normalize
519
- f = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
520
- pdf_values = f(LM)
521
-
522
- elif method == 'arch_returns':
523
- # Volatility model-based method
524
- if vol_model is None:
525
- logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
526
- continue
527
-
528
- # Simulate paths with the volatility model
529
- horizon = max(1, int(tau_days_float))
530
- simulated_returns, simulated_mu = simulate_volatility_paths(
531
- vol_model,
532
- horizon,
533
- simulations,
534
- variate_parameters
535
- )
536
-
537
- # Scale the simulated returns to match target time horizon
538
- scaling_factor = np.sqrt(n_periods / tau_days_float)
539
- scaled_returns = simulated_returns * scaling_factor
540
-
541
- # Risk-neutral adjustment
542
- mu_scaled = scaled_returns.mean()
543
- sigma_scaled = scaled_returns.std()
544
- expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
545
- adjustment = mu_scaled - expected_risk_neutral_mean
546
- risk_neutral_returns = scaled_returns - adjustment
547
-
548
- # Convert to terminal prices
549
- simulated_prices = s * np.exp(risk_neutral_returns / 100)
550
-
551
- # Convert to moneyness domain
552
- simulated_moneyness = s / simulated_prices
553
-
554
- # Perform KDE to get PDF
555
- kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
556
- pdf_values = kde(M)
557
-
558
- # Include volatility model params in moments
559
- avg_params = vol_model['avg_params']
560
- param_names = vol_model['param_names']
561
- model_params = {name: value for name, value in zip(param_names, avg_params)}
562
- model_params['model_type'] = model_type
563
- model_params['distribution'] = distribution
564
-
565
- # Add persistence for GARCH-type models
566
- if model_type.lower() == 'garch':
567
- model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
568
- else:
569
- continue # Skip this maturity if method is invalid
570
-
571
- # Ensure density integrates to 1
572
- dx = LM[1] - LM[0]
573
- total_area = np.sum(pdf_values * dx)
574
- if total_area <= 0:
575
- logger.warning(f"Invalid density (area <= 0) for maturity {i}, skipping.")
576
- continue
805
+ try:
806
+ # Get parameters for this maturity
807
+ s = model_results.loc[i, 's'] # Spot price
808
+ r = model_results.loc[i, 'r'] # Risk-free rate
809
+ t = model_results.loc[i, 't'] # Time to maturity in years
810
+
811
+ # Calculate time scaling parameters
812
+ tau_days = t * 365.25 # Days to expiry
813
+ n_periods = max(1, int(tau_days * 24 * 60 / minutes_per_period)) # Number of periods
814
+
815
+ logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
816
+
817
+ # Prepare domains
818
+ domains = prepare_domains(domain_params, s, return_domain)
819
+
820
+ # Calculate density based on method
821
+ if method == 'basic':
822
+ pdfs = calculate_basic_density(
823
+ df_hist=df_hist,
824
+ t=t,
825
+ r=r,
826
+ n_periods=n_periods,
827
+ domains=domains,
828
+ bandwidth=bandwidth
829
+ )
830
+ model_params = None
831
+
832
+ else: # 'garch' or 'egarch'
833
+ if vol_model is None:
834
+ logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
835
+ continue
836
+
837
+ pdfs, model_params = calculate_volatility_density(
838
+ vol_model=vol_model,
839
+ s=s,
840
+ t=t,
841
+ r=r,
842
+ n_periods=n_periods,
843
+ tau_days=tau_days,
844
+ domains=domains,
845
+ simulations=simulations,
846
+ bandwidth=bandwidth
847
+ )
848
+
849
+ # Get domain arrays for output
850
+ if return_domain == 'log_moneyness':
851
+ x = domains['log_moneyness']
852
+ pdf = pdfs['log_moneyness']
853
+ elif return_domain == 'moneyness':
854
+ x = domains['moneyness']
855
+ pdf = pdfs['moneyness']
856
+ elif return_domain == 'returns':
857
+ x = domains['returns']
858
+ pdf = pdfs['returns']
859
+ elif return_domain == 'strikes':
860
+ x = domains['strikes']
861
+ pdf = pdfs['strikes']
862
+
863
+ # Calculate statistical moments
864
+ moments = get_all_moments(x, pdf, model_params)
865
+
866
+ # Store results
867
+ pdf_surface[i] = pdf
868
+ cdf_surface[i] = pdfs['cdf']
869
+ x_surface[i] = x
870
+ all_moments[i] = moments
577
871
 
578
- pdf_values = pdf_values / total_area
579
-
580
- # Common processing for both methods
581
-
582
- # Transform densities to various domains
583
- if method == 'hist_returns':
584
- pdf_lm = pdf_values
585
- pdf_m = pdf_lm / M
586
- pdf_k = pdf_lm / K
587
- pdf_r = pdf_lm / (1 + R)
588
- else: # volatility models
589
- pdf_m = pdf_values
590
- pdf_lm = pdf_m * M
591
- pdf_k = pdf_lm / K
592
- pdf_r = pdf_lm / (1 + R)
593
-
594
- # Calculate CDF
595
- cdf = np.cumsum(pdf_lm * dx)
596
- cdf = np.minimum(cdf / cdf[-1], 1.0)
597
-
598
- # Select appropriate domain and calculate moments
599
- if return_domain == 'log_moneyness':
600
- x = LM
601
- pdf = pdf_lm
602
- moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
603
- elif return_domain == 'moneyness':
604
- x = M
605
- pdf = pdf_m
606
- moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
607
- elif return_domain == 'returns':
608
- x = R
609
- pdf = pdf_r
610
- moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
611
- elif return_domain == 'strikes':
612
- x = K
613
- pdf = pdf_k
614
- moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
615
- else:
616
- raise VolyError(f"Unsupported return_domain: {return_domain}")
872
+ except Exception as e:
873
+ logger.warning(f"Failed to calculate HD for maturity {i}: {str(e)}")
617
874
 
618
- # Store results
619
- pdf_surface[i] = pdf
620
- cdf_surface[i] = cdf
621
- x_surface[i] = x
622
- all_moments[i] = moments
875
+ # Check if we have any valid results
876
+ if not pdf_surface:
877
+ raise VolyError("No valid densities could be calculated. Check your input data.")
623
878
 
624
879
  # Create DataFrame with moments
625
880
  moments = pd.DataFrame(all_moments).T
626
881
 
627
- logger.info(
628
- f"Historical density calculation complete using {method} method with {model_type} model and {distribution} distribution")
882
+ logger.info(f"Historical density calculation complete using {method} method")
629
883
 
630
884
  return {
631
885
  'pdf_surface': pdf_surface,
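
To close, a hedged end-to-end sketch of the new keyword-argument interface of get_hd_surface(), which replaces the earlier **kwargs style. The values below are the defaults from the signature above; model_results is assumed to be the maturity-level DataFrame produced by voly's fitting step, carrying the 's', 't' and 'r' columns the function validates:

from voly.core.hd import get_historical_data, get_hd_surface

df_hist = get_historical_data('BTC', '90d', '1h', 'binance')

# model_results: assumed to come from voly's model fitting step (not shown here)
hd = get_hd_surface(model_results=model_results,
                    df_hist=df_hist,
                    domain_params=(-1.5, 1.5, 1000),
                    return_domain='log_moneyness',
                    method='garch',
                    distribution='normal',
                    window_length='30d',
                    n_fits=400,
                    simulations=5000,
                    bandwidth='silverman')

pdf_surface = hd['pdf_surface']  # per-maturity densities on the chosen domain
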