voly 0.0.151__py3-none-any.whl → 0.0.153__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voly/core/hd.py CHANGED
@@ -1,21 +1,18 @@
 """
-This module handles calculating historical densities from
-time series of prices and converting them to implied volatility smiles.
+This module handles calculating historical densities from time series of prices
+and converting them to probability distributions.
 """
 
 import ccxt
-import pandas as pd
 import numpy as np
+import pandas as pd
 import datetime as dt
+from typing import Dict, Tuple, Any, Optional, List
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
-from voly.formulas import iv, get_domain
-from voly.models import SVIModel
-from voly.core.fit import fit_model
-from arch import arch_model
+from voly.utils.density import prepare_domains, normalize_density, transform_to_domains, select_domain_results
 
 
 @catch_exception
@@ -64,10 +61,13 @@ def get_historical_data(currency: str,
     ohlcv_list = []
     ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
     ohlcv_list.append(ohlcv)
+
     while True:
         from_ts = ohlcv[-1][0]
         new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-        ohlcv.extend(new_ohlcv)
+        if len(new_ohlcv) <= 1:  # No new data or just one overlapping candle
+            break
+        ohlcv.extend(new_ohlcv[1:])  # Skip the first one to avoid duplicates
         if len(new_ohlcv) != 1000:
             break
 
@@ -76,6 +76,7 @@ def get_historical_data(currency: str,
     df_hist['date'] = pd.to_datetime(df_hist['date'], unit='ms')
     df_hist.set_index('date', inplace=True)
     df_hist = df_hist.sort_index(ascending=True)
+    df_hist = df_hist[~df_hist.index.duplicated(keep='last')].sort_index()
 
     logger.info(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")
 
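The two hunks above fix a duplicate-candle bug in the OHLCV pagination: each follow-up `fetch_ohlcv` call starts `since` the last timestamp already held, so the first candle of every new page repeats the last candle of the previous one. Below is a minimal sketch (no exchange calls, made-up timestamps and prices) of the overlap skip plus the index-level dedup that 0.0.153 adds as a second safety net:

```python
# Sketch only: emulates ccxt-style [timestamp_ms, ...] pages to show why the
# new code drops the first candle of each page and dedups the index afterwards.
import pandas as pd

page1 = [[1000, 1.0], [2000, 1.1], [3000, 1.2]]
page2 = [[3000, 1.2], [4000, 1.3]]  # first row overlaps page1's last row

ohlcv = list(page1)
if len(page2) > 1:
    ohlcv.extend(page2[1:])  # skip the overlapping candle, as in the diff

df = pd.DataFrame(ohlcv, columns=['date', 'close'])
df['date'] = pd.to_datetime(df['date'], unit='ms')
df = df.set_index('date').sort_index()
df = df[~df.index.duplicated(keep='last')]  # same dedup the diff adds
print(len(df))  # 4 unique timestamps, no duplicates
```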
@@ -83,426 +84,13 @@ def get_historical_data(currency: str,
 
 
 @catch_exception
-def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
+def calculate_normal_hd(df_hist: pd.DataFrame,
+                        t: float,
+                        r: float,
+                        n_periods: int,
+                        domains: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
     """
-    Convert window length string (e.g., '30d') to number of data points.
-
-    Parameters:
-    -----------
-    window_length : str
-        Window length in days, formatted as '7d', '30d', etc.
-    df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index
-
-    Returns:
-    --------
-    int
-        Number of data points corresponding to the window length
-    """
-    # Validate inputs
-    if not isinstance(window_length, str) or not window_length.endswith('d'):
-        raise VolyError("window_length should be in format '7d', '30d', etc.")
-
-    if len(df_hist) < 2:
-        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
-
-    # Extract number of days
-    days = int(window_length[:-1])
-
-    # Calculate average time delta between data points
-    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
-
-    # Convert to days and calculate points per window
-    days_per_point = avg_delta / (24 * 60 * 60)
-    n_points = int(days / days_per_point)
-
-    # Ensure minimum number of points
-    return max(n_points, 10)
-
-
-def get_param_names(model_type: str, distribution: str) -> List[str]:
-    """
-    Get parameter names for a volatility model and distribution.
-
-    Parameters:
-    -----------
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-
-    Returns:
-    --------
-    List[str]
-        List of parameter names
-    """
-    # GARCH(1,1) parameters
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
-
-    # EGARCH(1,1,1) parameters
-    elif model_type.lower() == 'egarch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
-
-    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
-
-
-@catch_exception
-def fit_volatility_model(log_returns: np.ndarray,
-                         df_hist: pd.DataFrame,
-                         model_type: str = 'garch',
-                         distribution: str = 'normal',
-                         window_length: str = '30d',
-                         n_fits: int = 400) -> Dict[str, Any]:
-    """
-    Fit a volatility model (GARCH or EGARCH) to historical returns.
-
-    Parameters:
-    -----------
-    log_returns : np.ndarray
-        Array of log returns (percent)
-    df_hist : pd.DataFrame
-        Historical price data
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-    window_length : str
-        Length of sliding window in days (e.g., '30d')
-    n_fits : int
-        Number of sliding windows to fit
-
-    Returns:
-    --------
-    Dict[str, Any]
-        Dictionary with model parameters and fitting results
-    """
-    # Parse window length
-    window_points = parse_window_length(window_length, df_hist)
-
-    # Validate data
-    if len(log_returns) < window_points + n_fits:
-        raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
-
-    # Adjust window sizes to avoid overfitting
-    n_fits = min(n_fits, max(100, len(log_returns) // 3))
-    window_points = min(window_points, max(20, len(log_returns) // 3))
-
-    # Calculate start and end indices for sliding windows
-    start_idx = window_points + n_fits
-    end_idx = n_fits
-
-    # Get parameter names for the model
-    param_names = get_param_names(model_type, distribution)
-    n_params = len(param_names)
-
-    # Initialize arrays for parameters and innovations
-    parameters = np.zeros((n_fits, n_params))
-    z_process = []
-
-    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
-                f"using {n_fits} windows of {window_length}")
-
-    # Fit models with sliding windows
-    for i in range(n_fits):
-        # Log progress
-        if i % (n_fits // 10) == 0:
-            logger.info(f"Fitting progress: {i}/{n_fits}")
-
-        # Check if we have enough data for this window
-        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
-            continue
-
-        # Extract window data
-        window = log_returns[end_idx - i - 1:start_idx - i - 1]
-
-        # Skip invalid windows
-        if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
-            continue
-
-        # Mean-center the data for numerical stability
-        data = window - np.mean(window)
-
-        try:
-            # Configure and fit model
-            if model_type.lower() == 'garch':
-                model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
-            else:  # egarch
-                model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
-
-            # Fit with optimization settings
-            fit_result = model.fit(disp='off', options={'maxiter': 1000})
-
-            # Extract parameters
-            params_dict = fit_result.params.to_dict()
-            param_values = [params_dict.get(param, 0) for param in param_names]
-            parameters[i, :] = param_values
-
-            # Extract standardized residuals (innovations)
-            residuals = fit_result.resid
-            conditional_vol = fit_result.conditional_volatility
-
-            if len(residuals) > 0 and len(conditional_vol) > 0:
-                z_t = residuals[-1] / conditional_vol[-1]
-                if not np.isnan(z_t) and not np.isinf(z_t):
-                    z_process.append(z_t)
-
-        except Exception as e:
-            logger.warning(f"Model fit failed for window {i}: {str(e)}")
-
-    # Check if we have enough successful fits
-    if len(z_process) < n_fits / 2:
-        raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
-
-    # Remove failed fits
-    valid_rows = ~np.all(parameters == 0, axis=1)
-    parameters = parameters[valid_rows]
-
-    # Calculate average parameters and standard deviations
-    avg_params = np.mean(parameters, axis=0)
-    std_params = np.std(parameters, axis=0)
-
-    return {
-        'model_type': model_type,
-        'distribution': distribution,
-        'parameters': parameters,
-        'avg_params': avg_params,
-        'std_params': std_params,
-        'z_process': np.array(z_process),
-        'param_names': param_names
-    }
-
-
-@catch_exception
-def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
-    """
-    Create a function to sample innovations based on the volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-
-    Returns:
-    --------
-    Callable
-        Function that returns random innovations when called
-    """
-    distribution = vol_model['distribution']
-    z_process = vol_model['z_process']
-
-    if distribution.lower() == 'normal':
-        # Use standard normal for normal distribution
-        def sample_innovation(size=1):
-            return np.random.normal(0, 1, size=size)
-    else:
-        # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')
-        z_range = np.linspace(min(z_process), max(z_process), 1000)
-        z_prob = kde(z_range)
-        z_prob = z_prob / np.sum(z_prob)
-
-        def sample_innovation(size=1):
-            return np.random.choice(z_range, size=size, p=z_prob)
-
-    return sample_innovation
-
-
-@catch_exception
-def generate_volatility_paths(vol_model: Dict[str, Any],
-                              horizon: int,
-                              simulations: int = 5000) -> Tuple[np.ndarray, float]:
-    """
-    Simulate future price paths using a fitted volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-    horizon : int
-        Number of time steps to simulate
-    simulations : int
-        Number of paths to simulate
-
-    Returns:
-    --------
-    Tuple[np.ndarray, float]
-        Array of simulated returns and the drift term
-    """
-    # Extract model information
-    parameters = vol_model['parameters']
-    model_type = vol_model['model_type']
-    distribution = vol_model['distribution']
-    param_names = vol_model['param_names']
-
-    # Get mean parameters
-    pars = vol_model['avg_params'].copy()
-    bounds = vol_model['std_params'].copy()
-
-    # Create parameter dictionary for easier access
-    param_dict = {name: value for name, value in zip(param_names, pars)}
-
-    # Log parameters
-    param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
-    logger.info(f"{model_type.upper()} parameters: {param_str}")
-
-    # Create innovation sampler
-    sample_innovation = create_innovation_sampler(vol_model)
-
-    # Initialize results array
-    simulated_returns = np.zeros(simulations)
-    mu = param_dict.get('mu', 0)
-
-    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
-
-    # Simulate paths
-    for i in range(simulations):
-        # Log progress
-        if (i + 1) % (simulations // 10) == 0:
-            logger.info(f"Simulation progress: {i + 1}/{simulations}")
-
-        # Vary parameters periodically for robustness
-        if (i + 1) % (simulations // 20) == 0:
-            # Create parameter variations based on their estimated distribution
-            sim_params = {}
-            for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
-                var = bound ** 2 / max(len(parameters), 1)
-                # Generate new parameter from normal distribution around the mean
-                new_par = np.random.normal(par, np.sqrt(var))
-
-                # Apply constraints to ensure valid parameters
-                if name == 'omega':
-                    new_par = max(new_par, 1e-6)  # Must be positive
-                elif name in ['alpha[1]', 'beta[1]']:
-                    new_par = max(min(new_par, 0.999), 0.001)  # Between 0 and 1
-                elif name == 'nu':
-                    new_par = max(new_par, 2.1)  # Degrees of freedom > 2
-
-                sim_params[name] = new_par
-        else:
-            sim_params = param_dict.copy()
-
-        # Initialize volatility based on model type
-        if model_type.lower() == 'garch':
-            # Extract GARCH parameters
-            omega = sim_params.get('omega', 0)
-            alpha = sim_params.get('alpha[1]', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize with unconditional variance
-            persistence = alpha + beta
-            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
-
-        else:  # egarch
-            # Extract EGARCH parameters
-            omega = sim_params.get('omega', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize log variance
-            log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
-            sigma2 = np.exp(log_sigma2)
-
-        # Initialize return sum
-        returns_sum = 0
-
-        # Simulate path
-        for _ in range(horizon):
-            # Sample innovation
-            z = sample_innovation()
-
-            # Update returns and volatility based on model type
-            if model_type.lower() == 'garch':
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Update GARCH volatility
-                sigma2 = (sim_params.get('omega', 0) +
-                          sim_params.get('alpha[1]', 0) * e ** 2 +
-                          sim_params.get('beta[1]', 0) * sigma2)
-
-            else:  # egarch
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Extract EGARCH parameters
-                gamma = sim_params.get('gamma[1]', 0)
-                alpha = sim_params.get('alpha[1]', 0)
-                beta = sim_params.get('beta[1]', 0)
-                omega = sim_params.get('omega', 0)
-
-                # Update EGARCH volatility
-                abs_z = abs(z)
-                log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
-                sigma2 = np.exp(log_sigma2)
-
-        # Store final return
-        simulated_returns[i] = returns_sum
-
-    return simulated_returns, mu * horizon
-
-
-@catch_exception
-def prepare_domains(domain_params: Tuple[float, float, int],
-                    s: float,
-                    return_domain: str) -> Dict[str, np.ndarray]:
-    """
-    Prepare domain arrays for different representations.
-
-    Parameters:
-    -----------
-    domain_params : Tuple[float, float, int]
-        (min_log_moneyness, max_log_moneyness, num_points)
-    s : float
-        Spot price
-    return_domain : str
-        Domain for results
-
-    Returns:
-    --------
-    Dict[str, np.ndarray]
-        Dictionary of domain arrays
-    """
-    # Create log-moneyness grid
-    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
-
-    # Calculate other domains
-    M = np.exp(LM)  # Moneyness
-    R = M - 1  # Returns
-    K = s / M  # Strike prices
-
-    # Calculate grid spacing
-    dx = LM[1] - LM[0]
-
-    return {
-        'log_moneyness': LM,
-        'moneyness': M,
-        'returns': R,
-        'strikes': K,
-        'dx': dx
-    }
-
-
-@catch_exception
-def calculate_basic_density(df_hist: pd.DataFrame,
-                            t: float,
-                            r: float,
-                            n_periods: int,
-                            domains: Dict[str, np.ndarray],
-                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
-    """
-    Calculate historical density using KDE of historical returns.
+    Calculate historical density using a normal distribution based on historical returns.
 
     Parameters:
     -----------
@@ -516,203 +104,54 @@ def calculate_basic_density(df_hist: pd.DataFrame,
         Number of periods to scale returns
     domains : Dict[str, np.ndarray]
         Domain arrays
-    bandwidth : str
-        KDE bandwidth method
 
     Returns:
     --------
     Dict[str, np.ndarray]
         Dictionary of PDFs in different domains
     """
-    # Extract domains
+    # Extract log-moneyness domain
     LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
     dx = domains['dx']
 
-    # Filter historical data for the maturity's lookback period - use exact time to expiry
-    lookback_days = t * 365.25  # Exact number of days to expiry
-    start_date = pd.Timestamp.now() - pd.Timedelta(days=lookback_days)
-    maturity_hist = df_hist[df_hist.index >= start_date].copy()
-
-    # Better diagnostics for debugging
-    if len(maturity_hist) < 2:
-        n_available = len(df_hist)
-        earliest = df_hist.index[0] if n_available > 0 else "N/A"
-        latest = df_hist.index[-1] if n_available > 0 else "N/A"
-
-        logger.warning(f"Insufficient data for t={t:.4f} years ({lookback_days:.2f} days lookback)")
-        logger.warning(f"Available data: {n_available} points from {earliest} to {latest}")
-        logger.warning(f"Required start date: {start_date}")
-
-        # Try using all available data as fallback
-        if n_available >= 2:
-            logger.warning(f"Using all available {n_available} data points as fallback")
-            maturity_hist = df_hist.copy()
-        else:
-            raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
-
-    # Calculate scaled returns
-    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
-    maturity_hist = maturity_hist.dropna()
-    returns = maturity_hist['log_returns'].values
-
-    if len(returns) < 2:
-        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
-
-    # Girsanov adjustment to shift to risk-neutral measure
-    mu_scaled = returns.mean()
-    sigma_scaled = returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
-    adjustment = mu_scaled - expected_risk_neutral_mean
-    adj_returns = returns - adjustment
-
-    # Create PDF with KDE
-    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
-    pdf_lm = kde(LM)
-
-    # Normalize the PDF
-    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
-
-    # Transform to other domains
-    pdf_m = pdf_lm / M
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }
-
-
-@catch_exception
-def calculate_volatility_density(vol_model: Dict[str, Any],
-                                 s: float,
-                                 t: float,
-                                 r: float,
-                                 n_periods: int,
-                                 tau_days: float,
-                                 domains: Dict[str, np.ndarray],
-                                 simulations: int = 5000,
-                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
-    """
-    Calculate historical density using volatility model simulation.
+    # Calculate log returns
+    returns = np.log(df_hist['close'] / df_hist['close'].shift(1)).dropna().values
 
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model from fit_volatility_model()
-    s : float
-        Spot price
-    t : float
-        Time to maturity in years
-    r : float
-        Risk-free rate
-    n_periods : int
-        Number of periods to scale returns
-    tau_days : float
-        Days to maturity
-    domains : Dict[str, np.ndarray]
-        Domain arrays
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method
+    # Filter historical data based on n_periods
+    if len(returns) < n_periods:
+        logger.warning(f"Not enough historical data, using all {len(returns)} points available")
+        dte_returns = returns
+    else:
+        dte_returns = returns[-n_periods:]
 
-    Returns:
-    --------
-    Tuple[Dict[str, np.ndarray], Dict[str, Any]]
-        Dictionary of PDFs in different domains and model parameters
-    """
-    # Extract domains
-    LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
-    dx = domains['dx']
+    # Calculate scaled parameters for normal distribution
+    mu_scaled = np.mean(dte_returns) * np.sqrt(n_periods)
+    sigma_scaled = np.std(dte_returns) * np.sqrt(n_periods)
 
-    # Simulate paths with the volatility model
-    horizon = max(1, int(tau_days))
-    simulated_returns, simulated_mu = generate_volatility_paths(
-        vol_model,
-        horizon,
-        simulations
-    )
-
-    # Scale the simulated returns to match target time horizon
-    scaling_factor = np.sqrt(n_periods / tau_days)
-    scaled_returns = simulated_returns * scaling_factor
-
-    # Risk-neutral adjustment
-    mu_scaled = scaled_returns.mean()
-    sigma_scaled = scaled_returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    # Apply Girsanov adjustment to shift to risk-neutral measure
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
     adjustment = mu_scaled - expected_risk_neutral_mean
-    risk_neutral_returns = scaled_returns - adjustment
+    mu_rn = mu_scaled - adjustment
 
-    # Convert to terminal prices
-    simulated_prices = s * np.exp(risk_neutral_returns / 100)
-
-    # Convert to moneyness domain (x-domain)
-    simulated_moneyness = s / simulated_prices
-
-    # Calculate PDF with KDE
-    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-    pdf_m = kde(M)
+    # Calculate PDF using normal distribution in log-moneyness domain
+    pdf_lm = stats.norm.pdf(LM, loc=mu_rn, scale=sigma_scaled)
 
     # Normalize the PDF
-    pdf_m = pdf_m / np.trapz(pdf_m, M)
+    pdf_lm = normalize_density(pdf_lm, dx)
 
     # Transform to other domains
-    pdf_lm = pdf_m * M
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
+    pdfs = transform_to_domains(pdf_lm, domains)
 
-    # Prepare model parameters for moments
-    avg_params = vol_model['avg_params']
-    param_names = vol_model['param_names']
-    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
-    model_params['model_type'] = vol_model['model_type']
-    model_params['distribution'] = vol_model['distribution']
-
-    # Add persistence for GARCH models
-    if vol_model['model_type'] == 'garch':
-        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }, model_params
+    return pdfs
 
 
 @catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
-                   return_domain: str = 'log_moneyness',
-                   method: str = 'garch',
-                   distribution: str = 'normal',
-                   window_length: str = '30d',
-                   n_fits: int = 400,
-                   simulations: int = 5000,
-                   bandwidth: str = 'silverman') -> Dict[str, Any]:
+                   return_domain: str = 'log_moneyness') -> Dict[str, Any]:
     """
-    Generate historical density surface from historical price data.
+    Generate historical density surface using normal distributions.
 
     Parameters:
     -----------
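The replacement pipeline is compact enough to trace by hand. Here is a standalone sketch (not part of the diff) of what `calculate_normal_hd` now does, with the `voly.utils.density` helpers that this file only imports approximated by inline stand-ins; note that because `adjustment = mu_scaled - expected_risk_neutral_mean`, the shifted mean `mu_rn` collapses exactly to the risk-neutral mean `(r - 0.5 * sigma**2) * sqrt(t)`:

```python
# Standalone sketch of the normal-HD math; normalize_density is assumed to
# rescale the PDF so it integrates to one over the log-moneyness grid.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
returns = rng.normal(0.0, 0.0005, size=20000)   # stand-in minute log returns
n_periods, t, r = 10080, 7 / 365.25, 0.05       # ~7 days of minute bars

dte_returns = returns[-n_periods:]
mu_scaled = np.mean(dte_returns) * np.sqrt(n_periods)
sigma_scaled = np.std(dte_returns) * np.sqrt(n_periods)

expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
mu_rn = mu_scaled - (mu_scaled - expected_risk_neutral_mean)  # == the RN mean

LM = np.linspace(-1.5, 1.5, 1000)               # log-moneyness grid
dx = LM[1] - LM[0]
pdf_lm = stats.norm.pdf(LM, loc=mu_rn, scale=sigma_scaled)
pdf_lm /= np.sum(pdf_lm * dx)                   # assumed normalize_density behavior
print(round(np.sum(pdf_lm * dx), 6))            # 1.0
```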
@@ -724,18 +163,6 @@ def get_hd_surface(model_results: pd.DataFrame,
         (min_log_moneyness, max_log_moneyness, num_points)
     return_domain : str
         Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
-    method : str
-        Method for HD estimation ('garch', 'egarch', 'basic')
-    distribution : str
-        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
-    window_length : str
-        Length of sliding windows for model fitting (e.g., '30d')
-    n_fits : int
-        Number of sliding windows for model fitting
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method
 
     Returns:
     --------
@@ -751,46 +178,14 @@ def get_hd_surface(model_results: pd.DataFrame,
     if len(df_hist) < 2:
         raise VolyError("Not enough data points in df_hist")
 
-    # Determine granularity from data
-    minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
-    minutes_per_period = max(1, int(minutes_diff))
-
-    # Validate method and distribution
-    valid_methods = ['garch', 'egarch', 'basic']
-    valid_distributions = ['normal', 'studentst', 'skewstudent']
-
-    method = method.lower()
-    distribution = distribution.lower()
-
-    if method not in valid_methods:
-        raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
-
-    if method in ['garch', 'egarch'] and distribution not in valid_distributions:
-        raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
-
     # Validate return domain
     valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
     if return_domain not in valid_domains:
         raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
 
-    # Calculate log returns
-    log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
-    log_returns = log_returns.dropna().values
-
-    # Fit volatility model if needed
-    vol_model = None
-    if method in ['garch', 'egarch']:
-        model_type = method
-        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
-
-        vol_model = fit_volatility_model(
-            log_returns=log_returns,
-            df_hist=df_hist,
-            model_type=model_type,
-            distribution=distribution,
-            window_length=window_length,
-            n_fits=n_fits
-        )
+    # Determine granularity from data (minutes between data points)
+    time_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
+    minutes_per_period = max(1, int(time_diff))
 
     # Initialize result containers
     pdf_surface = {}
@@ -803,67 +198,34 @@ def get_hd_surface(model_results: pd.DataFrame,
         try:
             # Get parameters for this maturity
             s = model_results.loc[i, 's']  # Spot price
-            r = model_results.loc[i, 'r']  # Risk-free rate
             t = model_results.loc[i, 't']  # Time to maturity in years
+            r = model_results.loc[i, 'r']  # Risk-free rate
 
-            # Calculate time scaling parameters
-            tau_days = t * 365.25  # Days to expiry
-            n_periods = max(1, int(tau_days * 24 * 60 / minutes_per_period))  # Number of periods
-
-            logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+            # Calculate relevant periods for this maturity
+            dte = t * 365.25  # Days to expiry
+            n_periods = max(1, int(dte * 24 * 60 / minutes_per_period))
 
             # Prepare domains
-            domains = prepare_domains(domain_params, s, return_domain)
-
-            # Calculate density based on method
-            if method == 'basic':
-                pdfs = calculate_basic_density(
-                    df_hist=df_hist,
-                    t=t,
-                    r=r,
-                    n_periods=n_periods,
-                    domains=domains,
-                    bandwidth=bandwidth
-                )
-                model_params = None
-
-            else:  # 'garch' or 'egarch'
-                if vol_model is None:
-                    logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
-                    continue
-
-                pdfs, model_params = calculate_volatility_density(
-                    vol_model=vol_model,
-                    s=s,
-                    t=t,
-                    r=r,
-                    n_periods=n_periods,
-                    tau_days=tau_days,
-                    domains=domains,
-                    simulations=simulations,
-                    bandwidth=bandwidth
-                )
-
-            # Get domain arrays for output
-            if return_domain == 'log_moneyness':
-                x = domains['log_moneyness']
-                pdf = pdfs['log_moneyness']
-            elif return_domain == 'moneyness':
-                x = domains['moneyness']
-                pdf = pdfs['moneyness']
-            elif return_domain == 'returns':
-                x = domains['returns']
-                pdf = pdfs['returns']
-            elif return_domain == 'strikes':
-                x = domains['strikes']
-                pdf = pdfs['strikes']
-
-            # Calculate statistical moments
-            moments = get_all_moments(x, pdf, model_params)
+            domains = prepare_domains(domain_params, s)
+
+            # Calculate density
+            pdfs = calculate_normal_hd(
+                df_hist=df_hist,
+                t=t,
+                r=r,
+                n_periods=n_periods,
+                domains=domains
+            )
+
+            # Select results for the requested domain
+            pdf, cdf, x = select_domain_results(pdfs, domains, return_domain)
+
+            # Calculate moments
+            moments = get_all_moments(x, pdf)
 
             # Store results
             pdf_surface[i] = pdf
-            cdf_surface[i] = pdfs['cdf']
+            cdf_surface[i] = cdf
             x_surface[i] = x
            all_moments[i] = moments
 
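The `n_periods` that feeds `calculate_normal_hd` is plain bar-counting: days to expiry times bars per day at the detected granularity. A two-line check of the arithmetic for minute bars and a 7-day expiry:

```python
# Worked example of the n_periods arithmetic from the loop above.
minutes_per_period = 1          # minute-granularity history
dte = 7.0                       # days to expiry (t * 365.25 in the diff)
n_periods = max(1, int(dte * 24 * 60 / minutes_per_period))
print(n_periods)                # 10080
```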
@@ -877,7 +239,7 @@ def get_hd_surface(model_results: pd.DataFrame,
     # Create DataFrame with moments
     moments = pd.DataFrame(all_moments).T
 
-    logger.info(f"Historical density calculation complete using {method} method")
+    logger.info("Historical density calculation complete using normal distribution")
 
     return {
         'pdf_surface': pdf_surface,
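Finally, a hypothetical end-to-end call against the slimmed-down 0.0.153 signature. The data is synthetic; beyond what this diff shows, it assumes `model_results` needs only the 's', 't', and 'r' columns read in the loop, and it inspects just the 'pdf_surface' key visible in the (truncated) return statement:

```python
# Hypothetical usage sketch, not taken from the package's own docs.
import numpy as np
import pandas as pd
from voly.core.hd import get_hd_surface

idx = pd.date_range('2024-01-01', periods=20000, freq='1min')
close = 100 * np.exp(np.cumsum(np.random.default_rng(1).normal(0, 5e-4, 20000)))
df_hist = pd.DataFrame({'close': close}, index=idx)

model_results = pd.DataFrame(
    {'s': [100.0, 100.0], 't': [7 / 365.25, 30 / 365.25], 'r': [0.05, 0.05]},
    index=['7d', '30d'],  # one row per maturity
)

hd = get_hd_surface(model_results, df_hist, return_domain='log_moneyness')
print(list(hd['pdf_surface']))  # densities keyed by maturity index
```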