voly 0.0.152__py3-none-any.whl → 0.0.153__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voly/core/hd.py CHANGED
@@ -1,21 +1,18 @@
 """
-This module handles calculating historical densities from
-time series of prices and converting them to implied volatility smiles.
+This module handles calculating historical densities from time series of prices
+and converting them to probability distributions.
 """
 
 import ccxt
-import pandas as pd
 import numpy as np
+import pandas as pd
 import datetime as dt
+from typing import Dict, Tuple, Any, Optional, List
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
-from voly.formulas import iv, get_domain
-from voly.models import SVIModel
-from voly.core.fit import fit_model
-from arch import arch_model
+from voly.utils.density import prepare_domains, normalize_density, transform_to_domains, select_domain_results
 
 
 @catch_exception
@@ -64,10 +61,13 @@ def get_historical_data(currency: str,
     ohlcv_list = []
     ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
     ohlcv_list.append(ohlcv)
+
     while True:
         from_ts = ohlcv[-1][0]
         new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-        ohlcv.extend(new_ohlcv)
+        if len(new_ohlcv) <= 1:  # No new data or just one overlapping candle
+            break
+        ohlcv.extend(new_ohlcv[1:])  # Skip the first one to avoid duplicates
        if len(new_ohlcv) != 1000:
            break
 
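Note on the pagination fix above: on most venues ccxt's fetch_ohlcv treats since as inclusive, so paging from the timestamp of the last candle already held returns that candle again; the new code breaks when a page carries nothing beyond the overlap and drops the duplicate otherwise. A minimal standalone sketch of the same pattern, with the exchange, symbol, and start date as placeholders:

    import ccxt

    exchange = ccxt.binance()  # placeholder; any ccxt exchange with OHLCV support works
    symbol, granularity = 'BTC/USDT', '1m'
    from_ts = exchange.parse8601('2024-01-01T00:00:00Z')

    ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
    while True:
        # Page from the last candle we already hold; `since` is usually inclusive.
        new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=ohlcv[-1][0], limit=1000)
        if len(new_ohlcv) <= 1:
            break  # nothing beyond the overlapping candle
        ohlcv.extend(new_ohlcv[1:])  # skip the duplicate first candle
        if len(new_ohlcv) != 1000:
            break  # a short page means we reached the newest data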
@@ -84,426 +84,13 @@ def get_historical_data(currency: str,
 
 
 @catch_exception
-def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
+def calculate_normal_hd(df_hist: pd.DataFrame,
+                        t: float,
+                        r: float,
+                        n_periods: int,
+                        domains: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
     """
-    Convert window length string (e.g., '30d') to number of data points.
-
-    Parameters:
-    -----------
-    window_length : str
-        Window length in days, formatted as '7d', '30d', etc.
-    df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index
-
-    Returns:
-    --------
-    int
-        Number of data points corresponding to the window length
-    """
-    # Validate inputs
-    if not isinstance(window_length, str) or not window_length.endswith('d'):
-        raise VolyError("window_length should be in format '7d', '30d', etc.")
-
-    if len(df_hist) < 2:
-        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
-
-    # Extract number of days
-    days = int(window_length[:-1])
-
-    # Calculate average time delta between data points
-    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
-
-    # Convert to days and calculate points per window
-    days_per_point = avg_delta / (24 * 60 * 60)
-    n_points = int(days / days_per_point)
-
-    # Ensure minimum number of points
-    return max(n_points, 10)
-
-
-def get_param_names(model_type: str, distribution: str) -> List[str]:
-    """
-    Get parameter names for a volatility model and distribution.
-
-    Parameters:
-    -----------
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-
-    Returns:
-    --------
-    List[str]
-        List of parameter names
-    """
-    # GARCH(1,1) parameters
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
-
-    # EGARCH(1,1,1) parameters
-    elif model_type.lower() == 'egarch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
-
-    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
-
-
-@catch_exception
-def fit_volatility_model(log_returns: np.ndarray,
-                         df_hist: pd.DataFrame,
-                         model_type: str = 'garch',
-                         distribution: str = 'normal',
-                         window_length: str = '30d',
-                         n_fits: int = 400) -> Dict[str, Any]:
-    """
-    Fit a volatility model (GARCH or EGARCH) to historical returns.
-
-    Parameters:
-    -----------
-    log_returns : np.ndarray
-        Array of log returns (percent)
-    df_hist : pd.DataFrame
-        Historical price data
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-    window_length : str
-        Length of sliding window in days (e.g., '30d')
-    n_fits : int
-        Number of sliding windows to fit
-
-    Returns:
-    --------
-    Dict[str, Any]
-        Dictionary with model parameters and fitting results
-    """
-    # Parse window length
-    window_points = parse_window_length(window_length, df_hist)
-
-    # Validate data
-    if len(log_returns) < window_points + n_fits:
-        raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
-
-    # Adjust window sizes to avoid overfitting
-    n_fits = min(n_fits, max(100, len(log_returns) // 3))
-    window_points = min(window_points, max(20, len(log_returns) // 3))
-
-    # Calculate start and end indices for sliding windows
-    start_idx = window_points + n_fits
-    end_idx = n_fits
-
-    # Get parameter names for the model
-    param_names = get_param_names(model_type, distribution)
-    n_params = len(param_names)
-
-    # Initialize arrays for parameters and innovations
-    parameters = np.zeros((n_fits, n_params))
-    z_process = []
-
-    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
-                f"using {n_fits} windows of {window_length}")
-
-    # Fit models with sliding windows
-    for i in range(n_fits):
-        # Log progress
-        if i % (n_fits // 10) == 0:
-            logger.info(f"Fitting progress: {i}/{n_fits}")
-
-        # Check if we have enough data for this window
-        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
-            continue
-
-        # Extract window data
-        window = log_returns[end_idx - i - 1:start_idx - i - 1]
-
-        # Skip invalid windows
-        if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
-            continue
-
-        # Mean-center the data for numerical stability
-        data = window - np.mean(window)
-
-        try:
-            # Configure and fit model
-            if model_type.lower() == 'garch':
-                model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
-            else:  # egarch
-                model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
-
-            # Fit with optimization settings
-            fit_result = model.fit(disp='off', options={'maxiter': 1000})
-
-            # Extract parameters
-            params_dict = fit_result.params.to_dict()
-            param_values = [params_dict.get(param, 0) for param in param_names]
-            parameters[i, :] = param_values
-
-            # Extract standardized residuals (innovations)
-            residuals = fit_result.resid
-            conditional_vol = fit_result.conditional_volatility
-
-            if len(residuals) > 0 and len(conditional_vol) > 0:
-                z_t = residuals[-1] / conditional_vol[-1]
-                if not np.isnan(z_t) and not np.isinf(z_t):
-                    z_process.append(z_t)
-
-        except Exception as e:
-            logger.warning(f"Model fit failed for window {i}: {str(e)}")
-
-    # Check if we have enough successful fits
-    if len(z_process) < n_fits / 2:
-        raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
-
-    # Remove failed fits
-    valid_rows = ~np.all(parameters == 0, axis=1)
-    parameters = parameters[valid_rows]
-
-    # Calculate average parameters and standard deviations
-    avg_params = np.mean(parameters, axis=0)
-    std_params = np.std(parameters, axis=0)
-
-    return {
-        'model_type': model_type,
-        'distribution': distribution,
-        'parameters': parameters,
-        'avg_params': avg_params,
-        'std_params': std_params,
-        'z_process': np.array(z_process),
-        'param_names': param_names
-    }
-
-
-@catch_exception
-def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
-    """
-    Create a function to sample innovations based on the volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-
-    Returns:
-    --------
-    Callable
-        Function that returns random innovations when called
-    """
-    distribution = vol_model['distribution']
-    z_process = vol_model['z_process']
-
-    if distribution.lower() == 'normal':
-        # Use standard normal for normal distribution
-        def sample_innovation(size=1):
-            return np.random.normal(0, 1, size=size)
-    else:
-        # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')
-        z_range = np.linspace(min(z_process), max(z_process), 1000)
-        z_prob = kde(z_range)
-        z_prob = z_prob / np.sum(z_prob)
-
-        def sample_innovation(size=1):
-            return np.random.choice(z_range, size=size, p=z_prob)
-
-    return sample_innovation
-
-
-@catch_exception
-def generate_volatility_paths(vol_model: Dict[str, Any],
-                              horizon: int,
-                              simulations: int = 5000) -> Tuple[np.ndarray, float]:
-    """
-    Simulate future price paths using a fitted volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-    horizon : int
-        Number of time steps to simulate
-    simulations : int
-        Number of paths to simulate
-
-    Returns:
-    --------
-    Tuple[np.ndarray, float]
-        Array of simulated returns and the drift term
-    """
-    # Extract model information
-    parameters = vol_model['parameters']
-    model_type = vol_model['model_type']
-    distribution = vol_model['distribution']
-    param_names = vol_model['param_names']
-
-    # Get mean parameters
-    pars = vol_model['avg_params'].copy()
-    bounds = vol_model['std_params'].copy()
-
-    # Create parameter dictionary for easier access
-    param_dict = {name: value for name, value in zip(param_names, pars)}
-
-    # Log parameters
-    param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
-    logger.info(f"{model_type.upper()} parameters: {param_str}")
-
-    # Create innovation sampler
-    sample_innovation = create_innovation_sampler(vol_model)
-
-    # Initialize results array
-    simulated_returns = np.zeros(simulations)
-    mu = param_dict.get('mu', 0)
-
-    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
-
-    # Simulate paths
-    for i in range(simulations):
-        # Log progress
-        if (i + 1) % (simulations // 10) == 0:
-            logger.info(f"Simulation progress: {i + 1}/{simulations}")
-
-        # Vary parameters periodically for robustness
-        if (i + 1) % (simulations // 20) == 0:
-            # Create parameter variations based on their estimated distribution
-            sim_params = {}
-            for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
-                var = bound ** 2 / max(len(parameters), 1)
-                # Generate new parameter from normal distribution around the mean
-                new_par = np.random.normal(par, np.sqrt(var))
-
-                # Apply constraints to ensure valid parameters
-                if name == 'omega':
-                    new_par = max(new_par, 1e-6)  # Must be positive
-                elif name in ['alpha[1]', 'beta[1]']:
-                    new_par = max(min(new_par, 0.999), 0.001)  # Between 0 and 1
-                elif name == 'nu':
-                    new_par = max(new_par, 2.1)  # Degrees of freedom > 2
-
-                sim_params[name] = new_par
-        else:
-            sim_params = param_dict.copy()
-
-        # Initialize volatility based on model type
-        if model_type.lower() == 'garch':
-            # Extract GARCH parameters
-            omega = sim_params.get('omega', 0)
-            alpha = sim_params.get('alpha[1]', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize with unconditional variance
-            persistence = alpha + beta
-            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
-
-        else:  # egarch
-            # Extract EGARCH parameters
-            omega = sim_params.get('omega', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize log variance
-            log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
-            sigma2 = np.exp(log_sigma2)
-
-        # Initialize return sum
-        returns_sum = 0
-
-        # Simulate path
-        for _ in range(horizon):
-            # Sample innovation
-            z = sample_innovation()
-
-            # Update returns and volatility based on model type
-            if model_type.lower() == 'garch':
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Update GARCH volatility
-                sigma2 = (sim_params.get('omega', 0) +
-                          sim_params.get('alpha[1]', 0) * e ** 2 +
-                          sim_params.get('beta[1]', 0) * sigma2)
-
-            else:  # egarch
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Extract EGARCH parameters
-                gamma = sim_params.get('gamma[1]', 0)
-                alpha = sim_params.get('alpha[1]', 0)
-                beta = sim_params.get('beta[1]', 0)
-                omega = sim_params.get('omega', 0)
-
-                # Update EGARCH volatility
-                abs_z = abs(z)
-                log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
-                sigma2 = np.exp(log_sigma2)
-
-        # Store final return
-        simulated_returns[i] = returns_sum
-
-    return simulated_returns, mu * horizon
-
-
-@catch_exception
-def prepare_domains(domain_params: Tuple[float, float, int],
-                    s: float,
-                    return_domain: str) -> Dict[str, np.ndarray]:
-    """
-    Prepare domain arrays for different representations.
-
-    Parameters:
-    -----------
-    domain_params : Tuple[float, float, int]
-        (min_log_moneyness, max_log_moneyness, num_points)
-    s : float
-        Spot price
-    return_domain : str
-        Domain for results
-
-    Returns:
-    --------
-    Dict[str, np.ndarray]
-        Dictionary of domain arrays
-    """
-    # Create log-moneyness grid
-    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
-
-    # Calculate other domains
-    M = np.exp(LM)  # Moneyness
-    R = M - 1  # Returns
-    K = s / M  # Strike prices
-
-    # Calculate grid spacing
-    dx = LM[1] - LM[0]
-
-    return {
-        'log_moneyness': LM,
-        'moneyness': M,
-        'returns': R,
-        'strikes': K,
-        'dx': dx
-    }
-
-
-@catch_exception
-def calculate_basic_density(df_hist: pd.DataFrame,
-                            t: float,
-                            r: float,
-                            n_periods: int,
-                            domains: Dict[str, np.ndarray],
-                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
-    """
-    Calculate historical density using KDE of historical returns.
+    Calculate historical density using a normal distribution based on historical returns.
 
     Parameters:
     -----------
@@ -517,203 +104,54 @@ def calculate_basic_density(df_hist: pd.DataFrame,
         Number of periods to scale returns
     domains : Dict[str, np.ndarray]
         Domain arrays
-    bandwidth : str
-        KDE bandwidth method
 
     Returns:
    --------
     Dict[str, np.ndarray]
         Dictionary of PDFs in different domains
     """
-    # Extract domains
+    # Extract log-moneyness domain
     LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
     dx = domains['dx']
 
-    # Filter historical data for the maturity's lookback period - use exact time to expiry
-    lookback_days = t * 365.25  # Exact number of days to expiry
-    start_date = pd.Timestamp.now() - pd.Timedelta(days=lookback_days)
-    maturity_hist = df_hist[df_hist.index >= start_date].copy()
-
-    # Better diagnostics for debugging
-    if len(maturity_hist) < 2:
-        n_available = len(df_hist)
-        earliest = df_hist.index[0] if n_available > 0 else "N/A"
-        latest = df_hist.index[-1] if n_available > 0 else "N/A"
-
-        logger.warning(f"Insufficient data for t={t:.4f} years ({lookback_days:.2f} days lookback)")
-        logger.warning(f"Available data: {n_available} points from {earliest} to {latest}")
-        logger.warning(f"Required start date: {start_date}")
-
-        # Try using all available data as fallback
-        if n_available >= 2:
-            logger.warning(f"Using all available {n_available} data points as fallback")
-            maturity_hist = df_hist.copy()
-        else:
-            raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
-
-    # Calculate scaled returns
-    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
-    maturity_hist = maturity_hist.dropna()
-    returns = maturity_hist['log_returns'].values
-
-    if len(returns) < 2:
-        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
-
-    # Girsanov adjustment to shift to risk-neutral measure
-    mu_scaled = returns.mean()
-    sigma_scaled = returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
-    adjustment = mu_scaled - expected_risk_neutral_mean
-    adj_returns = returns - adjustment
-
-    # Create PDF with KDE
-    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
-    pdf_lm = kde(LM)
-
-    # Normalize the PDF
-    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
-
-    # Transform to other domains
-    pdf_m = pdf_lm / M
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }
-
-
-@catch_exception
-def calculate_volatility_density(vol_model: Dict[str, Any],
-                                 s: float,
-                                 t: float,
-                                 r: float,
-                                 n_periods: int,
-                                 tau_days: float,
-                                 domains: Dict[str, np.ndarray],
-                                 simulations: int = 5000,
-                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
-    """
-    Calculate historical density using volatility model simulation.
+    # Calculate log returns
+    returns = np.log(df_hist['close'] / df_hist['close'].shift(1)).dropna().values
 
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model from fit_volatility_model()
-    s : float
-        Spot price
-    t : float
-        Time to maturity in years
-    r : float
-        Risk-free rate
-    n_periods : int
-        Number of periods to scale returns
-    tau_days : float
-        Days to maturity
-    domains : Dict[str, np.ndarray]
-        Domain arrays
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method
+    # Filter historical data based on n_periods
+    if len(returns) < n_periods:
+        logger.warning(f"Not enough historical data, using all {len(returns)} points available")
+        dte_returns = returns
+    else:
+        dte_returns = returns[-n_periods:]
 
-    Returns:
-    --------
-    Tuple[Dict[str, np.ndarray], Dict[str, Any]]
-        Dictionary of PDFs in different domains and model parameters
-    """
-    # Extract domains
-    LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
-    dx = domains['dx']
+    # Calculate scaled parameters for normal distribution
+    mu_scaled = np.mean(dte_returns) * np.sqrt(n_periods)
+    sigma_scaled = np.std(dte_returns) * np.sqrt(n_periods)
 
-    # Simulate paths with the volatility model
-    horizon = max(1, int(tau_days))
-    simulated_returns, simulated_mu = generate_volatility_paths(
-        vol_model,
-        horizon,
-        simulations
-    )
-
-    # Scale the simulated returns to match target time horizon
-    scaling_factor = np.sqrt(n_periods / tau_days)
-    scaled_returns = simulated_returns * scaling_factor
-
-    # Risk-neutral adjustment
-    mu_scaled = scaled_returns.mean()
-    sigma_scaled = scaled_returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    # Apply Girsanov adjustment to shift to risk-neutral measure
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
     adjustment = mu_scaled - expected_risk_neutral_mean
-    risk_neutral_returns = scaled_returns - adjustment
+    mu_rn = mu_scaled - adjustment
 
-    # Convert to terminal prices
-    simulated_prices = s * np.exp(risk_neutral_returns / 100)
-
-    # Convert to moneyness domain (x-domain)
-    simulated_moneyness = s / simulated_prices
-
-    # Calculate PDF with KDE
-    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-    pdf_m = kde(M)
+    # Calculate PDF using normal distribution in log-moneyness domain
+    pdf_lm = stats.norm.pdf(LM, loc=mu_rn, scale=sigma_scaled)
 
     # Normalize the PDF
-    pdf_m = pdf_m / np.trapz(pdf_m, M)
+    pdf_lm = normalize_density(pdf_lm, dx)
 
     # Transform to other domains
-    pdf_lm = pdf_m * M
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
+    pdfs = transform_to_domains(pdf_lm, domains)
 
-    # Prepare model parameters for moments
-    avg_params = vol_model['avg_params']
-    param_names = vol_model['param_names']
-    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
-    model_params['model_type'] = vol_model['model_type']
-    model_params['distribution'] = vol_model['distribution']
-
-    # Add persistence for GARCH models
-    if vol_model['model_type'] == 'garch':
-        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }, model_params
+    return pdfs
 
 
 @catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
-                   return_domain: str = 'log_moneyness',
-                   method: str = 'garch',
-                   distribution: str = 'normal',
-                   window_length: str = '30d',
-                   n_fits: int = 400,
-                   simulations: int = 5000,
-                   bandwidth: str = 'silverman') -> Dict[str, Any]:
+                   return_domain: str = 'log_moneyness') -> Dict[str, Any]:
     """
-    Generate historical density surface from historical price data.
+    Generate historical density surface using normal distributions.
 
     Parameters:
     -----------
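Note on the new calculate_normal_hd: the Girsanov step collapses algebraically, since mu_rn = mu_scaled - (mu_scaled - expected_risk_neutral_mean) = (r - sigma_scaled**2 / 2) * sqrt(t); the historical mean cancels and only the scaled standard deviation of returns shapes the density. A self-contained sketch of the same computation, where np.trapz normalization stands in for the voly.utils.density helpers (normalize_density and transform_to_domains are not shown in this diff, so their exact behavior is assumed):

    import numpy as np
    from scipy import stats

    def normal_hd_sketch(close: np.ndarray, t: float, r: float,
                         n_periods: int, lm_grid: np.ndarray) -> np.ndarray:
        """Illustrative normal historical density over a log-moneyness grid."""
        returns = np.diff(np.log(close))        # per-period log returns
        dte_returns = returns[-n_periods:]      # lookback matched to expiry
        sigma_scaled = np.std(dte_returns) * np.sqrt(n_periods)

        # After the Girsanov shift the historical mean cancels, leaving the
        # risk-neutral drift (r - sigma^2 / 2) * sqrt(t).
        mu_rn = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)

        pdf_lm = stats.norm.pdf(lm_grid, loc=mu_rn, scale=sigma_scaled)
        return pdf_lm / np.trapz(pdf_lm, lm_grid)  # normalize on the grid

    lm = np.linspace(-1.5, 1.5, 1000)
    close = 30_000 * np.cumprod(1 + 0.0005 * np.random.randn(10_000))  # synthetic prices
    pdf = normal_hd_sketch(close, t=30 / 365.25, r=0.03, n_periods=8640, lm_grid=lm)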
@@ -725,18 +163,6 @@ def get_hd_surface(model_results: pd.DataFrame,
         (min_log_moneyness, max_log_moneyness, num_points)
     return_domain : str
         Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
-    method : str
-        Method for HD estimation ('garch', 'egarch', 'basic')
-    distribution : str
-        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
-    window_length : str
-        Length of sliding windows for model fitting (e.g., '30d')
-    n_fits : int
-        Number of sliding windows for model fitting
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method
 
     Returns:
     --------
@@ -752,46 +178,14 @@ def get_hd_surface(model_results: pd.DataFrame,
     if len(df_hist) < 2:
         raise VolyError("Not enough data points in df_hist")
 
-    # Determine granularity from data
-    minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
-    minutes_per_period = max(1, int(minutes_diff))
-
-    # Validate method and distribution
-    valid_methods = ['garch', 'egarch', 'basic']
-    valid_distributions = ['normal', 'studentst', 'skewstudent']
-
-    method = method.lower()
-    distribution = distribution.lower()
-
-    if method not in valid_methods:
-        raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
-
-    if method in ['garch', 'egarch'] and distribution not in valid_distributions:
-        raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
-
     # Validate return domain
     valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
     if return_domain not in valid_domains:
         raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
 
-    # Calculate log returns
-    log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
-    log_returns = log_returns.dropna().values
-
-    # Fit volatility model if needed
-    vol_model = None
-    if method in ['garch', 'egarch']:
-        model_type = method
-        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
-
-        vol_model = fit_volatility_model(
-            log_returns=log_returns,
-            df_hist=df_hist,
-            model_type=model_type,
-            distribution=distribution,
-            window_length=window_length,
-            n_fits=n_fits
-        )
+    # Determine granularity from data (minutes between data points)
+    time_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
+    minutes_per_period = max(1, int(time_diff))
 
     # Initialize result containers
     pdf_surface = {}
@@ -804,67 +198,34 @@ def get_hd_surface(model_results: pd.DataFrame,
        try:
            # Get parameters for this maturity
            s = model_results.loc[i, 's']  # Spot price
-           r = model_results.loc[i, 'r']  # Risk-free rate
            t = model_results.loc[i, 't']  # Time to maturity in years
+           r = model_results.loc[i, 'r']  # Risk-free rate
 
-           # Calculate time scaling parameters
-           tau_days = t * 365.25  # Days to expiry
-           n_periods = max(1, int(tau_days * 24 * 60 / minutes_per_period))  # Number of periods
-
-           logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+           # Calculate relevant periods for this maturity
+           dte = t * 365.25  # Days to expiry
+           n_periods = max(1, int(dte * 24 * 60 / minutes_per_period))
 
            # Prepare domains
-           domains = prepare_domains(domain_params, s, return_domain)
-
-           # Calculate density based on method
-           if method == 'basic':
-               pdfs = calculate_basic_density(
-                   df_hist=df_hist,
-                   t=t,
-                   r=r,
-                   n_periods=n_periods,
-                   domains=domains,
-                   bandwidth=bandwidth
-               )
-               model_params = None
-
-           else:  # 'garch' or 'egarch'
-               if vol_model is None:
-                   logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
-                   continue
-
-               pdfs, model_params = calculate_volatility_density(
-                   vol_model=vol_model,
-                   s=s,
-                   t=t,
-                   r=r,
-                   n_periods=n_periods,
-                   tau_days=tau_days,
-                   domains=domains,
-                   simulations=simulations,
-                   bandwidth=bandwidth
-               )
-
-           # Get domain arrays for output
-           if return_domain == 'log_moneyness':
-               x = domains['log_moneyness']
-               pdf = pdfs['log_moneyness']
-           elif return_domain == 'moneyness':
-               x = domains['moneyness']
-               pdf = pdfs['moneyness']
-           elif return_domain == 'returns':
-               x = domains['returns']
-               pdf = pdfs['returns']
-           elif return_domain == 'strikes':
-               x = domains['strikes']
-               pdf = pdfs['strikes']
-
-           # Calculate statistical moments
-           moments = get_all_moments(x, pdf, model_params)
+           domains = prepare_domains(domain_params, s)
+
+           # Calculate density
+           pdfs = calculate_normal_hd(
+               df_hist=df_hist,
+               t=t,
+               r=r,
+               n_periods=n_periods,
+               domains=domains
+           )
+
+           # Select results for the requested domain
+           pdf, cdf, x = select_domain_results(pdfs, domains, return_domain)
+
+           # Calculate moments
+           moments = get_all_moments(x, pdf)
 
            # Store results
            pdf_surface[i] = pdf
-           cdf_surface[i] = pdfs['cdf']
+           cdf_surface[i] = cdf
            x_surface[i] = x
            all_moments[i] = moments
 
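Note: n_periods above converts each maturity's time to expiry into a count of candles at the data's granularity, dte * 24 * 60 / minutes_per_period, which then becomes the lookback used by calculate_normal_hd. A worked check with illustrative values:

    minutes_per_period = 5              # e.g. df_hist holds 5-minute candles
    t = 30 / 365.25                     # 30 days to expiry, in years
    dte = t * 365.25                    # back to days: 30.0
    n_periods = max(1, int(dte * 24 * 60 / minutes_per_period))
    assert n_periods == 8640            # 30 days of 5-minute candles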
@@ -878,7 +239,7 @@ def get_hd_surface(model_results: pd.DataFrame,
     # Create DataFrame with moments
     moments = pd.DataFrame(all_moments).T
 
-    logger.info(f"Historical density calculation complete using {method} method")
+    logger.info("Historical density calculation complete using normal distribution")
 
     return {
         'pdf_surface': pdf_surface,
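Note: after this release the public entry point needs only the fitted surface inputs and a return domain. A hypothetical call sketch; the column names 's', 't', 'r' and the datetime-indexed 'close' series follow the code above, the values and index labels are made up, and only the 'pdf_surface' key of the returned dict is visible in this truncated hunk:

    import numpy as np
    import pandas as pd
    from voly.core.hd import get_hd_surface

    # Hypothetical fit results: one row per maturity with spot, expiry and rate.
    model_results = pd.DataFrame(
        {'s': [65_000.0, 65_000.0], 't': [7 / 365.25, 30 / 365.25], 'r': [0.03, 0.03]},
        index=['7d', '30d'],
    )

    # Synthetic datetime-indexed history with a 'close' column (5-minute candles).
    idx = pd.date_range('2024-01-01', periods=20_000, freq='5min')
    df_hist = pd.DataFrame(
        {'close': 60_000 * np.cumprod(1 + 0.0005 * np.random.randn(len(idx)))},
        index=idx,
    )

    hd = get_hd_surface(model_results, df_hist,
                        domain_params=(-1.5, 1.5, 1000),
                        return_domain='log_moneyness')
    pdf_surface = hd['pdf_surface']  # one density per maturity row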