voly 0.0.146__py3-none-any.whl → 0.0.148__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voly/client.py +9 -4
- voly/core/hd.py +481 -283
- voly/core/rnd.py +386 -272
- {voly-0.0.146.dist-info → voly-0.0.148.dist-info}/METADATA +1 -1
- {voly-0.0.146.dist-info → voly-0.0.148.dist-info}/RECORD +8 -8
- {voly-0.0.146.dist-info → voly-0.0.148.dist-info}/WHEEL +0 -0
- {voly-0.0.146.dist-info → voly-0.0.148.dist-info}/licenses/LICENSE +0 -0
- {voly-0.0.146.dist-info → voly-0.0.148.dist-info}/top_level.txt +0 -0
voly/core/hd.py
CHANGED

@@ -8,7 +8,7 @@ import pandas as pd
 import numpy as np
 import datetime as dt
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any
+from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
@@ -27,20 +27,25 @@ def get_historical_data(currency: str,
     Fetch historical OHLCV data for a cryptocurrency.
 
     Parameters:
-
+    -----------
     currency : str
-        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH')
+        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH')
     lookback_days : str
         The lookback period in days, formatted as '90d', '30d', etc.
     granularity : str
-        The time interval for data points (e.g., '15m', '1h', '1d')
+        The time interval for data points (e.g., '15m', '1h', '1d')
     exchange_name : str
-        The exchange to fetch data from (default: 'binance')
+        The exchange to fetch data from (default: 'binance')
 
     Returns:
-
-    pd.DataFrame
+    --------
+    pd.DataFrame
+        Historical price data with OHLCV columns and datetime index
     """
+    # Validate inputs
+    if not lookback_days.endswith('d'):
+        raise VolyError("lookback_days should be in format '90d', '30d', etc.")
+
     try:
         # Get the exchange class from ccxt
         exchange_class = getattr(ccxt, exchange_name.lower())
@@ -49,28 +54,21 @@ def get_historical_data(currency: str,
             raise VolyError(f"Exchange '{exchange_name}' not found in ccxt. Please check the exchange name.")
 
         # Form the trading pair symbol
-        symbol = currency
+        symbol = f"{currency}/USDT"
 
         # Convert lookback_days to timestamp
-        if lookback_days.endswith('d'):
-            days_ago = int(lookback_days[:-1])
-            date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
-        else:
-            raise VolyError("lookback_days should be in format '90d', '30d', etc.")
-
+        days_ago = int(lookback_days[:-1])
+        date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
         from_ts = exchange.parse8601(date_start)
+
         ohlcv_list = []
         ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
         ohlcv_list.append(ohlcv)
-
-        # Fetch all available data within the lookback period
-        while len(ohlcv) == 1000:
+        while True:
            from_ts = ohlcv[-1][0]
            new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-
-
-            ohlcv.extend(new_ohlcv[1:])  # Skip first element to avoid duplication
-            if len(new_ohlcv) < 1000:
+            ohlcv.extend(new_ohlcv)
+            if len(new_ohlcv) != 1000:
                 break
 
         # Convert to DataFrame
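
The rewritten loop above pages through fetch_ohlcv in 1000-candle chunks and stops on the first short page. A minimal standalone sketch of the same pattern, assuming only ccxt's public fetch_ohlcv/parse8601 API (the symbol, timeframe, and start date are illustrative):

    import ccxt

    exchange = ccxt.binance()
    symbol, timeframe = "BTC/USDT", "1h"  # illustrative values
    since = exchange.parse8601("2024-01-01 00:00:00")

    candles = []
    while True:
        page = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=1000)
        if not page:
            break
        candles.extend(page)
        since = page[-1][0]    # resume from the last candle's timestamp (ms)
        if len(page) != 1000:  # a short page means the range is exhausted
            break

Note that restarting from the last candle's own timestamp can re-fetch that boundary candle, so deduplicating by timestamp downstream is a common safeguard.
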
@@ -87,36 +85,76 @@ def get_historical_data(currency: str,
 @catch_exception
 def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
     """
-
+    Convert window length string (e.g., '30d') to number of data points.
 
     Parameters:
     -----------
     window_length : str
         Window length in days, formatted as '7d', '30d', etc.
     df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index
+        Historical data DataFrame with datetime index
 
     Returns:
     --------
     int
-        Number of data points corresponding to the window length
+        Number of data points corresponding to the window length
     """
+    # Validate inputs
     if not isinstance(window_length, str) or not window_length.endswith('d'):
         raise VolyError("window_length should be in format '7d', '30d', etc.")
 
+    if len(df_hist) < 2:
+        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
+
     # Extract number of days
     days = int(window_length[:-1])
 
-    # Calculate time delta between
-
-
-
-
-
-
-
-
-
+    # Calculate average time delta between data points
+    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
+
+    # Convert to days and calculate points per window
+    days_per_point = avg_delta / (24 * 60 * 60)
+    n_points = int(days / days_per_point)
+
+    # Ensure minimum number of points
+    return max(n_points, 10)
+
+
+def get_param_names(model_type: str, distribution: str) -> List[str]:
+    """
+    Get parameter names for a volatility model and distribution.
+
+    Parameters:
+    -----------
+    model_type : str
+        Type of volatility model ('garch' or 'egarch')
+    distribution : str
+        Distribution type ('normal', 'studentst', or 'skewstudent')
+
+    Returns:
+    --------
+    List[str]
+        List of parameter names
+    """
+    # GARCH(1,1) parameters
+    if model_type.lower() == 'garch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
+
+    # EGARCH(1,1,1) parameters
+    elif model_type.lower() == 'egarch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
+
+    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
 
 
 @catch_exception
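
A worked example of the points-per-window arithmetic above, using a hypothetical hourly index:

    import pandas as pd

    idx = pd.date_range("2024-01-01", periods=2161, freq="h")         # hourly data
    avg_delta = (idx[-1] - idx[0]).total_seconds() / (len(idx) - 1)   # 3600.0 s
    days_per_point = avg_delta / (24 * 60 * 60)                       # ~0.0417 days per point
    n_points = int(30 / days_per_point)                               # '30d' -> 720 points
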
@@ -127,85 +165,90 @@ def fit_volatility_model(log_returns: np.ndarray,
                          window_length: str = '30d',
                          n_fits: int = 400) -> Dict[str, Any]:
     """
-    Fit a volatility model (GARCH or EGARCH) to
+    Fit a volatility model (GARCH or EGARCH) to historical returns.
 
     Parameters:
     -----------
     log_returns : np.ndarray
-        Array of log returns
+        Array of log returns (percent)
     df_hist : pd.DataFrame
-
+        Historical price data
     model_type : str
         Type of volatility model ('garch' or 'egarch')
     distribution : str
         Distribution type ('normal', 'studentst', or 'skewstudent')
     window_length : str
-        Length of
+        Length of sliding window in days (e.g., '30d')
     n_fits : int
-        Number of sliding windows
+        Number of sliding windows to fit
 
     Returns:
     --------
     Dict[str, Any]
-        Dictionary with model parameters and
+        Dictionary with model parameters and fitting results
     """
     # Parse window length
     window_points = parse_window_length(window_length, df_hist)
 
+    # Validate data
     if len(log_returns) < window_points + n_fits:
         raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
 
-    # Adjust window sizes
+    # Adjust window sizes to avoid overfitting
     n_fits = min(n_fits, max(100, len(log_returns) // 3))
     window_points = min(window_points, max(20, len(log_returns) // 3))
 
-    start
-
+    # Calculate start and end indices for sliding windows
+    start_idx = window_points + n_fits
+    end_idx = n_fits
 
-    #
+    # Get parameter names for the model
     param_names = get_param_names(model_type, distribution)
     n_params = len(param_names)
 
+    # Initialize arrays for parameters and innovations
     parameters = np.zeros((n_fits, n_params))
     z_process = []
 
-    logger.info(
-
+    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
+                f"using {n_fits} windows of {window_length}")
 
+    # Fit models with sliding windows
     for i in range(n_fits):
+        # Log progress
         if i % (n_fits // 10) == 0:
             logger.info(f"Fitting progress: {i}/{n_fits}")
 
-        #
-        if
+        # Check if we have enough data for this window
+        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
             continue
 
-
+        # Extract window data
+        window = log_returns[end_idx - i - 1:start_idx - i - 1]
 
-        # Skip
+        # Skip invalid windows
         if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
             continue
 
-        # Mean-center the data
+        # Mean-center the data for numerical stability
        data = window - np.mean(window)
 
         try:
-            # Configure
+            # Configure and fit model
             if model_type.lower() == 'garch':
                 model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
             else:  # egarch
                 model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
 
+            # Fit with optimization settings
             fit_result = model.fit(disp='off', options={'maxiter': 1000})
 
-            # Extract parameters
+            # Extract parameters
             params_dict = fit_result.params.to_dict()
-
-            # Extract parameter values in correct order
             param_values = [params_dict.get(param, 0) for param in param_names]
             parameters[i, :] = param_values
 
-            #
+            # Extract standardized residuals (innovations)
             residuals = fit_result.resid
             conditional_vol = fit_result.conditional_volatility
 
@@ -217,11 +260,11 @@ def fit_volatility_model(log_returns: np.ndarray,
         except Exception as e:
             logger.warning(f"Model fit failed for window {i}: {str(e)}")
 
-    #
+    # Check if we have enough successful fits
     if len(z_process) < n_fits / 2:
         raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
 
-    #
+    # Remove failed fits
     valid_rows = ~np.all(parameters == 0, axis=1)
     parameters = parameters[valid_rows]
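
A minimal sketch of one window's fit, mirroring the loop body above; the synthetic percent returns are illustrative stand-ins for a slice of log_returns:

    import numpy as np
    from arch import arch_model

    rng = np.random.default_rng(0)
    window = rng.normal(0, 1.5, size=720)        # hypothetical percent log returns
    data = window - window.mean()                # mean-center, as the loop does

    model = arch_model(data, vol='GARCH', p=1, q=1, dist='normal')
    res = model.fit(disp='off', options={'maxiter': 1000})

    print(res.params.to_dict())                  # mu, omega, alpha[1], beta[1]
    z = res.resid / res.conditional_volatility   # standardized innovations
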
@@ -240,100 +283,95 @@ def fit_volatility_model(log_returns: np.ndarray,
     }
 
 
-
+@catch_exception
+def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
     """
-
+    Create a function to sample innovations based on the volatility model.
 
     Parameters:
     -----------
-
-
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
+    vol_model : Dict[str, Any]
+        Volatility model information from fit_volatility_model()
 
     Returns:
     --------
-
-
+    Callable
+        Function that returns random innovations when called
     """
-
-
-
-
-
-
-        return
-    else:
-
-
-
-
-
-
+    distribution = vol_model['distribution']
+    z_process = vol_model['z_process']
+
+    if distribution.lower() == 'normal':
+        # Use standard normal for normal distribution
+        def sample_innovation(size=1):
+            return np.random.normal(0, 1, size=size)
+    else:
+        # Use KDE for non-normal distributions to capture empirical distribution
+        kde = stats.gaussian_kde(z_process, bw_method='silverman')
+        z_range = np.linspace(min(z_process), max(z_process), 1000)
+        z_prob = kde(z_range)
+        z_prob = z_prob / np.sum(z_prob)
+
+        def sample_innovation(size=1):
+            return np.random.choice(z_range, size=size, p=z_prob)
+
+    return sample_innovation
 
 
 @catch_exception
-def
+def generate_volatility_paths(vol_model: Dict[str, Any],
                               horizon: int,
                               simulations: int = 5000) -> Tuple[np.ndarray, float]:
     """
-    Simulate future paths using a fitted volatility model.
+    Simulate future price paths using a fitted volatility model.
 
     Parameters:
     -----------
     vol_model : Dict[str, Any]
-
+        Volatility model information from fit_volatility_model()
     horizon : int
-        Number of steps to simulate
+        Number of time steps to simulate
     simulations : int
         Number of paths to simulate
 
     Returns:
     --------
     Tuple[np.ndarray, float]
-
+        Array of simulated returns and the drift term
     """
+    # Extract model information
     parameters = vol_model['parameters']
-    z_process = vol_model['z_process']
     model_type = vol_model['model_type']
     distribution = vol_model['distribution']
     param_names = vol_model['param_names']
 
-    #
+    # Get mean parameters
     pars = vol_model['avg_params'].copy()
     bounds = vol_model['std_params'].copy()
 
-    # Create dictionary for easier
+    # Create parameter dictionary for easier access
     param_dict = {name: value for name, value in zip(param_names, pars)}
 
-    # Log parameters
+    # Log parameters
     param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
     logger.info(f"{model_type.upper()} parameters: {param_str}")
 
-    # Create
-    if distribution.lower() == 'normal':
-        # Use standard normal for normal distribution
-        def sample_innovation(size=1):
-            return np.random.normal(0, 1, size=size)
-    else:
-        # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')  # original code didnt have bw_method
-        z_range = np.linspace(min(z_process), max(z_process), 1000)
-        z_prob = kde(z_range)
-        z_prob = z_prob / np.sum(z_prob)
-
-        def sample_innovation(size=1):
-            return np.random.choice(z_range, size=size, p=z_prob)
+    # Create innovation sampler
+    sample_innovation = create_innovation_sampler(vol_model)
 
-    #
+    # Initialize results array
     simulated_returns = np.zeros(simulations)
     mu = param_dict.get('mu', 0)
 
+    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
+
+    # Simulate paths
     for i in range(simulations):
+        # Log progress
         if (i + 1) % (simulations // 10) == 0:
             logger.info(f"Simulation progress: {i + 1}/{simulations}")
 
-        #
+        # Vary parameters periodically for robustness
         if (i + 1) % (simulations // 20) == 0:
             # Create parameter variations based on their estimated distribution
             sim_params = {}
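
The KDE branch of the sampler discretizes the innovation distribution onto a grid and draws from the normalized grid probabilities. The same idea in isolation, with hypothetical Student-t innovations standing in for z_process:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(1)
    z_process = rng.standard_t(df=5, size=2000)  # stand-in for fitted innovations

    kde = stats.gaussian_kde(z_process, bw_method='silverman')
    z_range = np.linspace(z_process.min(), z_process.max(), 1000)
    z_prob = kde(z_range)
    z_prob = z_prob / np.sum(z_prob)             # valid probability vector

    draws = np.random.choice(z_range, size=5, p=z_prob)
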
@@ -356,25 +394,30 @@ def simulate_volatility_paths(vol_model: Dict[str, Any],
 
         # Initialize volatility based on model type
         if model_type.lower() == 'garch':
+            # Extract GARCH parameters
             omega = sim_params.get('omega', 0)
             alpha = sim_params.get('alpha[1]', 0)
             beta = sim_params.get('beta[1]', 0)
 
-            # Initialize
-
+            # Initialize with unconditional variance
+            persistence = alpha + beta
+            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
+
         else:  # egarch
+            # Extract EGARCH parameters
             omega = sim_params.get('omega', 0)
             beta = sim_params.get('beta[1]', 0)
 
-            # Initialize
+            # Initialize log variance
             log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
             sigma2 = np.exp(log_sigma2)
 
+        # Initialize return sum
         returns_sum = 0
 
-        # Simulate path
+        # Simulate path
         for _ in range(horizon):
-            # Sample
+            # Sample innovation
             z = sample_innovation()
 
            # Update returns and volatility based on model type
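
The GARCH(1,1) initialization above uses the unconditional variance sigma2 = omega / (1 - alpha - beta), which is valid when the persistence alpha + beta < 1; the omega / 0.99 fallback guards the near-integrated case. With illustrative numbers:

    omega, alpha, beta = 0.05, 0.08, 0.90
    persistence = alpha + beta            # 0.98 < 1, so the process is stationary
    sigma2 = omega / (1 - persistence)    # 0.05 / 0.02 = 2.5
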
@@ -384,29 +427,265 @@ def simulate_volatility_paths(vol_model: Dict[str, Any],
                 returns_sum += e + mu
 
                 # Update GARCH volatility
-                sigma2 = sim_params.get('omega', 0) +
-
+                sigma2 = (sim_params.get('omega', 0) +
+                          sim_params.get('alpha[1]', 0) * e ** 2 +
+                          sim_params.get('beta[1]', 0) * sigma2)
+
             else:  # egarch
                 # Calculate return
                 e = z * np.sqrt(sigma2)
                 returns_sum += e + mu
 
-                #
-                abs_z = abs(z)
+                # Extract EGARCH parameters
                 gamma = sim_params.get('gamma[1]', 0)
                 alpha = sim_params.get('alpha[1]', 0)
                 beta = sim_params.get('beta[1]', 0)
                 omega = sim_params.get('omega', 0)
 
-                # EGARCH
+                # Update EGARCH volatility
+                abs_z = abs(z)
                 log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
                 sigma2 = np.exp(log_sigma2)
 
+        # Store final return
         simulated_returns[i] = returns_sum
 
     return simulated_returns, mu * horizon
 
 
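
In the EGARCH update above, sqrt(2/pi) is E|Z| for a standard normal variable, so the alpha term is centered, while gamma adds the leverage asymmetry. One update step with illustrative parameters:

    import numpy as np

    omega, alpha, gamma, beta = -0.1, 0.15, -0.08, 0.97  # illustrative values
    log_sigma2 = 0.5
    z = -2.0                                             # a large negative shock
    log_sigma2 = (omega + beta * log_sigma2
                  + alpha * (abs(z) - np.sqrt(2 / np.pi)) + gamma * z)
    sigma2 = np.exp(log_sigma2)  # rises more than for z = +2.0 when gamma < 0
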
+@catch_exception
+def prepare_domains(domain_params: Tuple[float, float, int],
+                    s: float,
+                    return_domain: str) -> Dict[str, np.ndarray]:
+    """
+    Prepare domain arrays for different representations.
+
+    Parameters:
+    -----------
+    domain_params : Tuple[float, float, int]
+        (min_log_moneyness, max_log_moneyness, num_points)
+    s : float
+        Spot price
+    return_domain : str
+        Domain for results
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of domain arrays
+    """
+    # Create log-moneyness grid
+    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
+
+    # Calculate other domains
+    M = np.exp(LM)  # Moneyness
+    R = M - 1  # Returns
+    K = s / M  # Strike prices
+
+    # Calculate grid spacing
+    dx = LM[1] - LM[0]
+
+    return {
+        'log_moneyness': LM,
+        'moneyness': M,
+        'returns': R,
+        'strikes': K,
+        'dx': dx
+    }
+
+
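
Evaluating these grids for a spot of 100 and a coarse 5-point log-moneyness axis makes the relationships concrete:

    import numpy as np

    LM = np.linspace(-1.5, 1.5, 5)  # log-moneyness
    M = np.exp(LM)                  # moneyness: [0.223, 0.472, 1.0, 2.117, 4.482]
    R = M - 1                       # simple returns
    K = 100 / M                     # strikes: [448.2, 211.7, 100.0, 47.2, 22.3]
    dx = LM[1] - LM[0]              # uniform spacing: 0.75
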
+@catch_exception
+def calculate_basic_density(df_hist: pd.DataFrame,
+                            t: float,
+                            r: float,
+                            n_periods: int,
+                            domains: Dict[str, np.ndarray],
+                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
+    """
+    Calculate historical density using KDE of historical returns.
+
+    Parameters:
+    -----------
+    df_hist : pd.DataFrame
+        Historical price data
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of PDFs in different domains
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Filter historical data for the maturity's lookback period
+    start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
+    maturity_hist = df_hist[df_hist.index >= start_date].copy()
+
+    if len(maturity_hist) < 10:
+        raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
+
+    # Calculate scaled returns
+    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
+    maturity_hist = maturity_hist.dropna()
+    returns = maturity_hist['log_returns'].values
+
+    if len(returns) < 2:
+        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
+
+    # Girsanov adjustment to shift to risk-neutral measure
+    mu_scaled = returns.mean()
+    sigma_scaled = returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    adj_returns = returns - adjustment
+
+    # Create PDF with KDE
+    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
+    pdf_lm = kde(LM)
+
+    # Normalize the PDF
+    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
+
+    # Transform to other domains
+    pdf_m = pdf_lm / M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }
+
+
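
The Girsanov-style adjustment above recenters the empirical distribution so its mean matches the risk-neutral drift (r - sigma^2/2) * sqrt(t) while leaving higher moments untouched. A quick numeric check with illustrative values:

    import numpy as np

    rng = np.random.default_rng(2)
    returns = rng.normal(0.04, 0.5, size=1000)   # hypothetical scaled log returns

    r, t = 0.05, 0.25
    sigma = returns.std()
    target = (r - 0.5 * sigma ** 2) * np.sqrt(t)
    adj = returns - (returns.mean() - target)

    assert np.isclose(adj.mean(), target)        # mean matches the RN drift
    assert np.isclose(adj.std(), sigma)          # shape is preserved
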
+@catch_exception
+def calculate_volatility_density(vol_model: Dict[str, Any],
+                                 s: float,
+                                 t: float,
+                                 r: float,
+                                 n_periods: int,
+                                 tau_days: float,
+                                 domains: Dict[str, np.ndarray],
+                                 simulations: int = 5000,
+                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
+    """
+    Calculate historical density using volatility model simulation.
+
+    Parameters:
+    -----------
+    vol_model : Dict[str, Any]
+        Volatility model from fit_volatility_model()
+    s : float
+        Spot price
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    tau_days : float
+        Days to maturity
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    simulations : int
+        Number of Monte Carlo simulations
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Tuple[Dict[str, np.ndarray], Dict[str, Any]]
+        Dictionary of PDFs in different domains and model parameters
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Simulate paths with the volatility model
+    horizon = max(1, int(tau_days))
+    simulated_returns, simulated_mu = generate_volatility_paths(
+        vol_model,
+        horizon,
+        simulations
+    )
+
+    # Scale the simulated returns to match target time horizon
+    scaling_factor = np.sqrt(n_periods / tau_days)
+    scaled_returns = simulated_returns * scaling_factor
+
+    # Risk-neutral adjustment
+    mu_scaled = scaled_returns.mean()
+    sigma_scaled = scaled_returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    risk_neutral_returns = scaled_returns - adjustment
+
+    # Convert to terminal prices
+    simulated_prices = s * np.exp(risk_neutral_returns / 100)
+
+    # Convert to moneyness domain (x-domain)
+    simulated_moneyness = s / simulated_prices
+
+    # Calculate PDF with KDE
+    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
+    pdf_m = kde(M)
+
+    # Normalize the PDF
+    pdf_m = pdf_m / np.trapz(pdf_m, M)
+
+    # Transform to other domains
+    pdf_lm = pdf_m * M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    # Prepare model parameters for moments
+    avg_params = vol_model['avg_params']
+    param_names = vol_model['param_names']
+    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
+    model_params['model_type'] = vol_model['model_type']
+    model_params['distribution'] = vol_model['distribution']
+
+    # Add persistence for GARCH models
+    if vol_model['model_type'] == 'garch':
+        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }, model_params
+
+
+@catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
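
The transforms above (pdf_lm = pdf_m * M, pdf_k = pdf_lm / K, pdf_r = pdf_lm / (1 + R)) are changes of variables with the appropriate Jacobians. A sketch verifying the log-moneyness case on synthetic data; agreement is only approximate because each KDE picks its own bandwidth:

    import numpy as np
    from scipy import stats

    m = np.exp(np.random.default_rng(3).normal(0, 0.2, 10_000))  # moneyness draws
    kde_m = stats.gaussian_kde(m)
    kde_lm = stats.gaussian_kde(np.log(m))

    x = 0.1                                  # a log-moneyness point
    print(kde_lm(x)[0])                      # density in log-moneyness
    print(kde_m(np.exp(x))[0] * np.exp(x))   # pdf_m * M at the same point
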
@@ -427,29 +706,26 @@ def get_hd_surface(model_results: pd.DataFrame,
     df_hist : pd.DataFrame
         DataFrame with historical price data
     domain_params : Tuple[float, float, int]
-
+        (min_log_moneyness, max_log_moneyness, num_points)
     return_domain : str
-        Domain for
+        Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
     method : str
-        Method
-        - 'garch': GARCH(1,1) model
-        - 'egarch': EGARCH(1,1,1) model with asymmetry
-        - 'basic': Simple histogram/KDE of historical returns
+        Method for HD estimation ('garch', 'egarch', 'basic')
     distribution : str
-        Distribution
+        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
     window_length : str
-        Length of sliding windows
+        Length of sliding windows for model fitting (e.g., '30d')
     n_fits : int
-        Number of sliding windows for
+        Number of sliding windows for model fitting
     simulations : int
-        Number of Monte Carlo simulations
+        Number of Monte Carlo simulations
     bandwidth : str
-        KDE bandwidth method
+        KDE bandwidth method
 
     Returns:
     --------
     Dict[str, Any]
-        Dictionary
+        Dictionary with pdf_surface, cdf_surface, x_surface, and moments
     """
     # Validate inputs
     required_columns = ['s', 't', 'r']
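
A hedged sketch of how this entry point might be wired up, assuming model_results carries the documented 's', 't', 'r' columns (one row per maturity) and df_hist comes from get_historical_data; all argument values are illustrative:

    df_hist = get_historical_data('BTC', lookback_days='90d',
                                  granularity='1h', exchange_name='binance')

    hd = get_hd_surface(model_results,        # rows indexed by maturity
                        df_hist=df_hist,
                        method='garch',
                        distribution='normal',
                        return_domain='log_moneyness')

    pdf_surface = hd['pdf_surface']           # per-maturity densities
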
@@ -460,11 +736,11 @@ def get_hd_surface(model_results: pd.DataFrame,
     if len(df_hist) < 2:
         raise VolyError("Not enough data points in df_hist")
 
-    # Determine granularity from
+    # Determine granularity from data
     minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
     minutes_per_period = max(1, int(minutes_diff))
 
-    # Validate method and
+    # Validate method and distribution
     valid_methods = ['garch', 'egarch', 'basic']
     valid_distributions = ['normal', 'studentst', 'skewstudent']
@@ -477,16 +753,20 @@ def get_hd_surface(model_results: pd.DataFrame,
     if method in ['garch', 'egarch'] and distribution not in valid_distributions:
         raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
 
-    #
+    # Validate return domain
+    valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
+    if return_domain not in valid_domains:
+        raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
+
+    # Calculate log returns
     log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
     log_returns = log_returns.dropna().values
 
-    # Fit volatility model if
+    # Fit volatility model if needed
     vol_model = None
     if method in ['garch', 'egarch']:
-        model_type = method
-        logger.info(
-            f"Using {model_type.upper()} with {distribution} distribution, {n_fits} fits, {simulations} simulations")
+        model_type = method
+        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
 
         vol_model = fit_volatility_model(
             log_returns=log_returns,
|
|
|
496
776
|
window_length=window_length,
|
|
497
777
|
n_fits=n_fits
|
|
498
778
|
)
|
|
499
|
-
elif method == 'basic':
|
|
500
|
-
logger.info(f"Using basic returns-based KDE method with bandwidth {bandwidth}")
|
|
501
779
|
|
|
502
780
|
# Initialize result containers
|
|
503
781
|
pdf_surface = {}
|
|
@@ -507,155 +785,75 @@ def get_hd_surface(model_results: pd.DataFrame,
 
     # Process each maturity
     for i in model_results.index:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-        #
-        if vol_model is None:
-            logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
-            continue
-
-        # Simulate paths with the volatility model
-        horizon = max(1, int(tau_days_float))
-        simulated_returns, simulated_mu = simulate_volatility_paths(
-            vol_model,
-            horizon,
-            simulations
-        )
-
-        # Scale the simulated returns to match target time horizon
-        scaling_factor = np.sqrt(n_periods / tau_days_float)
-        scaled_returns = simulated_returns * scaling_factor
-
-        # Risk-neutral adjustment
-        mu_scaled = scaled_returns.mean()
-        sigma_scaled = scaled_returns.std()
-        expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
-        adjustment = mu_scaled - expected_risk_neutral_mean
-        risk_neutral_returns = scaled_returns - adjustment
-
-        # Convert to terminal prices
-        simulated_prices = s * np.exp(risk_neutral_returns / 100)
-
-        # Convert to moneyness domain (x-domain)
-        simulated_moneyness = s / simulated_prices
-
-        # Perform KDE to get PDF
-        kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-        pdf_values = kde(M)
-
-        # Transform according to return domain
-        pdf_m = pdf_values
-        pdf_lm = pdf_m * M
-        pdf_k = pdf_lm / K
-        pdf_r = pdf_lm / (1 + R)
-
-        # Include volatility model params in moments
-        avg_params = vol_model['avg_params']
-        param_names = vol_model['param_names']
-        model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
-        model_params['model_type'] = method
-        model_params['distribution'] = distribution
-
-        # Add persistence for GARCH models
-        if method == 'garch':
-            model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-        else:
-            continue  # Skip if invalid method
-
-        # Ensure density integrates to 1
-        dx = LM[1] - LM[0]
-        total_area = np.sum(pdf_values * dx)
-        if total_area <= 0:
-            logger.warning(f"Invalid density (area <= 0) for maturity {i}, skipping.")
-            continue
-
-        pdf_values = pdf_values / total_area
-
-        # Calculate CDF
-        cdf = np.cumsum(pdf_lm * dx)
-        cdf = np.minimum(cdf / cdf[-1], 1.0)  # Ensure CDF is between 0 and 1
-
-        # Select appropriate domain and calculate moments
-        if return_domain == 'log_moneyness':
-            x = LM
-            pdf = pdf_lm
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'moneyness':
-            x = M
-            pdf = pdf_m
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'returns':
-            x = R
-            pdf = pdf_r
-            moments = get_all_moments(x, pdf, model_params)
-        elif return_domain == 'strikes':
-            x = K
-            pdf = pdf_k
+        try:
+            # Get parameters for this maturity
+            s = model_results.loc[i, 's']  # Spot price
+            r = model_results.loc[i, 'r']  # Risk-free rate
+            t = model_results.loc[i, 't']  # Time to maturity in years
+
+            # Calculate time scaling parameters
+            tau_days = t * 365.25  # Days to expiry
+            n_periods = max(1, int(tau_days * 24 * 60 / minutes_per_period))  # Number of periods
+
+            logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+
+            # Prepare domains
+            domains = prepare_domains(domain_params, s, return_domain)
+
+            # Calculate density based on method
+            if method == 'basic':
+                pdfs = calculate_basic_density(
+                    df_hist=df_hist,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    domains=domains,
+                    bandwidth=bandwidth
+                )
+                model_params = None
+
+            else:  # 'garch' or 'egarch'
+                if vol_model is None:
+                    logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
+                    continue
+
+                pdfs, model_params = calculate_volatility_density(
+                    vol_model=vol_model,
+                    s=s,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    tau_days=tau_days,
+                    domains=domains,
+                    simulations=simulations,
+                    bandwidth=bandwidth
+                )
+
+            # Get domain arrays for output
+            if return_domain == 'log_moneyness':
+                x = domains['log_moneyness']
+                pdf = pdfs['log_moneyness']
+            elif return_domain == 'moneyness':
+                x = domains['moneyness']
+                pdf = pdfs['moneyness']
+            elif return_domain == 'returns':
+                x = domains['returns']
+                pdf = pdfs['returns']
+            elif return_domain == 'strikes':
+                x = domains['strikes']
+                pdf = pdfs['strikes']
+
+            # Calculate statistical moments
             moments = get_all_moments(x, pdf, model_params)
-        else:
-            raise VolyError(f"Unsupported return_domain: {return_domain}")
 
-
-
-
-
-
+            # Store results
+            pdf_surface[i] = pdf
+            cdf_surface[i] = pdfs['cdf']
+            x_surface[i] = x
+            all_moments[i] = moments
+
+        except Exception as e:
+            logger.warning(f"Failed to calculate HD for maturity {i}: {str(e)}")
 
     # Check if we have any valid results
     if not pdf_surface: