voly 0.0.152__py3-none-any.whl → 0.0.153__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- voly/client.py +42 -27
- voly/core/hd.py +62 -701
- voly/core/rnd.py +34 -157
- voly/utils/density.py +155 -0
- {voly-0.0.152.dist-info → voly-0.0.153.dist-info}/METADATA +1 -1
- {voly-0.0.152.dist-info → voly-0.0.153.dist-info}/RECORD +9 -8
- {voly-0.0.152.dist-info → voly-0.0.153.dist-info}/WHEEL +0 -0
- {voly-0.0.152.dist-info → voly-0.0.153.dist-info}/licenses/LICENSE +0 -0
- {voly-0.0.152.dist-info → voly-0.0.153.dist-info}/top_level.txt +0 -0
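
The substantive change is in `voly/core/hd.py`: historical density (HD) estimation drops the GARCH/EGARCH simulation pipeline (and its `arch` dependency) in favor of a closed-form normal-distribution estimator, with domain handling factored out into the new `voly/utils/density.py`. As a result, `get_hd_surface` loses its model-selection knobs. A sketch of the call-site impact follows; both signatures are taken from the diff below, and the argument values are illustrative:

```python
# 0.0.152: HD via a fitted volatility model plus KDE, many tuning knobs
hd = get_hd_surface(model_results, df_hist,
                    domain_params=(-1.5, 1.5, 1000),
                    return_domain='log_moneyness',
                    method='garch', distribution='normal',
                    window_length='30d', n_fits=400,
                    simulations=5000, bandwidth='silverman')

# 0.0.153: HD via a normal distribution fitted to historical returns
hd = get_hd_surface(model_results, df_hist,
                    domain_params=(-1.5, 1.5, 1000),
                    return_domain='log_moneyness')
```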
voly/core/hd.py
CHANGED
```diff
@@ -1,21 +1,18 @@
 """
-This module handles calculating historical densities from
-
+This module handles calculating historical densities from time series of prices
+and converting them to probability distributions.
 """
 
 import ccxt
-import pandas as pd
 import numpy as np
+import pandas as pd
 import datetime as dt
+from typing import Dict, Tuple, Any, Optional, List
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
-from voly.
-from voly.models import SVIModel
-from voly.core.fit import fit_model
-from arch import arch_model
+from voly.utils.density import prepare_domains, normalize_density, transform_to_domains, select_domain_results
 
 
 @catch_exception
@@ -64,10 +61,13 @@ def get_historical_data(currency: str,
     ohlcv_list = []
     ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
     ohlcv_list.append(ohlcv)
+
     while True:
         from_ts = ohlcv[-1][0]
         new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-
+        if len(new_ohlcv) <= 1:  # No new data or just one overlapping candle
+            break
+        ohlcv.extend(new_ohlcv[1:])  # Skip the first one to avoid duplicates
         if len(new_ohlcv) != 1000:
             break
 
@@ -84,426 +84,13 @@ def get_historical_data(currency: str,
 
 
 @catch_exception
-def 
+def calculate_normal_hd(df_hist: pd.DataFrame,
+                        t: float,
+                        r: float,
+                        n_periods: int,
+                        domains: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
     """
-
-
-    Parameters:
-    -----------
-    window_length : str
-        Window length in days, formatted as '7d', '30d', etc.
-    df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index
-
-    Returns:
-    --------
-    int
-        Number of data points corresponding to the window length
-    """
-    # Validate inputs
-    if not isinstance(window_length, str) or not window_length.endswith('d'):
-        raise VolyError("window_length should be in format '7d', '30d', etc.")
-
-    if len(df_hist) < 2:
-        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
-
-    # Extract number of days
-    days = int(window_length[:-1])
-
-    # Calculate average time delta between data points
-    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
-
-    # Convert to days and calculate points per window
-    days_per_point = avg_delta / (24 * 60 * 60)
-    n_points = int(days / days_per_point)
-
-    # Ensure minimum number of points
-    return max(n_points, 10)
-
-
-def get_param_names(model_type: str, distribution: str) -> List[str]:
-    """
-    Get parameter names for a volatility model and distribution.
-
-    Parameters:
-    -----------
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-
-    Returns:
-    --------
-    List[str]
-        List of parameter names
-    """
-    # GARCH(1,1) parameters
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
-
-    # EGARCH(1,1,1) parameters
-    elif model_type.lower() == 'egarch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
-
-    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
-
-
-@catch_exception
-def fit_volatility_model(log_returns: np.ndarray,
-                         df_hist: pd.DataFrame,
-                         model_type: str = 'garch',
-                         distribution: str = 'normal',
-                         window_length: str = '30d',
-                         n_fits: int = 400) -> Dict[str, Any]:
-    """
-    Fit a volatility model (GARCH or EGARCH) to historical returns.
-
-    Parameters:
-    -----------
-    log_returns : np.ndarray
-        Array of log returns (percent)
-    df_hist : pd.DataFrame
-        Historical price data
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-    window_length : str
-        Length of sliding window in days (e.g., '30d')
-    n_fits : int
-        Number of sliding windows to fit
-
-    Returns:
-    --------
-    Dict[str, Any]
-        Dictionary with model parameters and fitting results
-    """
-    # Parse window length
-    window_points = parse_window_length(window_length, df_hist)
-
-    # Validate data
-    if len(log_returns) < window_points + n_fits:
-        raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
-
-    # Adjust window sizes to avoid overfitting
-    n_fits = min(n_fits, max(100, len(log_returns) // 3))
-    window_points = min(window_points, max(20, len(log_returns) // 3))
-
-    # Calculate start and end indices for sliding windows
-    start_idx = window_points + n_fits
-    end_idx = n_fits
-
-    # Get parameter names for the model
-    param_names = get_param_names(model_type, distribution)
-    n_params = len(param_names)
-
-    # Initialize arrays for parameters and innovations
-    parameters = np.zeros((n_fits, n_params))
-    z_process = []
-
-    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
-                f"using {n_fits} windows of {window_length}")
-
-    # Fit models with sliding windows
-    for i in range(n_fits):
-        # Log progress
-        if i % (n_fits // 10) == 0:
-            logger.info(f"Fitting progress: {i}/{n_fits}")
-
-        # Check if we have enough data for this window
-        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
-            continue
-
-        # Extract window data
-        window = log_returns[end_idx - i - 1:start_idx - i - 1]
-
-        # Skip invalid windows
-        if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
-            continue
-
-        # Mean-center the data for numerical stability
-        data = window - np.mean(window)
-
-        try:
-            # Configure and fit model
-            if model_type.lower() == 'garch':
-                model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
-            else:  # egarch
-                model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
-
-            # Fit with optimization settings
-            fit_result = model.fit(disp='off', options={'maxiter': 1000})
-
-            # Extract parameters
-            params_dict = fit_result.params.to_dict()
-            param_values = [params_dict.get(param, 0) for param in param_names]
-            parameters[i, :] = param_values
-
-            # Extract standardized residuals (innovations)
-            residuals = fit_result.resid
-            conditional_vol = fit_result.conditional_volatility
-
-            if len(residuals) > 0 and len(conditional_vol) > 0:
-                z_t = residuals[-1] / conditional_vol[-1]
-                if not np.isnan(z_t) and not np.isinf(z_t):
-                    z_process.append(z_t)
-
-        except Exception as e:
-            logger.warning(f"Model fit failed for window {i}: {str(e)}")
-
-    # Check if we have enough successful fits
-    if len(z_process) < n_fits / 2:
-        raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
-
-    # Remove failed fits
-    valid_rows = ~np.all(parameters == 0, axis=1)
-    parameters = parameters[valid_rows]
-
-    # Calculate average parameters and standard deviations
-    avg_params = np.mean(parameters, axis=0)
-    std_params = np.std(parameters, axis=0)
-
-    return {
-        'model_type': model_type,
-        'distribution': distribution,
-        'parameters': parameters,
-        'avg_params': avg_params,
-        'std_params': std_params,
-        'z_process': np.array(z_process),
-        'param_names': param_names
-    }
-
-
-@catch_exception
-def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
-    """
-    Create a function to sample innovations based on the volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-
-    Returns:
-    --------
-    Callable
-        Function that returns random innovations when called
-    """
-    distribution = vol_model['distribution']
-    z_process = vol_model['z_process']
-
-    if distribution.lower() == 'normal':
-        # Use standard normal for normal distribution
-        def sample_innovation(size=1):
-            return np.random.normal(0, 1, size=size)
-    else:
-        # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')
-        z_range = np.linspace(min(z_process), max(z_process), 1000)
-        z_prob = kde(z_range)
-        z_prob = z_prob / np.sum(z_prob)
-
-        def sample_innovation(size=1):
-            return np.random.choice(z_range, size=size, p=z_prob)
-
-    return sample_innovation
-
-
-@catch_exception
-def generate_volatility_paths(vol_model: Dict[str, Any],
-                              horizon: int,
-                              simulations: int = 5000) -> Tuple[np.ndarray, float]:
-    """
-    Simulate future price paths using a fitted volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-    horizon : int
-        Number of time steps to simulate
-    simulations : int
-        Number of paths to simulate
-
-    Returns:
-    --------
-    Tuple[np.ndarray, float]
-        Array of simulated returns and the drift term
-    """
-    # Extract model information
-    parameters = vol_model['parameters']
-    model_type = vol_model['model_type']
-    distribution = vol_model['distribution']
-    param_names = vol_model['param_names']
-
-    # Get mean parameters
-    pars = vol_model['avg_params'].copy()
-    bounds = vol_model['std_params'].copy()
-
-    # Create parameter dictionary for easier access
-    param_dict = {name: value for name, value in zip(param_names, pars)}
-
-    # Log parameters
-    param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
-    logger.info(f"{model_type.upper()} parameters: {param_str}")
-
-    # Create innovation sampler
-    sample_innovation = create_innovation_sampler(vol_model)
-
-    # Initialize results array
-    simulated_returns = np.zeros(simulations)
-    mu = param_dict.get('mu', 0)
-
-    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
-
-    # Simulate paths
-    for i in range(simulations):
-        # Log progress
-        if (i + 1) % (simulations // 10) == 0:
-            logger.info(f"Simulation progress: {i + 1}/{simulations}")
-
-        # Vary parameters periodically for robustness
-        if (i + 1) % (simulations // 20) == 0:
-            # Create parameter variations based on their estimated distribution
-            sim_params = {}
-            for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
-                var = bound ** 2 / max(len(parameters), 1)
-                # Generate new parameter from normal distribution around the mean
-                new_par = np.random.normal(par, np.sqrt(var))
-
-                # Apply constraints to ensure valid parameters
-                if name == 'omega':
-                    new_par = max(new_par, 1e-6)  # Must be positive
-                elif name in ['alpha[1]', 'beta[1]']:
-                    new_par = max(min(new_par, 0.999), 0.001)  # Between 0 and 1
-                elif name == 'nu':
-                    new_par = max(new_par, 2.1)  # Degrees of freedom > 2
-
-                sim_params[name] = new_par
-        else:
-            sim_params = param_dict.copy()
-
-        # Initialize volatility based on model type
-        if model_type.lower() == 'garch':
-            # Extract GARCH parameters
-            omega = sim_params.get('omega', 0)
-            alpha = sim_params.get('alpha[1]', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize with unconditional variance
-            persistence = alpha + beta
-            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
-
-        else:  # egarch
-            # Extract EGARCH parameters
-            omega = sim_params.get('omega', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize log variance
-            log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
-            sigma2 = np.exp(log_sigma2)
-
-        # Initialize return sum
-        returns_sum = 0
-
-        # Simulate path
-        for _ in range(horizon):
-            # Sample innovation
-            z = sample_innovation()
-
-            # Update returns and volatility based on model type
-            if model_type.lower() == 'garch':
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Update GARCH volatility
-                sigma2 = (sim_params.get('omega', 0) +
-                          sim_params.get('alpha[1]', 0) * e ** 2 +
-                          sim_params.get('beta[1]', 0) * sigma2)
-
-            else:  # egarch
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Extract EGARCH parameters
-                gamma = sim_params.get('gamma[1]', 0)
-                alpha = sim_params.get('alpha[1]', 0)
-                beta = sim_params.get('beta[1]', 0)
-                omega = sim_params.get('omega', 0)
-
-                # Update EGARCH volatility
-                abs_z = abs(z)
-                log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
-                sigma2 = np.exp(log_sigma2)
-
-        # Store final return
-        simulated_returns[i] = returns_sum
-
-    return simulated_returns, mu * horizon
-
-
-@catch_exception
-def prepare_domains(domain_params: Tuple[float, float, int],
-                    s: float,
-                    return_domain: str) -> Dict[str, np.ndarray]:
-    """
-    Prepare domain arrays for different representations.
-
-    Parameters:
-    -----------
-    domain_params : Tuple[float, float, int]
-        (min_log_moneyness, max_log_moneyness, num_points)
-    s : float
-        Spot price
-    return_domain : str
-        Domain for results
-
-    Returns:
-    --------
-    Dict[str, np.ndarray]
-        Dictionary of domain arrays
-    """
-    # Create log-moneyness grid
-    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
-
-    # Calculate other domains
-    M = np.exp(LM)  # Moneyness
-    R = M - 1  # Returns
-    K = s / M  # Strike prices
-
-    # Calculate grid spacing
-    dx = LM[1] - LM[0]
-
-    return {
-        'log_moneyness': LM,
-        'moneyness': M,
-        'returns': R,
-        'strikes': K,
-        'dx': dx
-    }
-
-
-@catch_exception
-def calculate_basic_density(df_hist: pd.DataFrame,
-                            t: float,
-                            r: float,
-                            n_periods: int,
-                            domains: Dict[str, np.ndarray],
-                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
-    """
-    Calculate historical density using KDE of historical returns.
+    Calculate historical density using a normal distribution based on historical returns.
 
     Parameters:
     -----------
@@ -517,203 +104,54 @@ def calculate_basic_density(df_hist: pd.DataFrame,
         Number of periods to scale returns
     domains : Dict[str, np.ndarray]
         Domain arrays
-    bandwidth : str
-        KDE bandwidth method
 
     Returns:
    --------
     Dict[str, np.ndarray]
         Dictionary of PDFs in different domains
     """
-    # Extract
+    # Extract log-moneyness domain
    LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
    dx = domains['dx']
 
-    #
-
-    start_date = pd.Timestamp.now() - pd.Timedelta(days=lookback_days)
-    maturity_hist = df_hist[df_hist.index >= start_date].copy()
-
-    # Better diagnostics for debugging
-    if len(maturity_hist) < 2:
-        n_available = len(df_hist)
-        earliest = df_hist.index[0] if n_available > 0 else "N/A"
-        latest = df_hist.index[-1] if n_available > 0 else "N/A"
-
-        logger.warning(f"Insufficient data for t={t:.4f} years ({lookback_days:.2f} days lookback)")
-        logger.warning(f"Available data: {n_available} points from {earliest} to {latest}")
-        logger.warning(f"Required start date: {start_date}")
-
-        # Try using all available data as fallback
-        if n_available >= 2:
-            logger.warning(f"Using all available {n_available} data points as fallback")
-            maturity_hist = df_hist.copy()
-        else:
-            raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
-
-    # Calculate scaled returns
-    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
-    maturity_hist = maturity_hist.dropna()
-    returns = maturity_hist['log_returns'].values
-
-    if len(returns) < 2:
-        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
-
-    # Girsanov adjustment to shift to risk-neutral measure
-    mu_scaled = returns.mean()
-    sigma_scaled = returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
-    adjustment = mu_scaled - expected_risk_neutral_mean
-    adj_returns = returns - adjustment
-
-    # Create PDF with KDE
-    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
-    pdf_lm = kde(LM)
-
-    # Normalize the PDF
-    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
-
-    # Transform to other domains
-    pdf_m = pdf_lm / M
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }
-
-
-@catch_exception
-def calculate_volatility_density(vol_model: Dict[str, Any],
-                                 s: float,
-                                 t: float,
-                                 r: float,
-                                 n_periods: int,
-                                 tau_days: float,
-                                 domains: Dict[str, np.ndarray],
-                                 simulations: int = 5000,
-                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
-    """
-    Calculate historical density using volatility model simulation.
+    # Calculate log returns
+    returns = np.log(df_hist['close'] / df_hist['close'].shift(1)).dropna().values
 
-
-
-
-
-
-
-    t : float
-        Time to maturity in years
-    r : float
-        Risk-free rate
-    n_periods : int
-        Number of periods to scale returns
-    tau_days : float
-        Days to maturity
-    domains : Dict[str, np.ndarray]
-        Domain arrays
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method
+    # Filter historical data based on n_periods
+    if len(returns) < n_periods:
+        logger.warning(f"Not enough historical data, using all {len(returns)} points available")
+        dte_returns = returns
+    else:
+        dte_returns = returns[-n_periods:]
 
-
-
-
-        Dictionary of PDFs in different domains and model parameters
-    """
-    # Extract domains
-    LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
-    dx = domains['dx']
+    # Calculate scaled parameters for normal distribution
+    mu_scaled = np.mean(dte_returns) * np.sqrt(n_periods)
+    sigma_scaled = np.std(dte_returns) * np.sqrt(n_periods)
 
-    #
-
-    simulated_returns, simulated_mu = generate_volatility_paths(
-        vol_model,
-        horizon,
-        simulations
-    )
-
-    # Scale the simulated returns to match target time horizon
-    scaling_factor = np.sqrt(n_periods / tau_days)
-    scaled_returns = simulated_returns * scaling_factor
-
-    # Risk-neutral adjustment
-    mu_scaled = scaled_returns.mean()
-    sigma_scaled = scaled_returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    # Apply Girsanov adjustment to shift to risk-neutral measure
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
     adjustment = mu_scaled - expected_risk_neutral_mean
-
+    mu_rn = mu_scaled - adjustment
 
-    #
-
-
-    # Convert to moneyness domain (x-domain)
-    simulated_moneyness = s / simulated_prices
-
-    # Calculate PDF with KDE
-    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-    pdf_m = kde(M)
+    # Calculate PDF using normal distribution in log-moneyness domain
+    pdf_lm = stats.norm.pdf(LM, loc=mu_rn, scale=sigma_scaled)
 
     # Normalize the PDF
-
+    pdf_lm = normalize_density(pdf_lm, dx)
 
     # Transform to other domains
-
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
+    pdfs = transform_to_domains(pdf_lm, domains)
 
-
-    avg_params = vol_model['avg_params']
-    param_names = vol_model['param_names']
-    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
-    model_params['model_type'] = vol_model['model_type']
-    model_params['distribution'] = vol_model['distribution']
-
-    # Add persistence for GARCH models
-    if vol_model['model_type'] == 'garch':
-        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }, model_params
+    return pdfs
 
 
 @catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
-                   return_domain: str = 'log_moneyness',
-                   method: str = 'garch',
-                   distribution: str = 'normal',
-                   window_length: str = '30d',
-                   n_fits: int = 400,
-                   simulations: int = 5000,
-                   bandwidth: str = 'silverman') -> Dict[str, Any]:
+                   return_domain: str = 'log_moneyness') -> Dict[str, Any]:
     """
-    Generate historical density surface
+    Generate historical density surface using normal distributions.
 
     Parameters:
     -----------
@@ -725,18 +163,6 @@ def get_hd_surface(model_results: pd.DataFrame,
         (min_log_moneyness, max_log_moneyness, num_points)
     return_domain : str
         Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
-    method : str
-        Method for HD estimation ('garch', 'egarch', 'basic')
-    distribution : str
-        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
-    window_length : str
-        Length of sliding windows for model fitting (e.g., '30d')
-    n_fits : int
-        Number of sliding windows for model fitting
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method
 
     Returns:
     --------
@@ -752,46 +178,14 @@ def get_hd_surface(model_results: pd.DataFrame,
     if len(df_hist) < 2:
         raise VolyError("Not enough data points in df_hist")
 
-    # Determine granularity from data
-    minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
-    minutes_per_period = max(1, int(minutes_diff))
-
-    # Validate method and distribution
-    valid_methods = ['garch', 'egarch', 'basic']
-    valid_distributions = ['normal', 'studentst', 'skewstudent']
-
-    method = method.lower()
-    distribution = distribution.lower()
-
-    if method not in valid_methods:
-        raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
-
-    if method in ['garch', 'egarch'] and distribution not in valid_distributions:
-        raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
-
     # Validate return domain
     valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
     if return_domain not in valid_domains:
         raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
 
-    #
-
-
-
-    # Fit volatility model if needed
-    vol_model = None
-    if method in ['garch', 'egarch']:
-        model_type = method
-        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
-
-        vol_model = fit_volatility_model(
-            log_returns=log_returns,
-            df_hist=df_hist,
-            model_type=model_type,
-            distribution=distribution,
-            window_length=window_length,
-            n_fits=n_fits
-        )
+    # Determine granularity from data (minutes between data points)
+    time_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
+    minutes_per_period = max(1, int(time_diff))
 
     # Initialize result containers
     pdf_surface = {}
@@ -804,67 +198,34 @@ def get_hd_surface(model_results: pd.DataFrame,
         try:
             # Get parameters for this maturity
            s = model_results.loc[i, 's']  # Spot price
-            r = model_results.loc[i, 'r']  # Risk-free rate
            t = model_results.loc[i, 't']  # Time to maturity in years
+            r = model_results.loc[i, 'r']  # Risk-free rate
 
-            # Calculate
-
-            n_periods = max(1, int(
-
-            logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+            # Calculate relevant periods for this maturity
+            dte = t * 365.25  # Days to expiry
+            n_periods = max(1, int(dte * 24 * 60 / minutes_per_period))
 
             # Prepare domains
-            domains = prepare_domains(domain_params, s
-
-            # Calculate density
-
-
-
-
-
-
-
-
-
-
-
-
-
-                logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
-                continue
-
-            pdfs, model_params = calculate_volatility_density(
-                vol_model=vol_model,
-                s=s,
-                t=t,
-                r=r,
-                n_periods=n_periods,
-                tau_days=tau_days,
-                domains=domains,
-                simulations=simulations,
-                bandwidth=bandwidth
-            )
-
-            # Get domain arrays for output
-            if return_domain == 'log_moneyness':
-                x = domains['log_moneyness']
-                pdf = pdfs['log_moneyness']
-            elif return_domain == 'moneyness':
-                x = domains['moneyness']
-                pdf = pdfs['moneyness']
-            elif return_domain == 'returns':
-                x = domains['returns']
-                pdf = pdfs['returns']
-            elif return_domain == 'strikes':
-                x = domains['strikes']
-                pdf = pdfs['strikes']
-
-            # Calculate statistical moments
-            moments = get_all_moments(x, pdf, model_params)
+            domains = prepare_domains(domain_params, s)
+
+            # Calculate density
+            pdfs = calculate_normal_hd(
+                df_hist=df_hist,
+                t=t,
+                r=r,
+                n_periods=n_periods,
+                domains=domains
+            )
+
+            # Select results for the requested domain
+            pdf, cdf, x = select_domain_results(pdfs, domains, return_domain)
+
+            # Calculate moments
+            moments = get_all_moments(x, pdf)
 
             # Store results
            pdf_surface[i] = pdf
-            cdf_surface[i] =
+            cdf_surface[i] = cdf
            x_surface[i] = x
            all_moments[i] = moments
 
@@ -878,7 +239,7 @@ def get_hd_surface(model_results: pd.DataFrame,
     # Create DataFrame with moments
     moments = pd.DataFrame(all_moments).T
 
-    logger.info(
+    logger.info("Historical density calculation complete using normal distribution")
 
     return {
         'pdf_surface': pdf_surface,
```