voly-0.0.151-py3-none-any.whl → voly-0.0.153-py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registries.
- voly/client.py +42 -27
- voly/core/hd.py +63 -701
- voly/core/rnd.py +34 -157
- voly/utils/density.py +155 -0
- {voly-0.0.151.dist-info → voly-0.0.153.dist-info}/METADATA +1 -1
- {voly-0.0.151.dist-info → voly-0.0.153.dist-info}/RECORD +9 -8
- {voly-0.0.151.dist-info → voly-0.0.153.dist-info}/WHEEL +1 -1
- {voly-0.0.151.dist-info → voly-0.0.153.dist-info}/licenses/LICENSE +0 -0
- {voly-0.0.151.dist-info → voly-0.0.153.dist-info}/top_level.txt +0 -0
voly/core/hd.py
CHANGED
@@ -1,21 +1,18 @@
 """
-This module handles calculating historical densities from
-
+This module handles calculating historical densities from time series of prices
+and converting them to probability distributions.
 """

 import ccxt
-import pandas as pd
 import numpy as np
+import pandas as pd
 import datetime as dt
+from typing import Dict, Tuple, Any, Optional, List
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
-from voly.
-from voly.models import SVIModel
-from voly.core.fit import fit_model
-from arch import arch_model
+from voly.utils.density import prepare_domains, normalize_density, transform_to_domains, select_domain_results


 @catch_exception
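
Note: `voly/utils/density.py` is new in 0.0.153 (+155 lines) but its contents are not part of this diff view. The sketch below is only a plausible reconstruction of the two simplest helpers, with behavior inferred from the inline code they replace (see the removals in `calculate_basic_density` further down); everything beyond the import line above is an assumption.

```python
import numpy as np
from typing import Dict

def normalize_density(pdf_lm: np.ndarray, dx: float) -> np.ndarray:
    # Rescale so the density integrates to 1 over a uniform grid of spacing dx.
    return pdf_lm / np.sum(pdf_lm * dx)

def transform_to_domains(pdf_lm: np.ndarray,
                         domains: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
    # Change of variables from log-moneyness to the other domains, mirroring
    # the inline transforms this release removes from hd.py.
    cdf = np.cumsum(pdf_lm * domains['dx'])
    return {
        'log_moneyness': pdf_lm,
        'moneyness': pdf_lm / domains['moneyness'],
        'returns': pdf_lm / (1 + domains['returns']),
        'strikes': pdf_lm / domains['strikes'],
        'cdf': cdf / cdf[-1],
    }
```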
@@ -64,10 +61,13 @@ def get_historical_data(currency: str,
     ohlcv_list = []
     ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
     ohlcv_list.append(ohlcv)
+
     while True:
         from_ts = ohlcv[-1][0]
         new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-
+        if len(new_ohlcv) <= 1:  # No new data or just one overlapping candle
+            break
+        ohlcv.extend(new_ohlcv[1:])  # Skip the first one to avoid duplicates
         if len(new_ohlcv) != 1000:
             break

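
Note: the rewritten loop now actually accumulates the paginated candles; the previous body fetched each page into `new_ohlcv` without ever appending it. The `new_ohlcv[1:]` slice drops the overlapping candle: fetching with `since=from_ts` returns the candle at `from_ts` itself first, which is already `ohlcv[-1]`. A minimal sketch with hypothetical timestamps:

```python
# Two-page fetch, hypothetical [timestamp_ms, price] rows.
ohlcv = [[1000, 1.0], [2000, 1.1]]
new_ohlcv = [[2000, 1.1], [3000, 1.2]]  # page fetched with since=2000
ohlcv.extend(new_ohlcv[1:])             # skip the duplicated first row
assert [row[0] for row in ohlcv] == [1000, 2000, 3000]
```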
@@ -76,6 +76,7 @@ def get_historical_data(currency: str,
     df_hist['date'] = pd.to_datetime(df_hist['date'], unit='ms')
     df_hist.set_index('date', inplace=True)
     df_hist = df_hist.sort_index(ascending=True)
+    df_hist = df_hist[~df_hist.index.duplicated(keep='last')].sort_index()

     logger.info(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")

@@ -83,426 +84,13 @@


 @catch_exception
-def
+def calculate_normal_hd(df_hist: pd.DataFrame,
+                        t: float,
+                        r: float,
+                        n_periods: int,
+                        domains: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
     """
-
-
-    Parameters:
-    -----------
-    window_length : str
-        Window length in days, formatted as '7d', '30d', etc.
-    df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index
-
-    Returns:
-    --------
-    int
-        Number of data points corresponding to the window length
-    """
-    # Validate inputs
-    if not isinstance(window_length, str) or not window_length.endswith('d'):
-        raise VolyError("window_length should be in format '7d', '30d', etc.")
-
-    if len(df_hist) < 2:
-        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
-
-    # Extract number of days
-    days = int(window_length[:-1])
-
-    # Calculate average time delta between data points
-    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
-
-    # Convert to days and calculate points per window
-    days_per_point = avg_delta / (24 * 60 * 60)
-    n_points = int(days / days_per_point)
-
-    # Ensure minimum number of points
-    return max(n_points, 10)
-
-
-def get_param_names(model_type: str, distribution: str) -> List[str]:
-    """
-    Get parameter names for a volatility model and distribution.
-
-    Parameters:
-    -----------
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-
-    Returns:
-    --------
-    List[str]
-        List of parameter names
-    """
-    # GARCH(1,1) parameters
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
-
-    # EGARCH(1,1,1) parameters
-    elif model_type.lower() == 'egarch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
-        elif distribution.lower() == 'skewstudent':
-            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
-
-    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
-
-
-@catch_exception
-def fit_volatility_model(log_returns: np.ndarray,
-                         df_hist: pd.DataFrame,
-                         model_type: str = 'garch',
-                         distribution: str = 'normal',
-                         window_length: str = '30d',
-                         n_fits: int = 400) -> Dict[str, Any]:
-    """
-    Fit a volatility model (GARCH or EGARCH) to historical returns.
-
-    Parameters:
-    -----------
-    log_returns : np.ndarray
-        Array of log returns (percent)
-    df_hist : pd.DataFrame
-        Historical price data
-    model_type : str
-        Type of volatility model ('garch' or 'egarch')
-    distribution : str
-        Distribution type ('normal', 'studentst', or 'skewstudent')
-    window_length : str
-        Length of sliding window in days (e.g., '30d')
-    n_fits : int
-        Number of sliding windows to fit
-
-    Returns:
-    --------
-    Dict[str, Any]
-        Dictionary with model parameters and fitting results
-    """
-    # Parse window length
-    window_points = parse_window_length(window_length, df_hist)
-
-    # Validate data
-    if len(log_returns) < window_points + n_fits:
-        raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
-
-    # Adjust window sizes to avoid overfitting
-    n_fits = min(n_fits, max(100, len(log_returns) // 3))
-    window_points = min(window_points, max(20, len(log_returns) // 3))
-
-    # Calculate start and end indices for sliding windows
-    start_idx = window_points + n_fits
-    end_idx = n_fits
-
-    # Get parameter names for the model
-    param_names = get_param_names(model_type, distribution)
-    n_params = len(param_names)
-
-    # Initialize arrays for parameters and innovations
-    parameters = np.zeros((n_fits, n_params))
-    z_process = []
-
-    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
-                f"using {n_fits} windows of {window_length}")
-
-    # Fit models with sliding windows
-    for i in range(n_fits):
-        # Log progress
-        if i % (n_fits // 10) == 0:
-            logger.info(f"Fitting progress: {i}/{n_fits}")
-
-        # Check if we have enough data for this window
-        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
-            continue
-
-        # Extract window data
-        window = log_returns[end_idx - i - 1:start_idx - i - 1]
-
-        # Skip invalid windows
-        if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
-            continue
-
-        # Mean-center the data for numerical stability
-        data = window - np.mean(window)
-
-        try:
-            # Configure and fit model
-            if model_type.lower() == 'garch':
-                model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
-            else:  # egarch
-                model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
-
-            # Fit with optimization settings
-            fit_result = model.fit(disp='off', options={'maxiter': 1000})
-
-            # Extract parameters
-            params_dict = fit_result.params.to_dict()
-            param_values = [params_dict.get(param, 0) for param in param_names]
-            parameters[i, :] = param_values
-
-            # Extract standardized residuals (innovations)
-            residuals = fit_result.resid
-            conditional_vol = fit_result.conditional_volatility
-
-            if len(residuals) > 0 and len(conditional_vol) > 0:
-                z_t = residuals[-1] / conditional_vol[-1]
-                if not np.isnan(z_t) and not np.isinf(z_t):
-                    z_process.append(z_t)
-
-        except Exception as e:
-            logger.warning(f"Model fit failed for window {i}: {str(e)}")
-
-    # Check if we have enough successful fits
-    if len(z_process) < n_fits / 2:
-        raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
-
-    # Remove failed fits
-    valid_rows = ~np.all(parameters == 0, axis=1)
-    parameters = parameters[valid_rows]
-
-    # Calculate average parameters and standard deviations
-    avg_params = np.mean(parameters, axis=0)
-    std_params = np.std(parameters, axis=0)
-
-    return {
-        'model_type': model_type,
-        'distribution': distribution,
-        'parameters': parameters,
-        'avg_params': avg_params,
-        'std_params': std_params,
-        'z_process': np.array(z_process),
-        'param_names': param_names
-    }
-
-
-@catch_exception
-def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
-    """
-    Create a function to sample innovations based on the volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-
-    Returns:
-    --------
-    Callable
-        Function that returns random innovations when called
-    """
-    distribution = vol_model['distribution']
-    z_process = vol_model['z_process']
-
-    if distribution.lower() == 'normal':
-        # Use standard normal for normal distribution
-        def sample_innovation(size=1):
-            return np.random.normal(0, 1, size=size)
-    else:
-        # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')
-        z_range = np.linspace(min(z_process), max(z_process), 1000)
-        z_prob = kde(z_range)
-        z_prob = z_prob / np.sum(z_prob)
-
-        def sample_innovation(size=1):
-            return np.random.choice(z_range, size=size, p=z_prob)
-
-    return sample_innovation
-
-
-@catch_exception
-def generate_volatility_paths(vol_model: Dict[str, Any],
-                              horizon: int,
-                              simulations: int = 5000) -> Tuple[np.ndarray, float]:
-    """
-    Simulate future price paths using a fitted volatility model.
-
-    Parameters:
-    -----------
-    vol_model : Dict[str, Any]
-        Volatility model information from fit_volatility_model()
-    horizon : int
-        Number of time steps to simulate
-    simulations : int
-        Number of paths to simulate
-
-    Returns:
-    --------
-    Tuple[np.ndarray, float]
-        Array of simulated returns and the drift term
-    """
-    # Extract model information
-    parameters = vol_model['parameters']
-    model_type = vol_model['model_type']
-    distribution = vol_model['distribution']
-    param_names = vol_model['param_names']
-
-    # Get mean parameters
-    pars = vol_model['avg_params'].copy()
-    bounds = vol_model['std_params'].copy()
-
-    # Create parameter dictionary for easier access
-    param_dict = {name: value for name, value in zip(param_names, pars)}
-
-    # Log parameters
-    param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
-    logger.info(f"{model_type.upper()} parameters: {param_str}")
-
-    # Create innovation sampler
-    sample_innovation = create_innovation_sampler(vol_model)
-
-    # Initialize results array
-    simulated_returns = np.zeros(simulations)
-    mu = param_dict.get('mu', 0)
-
-    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
-
-    # Simulate paths
-    for i in range(simulations):
-        # Log progress
-        if (i + 1) % (simulations // 10) == 0:
-            logger.info(f"Simulation progress: {i + 1}/{simulations}")
-
-        # Vary parameters periodically for robustness
-        if (i + 1) % (simulations // 20) == 0:
-            # Create parameter variations based on their estimated distribution
-            sim_params = {}
-            for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
-                var = bound ** 2 / max(len(parameters), 1)
-                # Generate new parameter from normal distribution around the mean
-                new_par = np.random.normal(par, np.sqrt(var))
-
-                # Apply constraints to ensure valid parameters
-                if name == 'omega':
-                    new_par = max(new_par, 1e-6)  # Must be positive
-                elif name in ['alpha[1]', 'beta[1]']:
-                    new_par = max(min(new_par, 0.999), 0.001)  # Between 0 and 1
-                elif name == 'nu':
-                    new_par = max(new_par, 2.1)  # Degrees of freedom > 2
-
-                sim_params[name] = new_par
-        else:
-            sim_params = param_dict.copy()
-
-        # Initialize volatility based on model type
-        if model_type.lower() == 'garch':
-            # Extract GARCH parameters
-            omega = sim_params.get('omega', 0)
-            alpha = sim_params.get('alpha[1]', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize with unconditional variance
-            persistence = alpha + beta
-            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
-
-        else:  # egarch
-            # Extract EGARCH parameters
-            omega = sim_params.get('omega', 0)
-            beta = sim_params.get('beta[1]', 0)
-
-            # Initialize log variance
-            log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
-            sigma2 = np.exp(log_sigma2)
-
-        # Initialize return sum
-        returns_sum = 0
-
-        # Simulate path
-        for _ in range(horizon):
-            # Sample innovation
-            z = sample_innovation()
-
-            # Update returns and volatility based on model type
-            if model_type.lower() == 'garch':
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Update GARCH volatility
-                sigma2 = (sim_params.get('omega', 0) +
-                          sim_params.get('alpha[1]', 0) * e ** 2 +
-                          sim_params.get('beta[1]', 0) * sigma2)
-
-            else:  # egarch
-                # Calculate return
-                e = z * np.sqrt(sigma2)
-                returns_sum += e + mu
-
-                # Extract EGARCH parameters
-                gamma = sim_params.get('gamma[1]', 0)
-                alpha = sim_params.get('alpha[1]', 0)
-                beta = sim_params.get('beta[1]', 0)
-                omega = sim_params.get('omega', 0)
-
-                # Update EGARCH volatility
-                abs_z = abs(z)
-                log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
-                sigma2 = np.exp(log_sigma2)
-
-        # Store final return
-        simulated_returns[i] = returns_sum
-
-    return simulated_returns, mu * horizon
-
-
-@catch_exception
-def prepare_domains(domain_params: Tuple[float, float, int],
-                    s: float,
-                    return_domain: str) -> Dict[str, np.ndarray]:
-    """
-    Prepare domain arrays for different representations.
-
-    Parameters:
-    -----------
-    domain_params : Tuple[float, float, int]
-        (min_log_moneyness, max_log_moneyness, num_points)
-    s : float
-        Spot price
-    return_domain : str
-        Domain for results
-
-    Returns:
-    --------
-    Dict[str, np.ndarray]
-        Dictionary of domain arrays
-    """
-    # Create log-moneyness grid
-    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
-
-    # Calculate other domains
-    M = np.exp(LM)  # Moneyness
-    R = M - 1  # Returns
-    K = s / M  # Strike prices
-
-    # Calculate grid spacing
-    dx = LM[1] - LM[0]
-
-    return {
-        'log_moneyness': LM,
-        'moneyness': M,
-        'returns': R,
-        'strikes': K,
-        'dx': dx
-    }
-
-
-@catch_exception
-def calculate_basic_density(df_hist: pd.DataFrame,
-                            t: float,
-                            r: float,
-                            n_periods: int,
-                            domains: Dict[str, np.ndarray],
-                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
-    """
-    Calculate historical density using KDE of historical returns.
+    Calculate historical density using a normal distribution based on historical returns.

     Parameters:
     -----------
@@ -516,203 +104,54 @@ def calculate_basic_density(df_hist: pd.DataFrame,
         Number of periods to scale returns
     domains : Dict[str, np.ndarray]
         Domain arrays
-    bandwidth : str
-        KDE bandwidth method

     Returns:
     --------
     Dict[str, np.ndarray]
         Dictionary of PDFs in different domains
     """
-    # Extract
+    # Extract log-moneyness domain
     LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
     dx = domains['dx']

-    #
-
-    start_date = pd.Timestamp.now() - pd.Timedelta(days=lookback_days)
-    maturity_hist = df_hist[df_hist.index >= start_date].copy()
-
-    # Better diagnostics for debugging
-    if len(maturity_hist) < 2:
-        n_available = len(df_hist)
-        earliest = df_hist.index[0] if n_available > 0 else "N/A"
-        latest = df_hist.index[-1] if n_available > 0 else "N/A"
-
-        logger.warning(f"Insufficient data for t={t:.4f} years ({lookback_days:.2f} days lookback)")
-        logger.warning(f"Available data: {n_available} points from {earliest} to {latest}")
-        logger.warning(f"Required start date: {start_date}")
-
-        # Try using all available data as fallback
-        if n_available >= 2:
-            logger.warning(f"Using all available {n_available} data points as fallback")
-            maturity_hist = df_hist.copy()
-        else:
-            raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
-
-    # Calculate scaled returns
-    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
-    maturity_hist = maturity_hist.dropna()
-    returns = maturity_hist['log_returns'].values
-
-    if len(returns) < 2:
-        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
-
-    # Girsanov adjustment to shift to risk-neutral measure
-    mu_scaled = returns.mean()
-    sigma_scaled = returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
-    adjustment = mu_scaled - expected_risk_neutral_mean
-    adj_returns = returns - adjustment
-
-    # Create PDF with KDE
-    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
-    pdf_lm = kde(LM)
-
-    # Normalize the PDF
-    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
-
-    # Transform to other domains
-    pdf_m = pdf_lm / M
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }
-
-
-@catch_exception
-def calculate_volatility_density(vol_model: Dict[str, Any],
-                                 s: float,
-                                 t: float,
-                                 r: float,
-                                 n_periods: int,
-                                 tau_days: float,
-                                 domains: Dict[str, np.ndarray],
-                                 simulations: int = 5000,
-                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
-    """
-    Calculate historical density using volatility model simulation.
+    # Calculate log returns
+    returns = np.log(df_hist['close'] / df_hist['close'].shift(1)).dropna().values

-
-
-
-
-
-
-    t : float
-        Time to maturity in years
-    r : float
-        Risk-free rate
-    n_periods : int
-        Number of periods to scale returns
-    tau_days : float
-        Days to maturity
-    domains : Dict[str, np.ndarray]
-        Domain arrays
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method
+    # Filter historical data based on n_periods
+    if len(returns) < n_periods:
+        logger.warning(f"Not enough historical data, using all {len(returns)} points available")
+        dte_returns = returns
+    else:
+        dte_returns = returns[-n_periods:]

-
-
-
-        Dictionary of PDFs in different domains and model parameters
-    """
-    # Extract domains
-    LM = domains['log_moneyness']
-    M = domains['moneyness']
-    R = domains['returns']
-    K = domains['strikes']
-    dx = domains['dx']
+    # Calculate scaled parameters for normal distribution
+    mu_scaled = np.mean(dte_returns) * np.sqrt(n_periods)
+    sigma_scaled = np.std(dte_returns) * np.sqrt(n_periods)

-    #
-
-    simulated_returns, simulated_mu = generate_volatility_paths(
-        vol_model,
-        horizon,
-        simulations
-    )
-
-    # Scale the simulated returns to match target time horizon
-    scaling_factor = np.sqrt(n_periods / tau_days)
-    scaled_returns = simulated_returns * scaling_factor
-
-    # Risk-neutral adjustment
-    mu_scaled = scaled_returns.mean()
-    sigma_scaled = scaled_returns.std()
-    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    # Apply Girsanov adjustment to shift to risk-neutral measure
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
     adjustment = mu_scaled - expected_risk_neutral_mean
-
+    mu_rn = mu_scaled - adjustment

-    #
-
-
-    # Convert to moneyness domain (x-domain)
-    simulated_moneyness = s / simulated_prices
-
-    # Calculate PDF with KDE
-    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-    pdf_m = kde(M)
+    # Calculate PDF using normal distribution in log-moneyness domain
+    pdf_lm = stats.norm.pdf(LM, loc=mu_rn, scale=sigma_scaled)

     # Normalize the PDF
-
+    pdf_lm = normalize_density(pdf_lm, dx)

     # Transform to other domains
-
-    pdf_k = pdf_lm / K
-    pdf_r = pdf_lm / (1 + R)
-
-    # Calculate CDF
-    cdf = np.cumsum(pdf_lm * dx)
-    cdf = cdf / cdf[-1]
+    pdfs = transform_to_domains(pdf_lm, domains)

-
-    avg_params = vol_model['avg_params']
-    param_names = vol_model['param_names']
-    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
-    model_params['model_type'] = vol_model['model_type']
-    model_params['distribution'] = vol_model['distribution']
-
-    # Add persistence for GARCH models
-    if vol_model['model_type'] == 'garch':
-        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-
-    return {
-        'log_moneyness': pdf_lm,
-        'moneyness': pdf_m,
-        'returns': pdf_r,
-        'strikes': pdf_k,
-        'cdf': cdf
-    }, model_params
+    return pdfs


 @catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
-                   return_domain: str = 'log_moneyness',
-                   method: str = 'garch',
-                   distribution: str = 'normal',
-                   window_length: str = '30d',
-                   n_fits: int = 400,
-                   simulations: int = 5000,
-                   bandwidth: str = 'silverman') -> Dict[str, Any]:
+                   return_domain: str = 'log_moneyness') -> Dict[str, Any]:
     """
-    Generate historical density surface
+    Generate historical density surface using normal distributions.

     Parameters:
     -----------
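
Note: the risk-neutral recentring in the new `calculate_normal_hd` collapses algebraically. Substituting `adjustment = mu_scaled - expected_risk_neutral_mean` into `mu_rn = mu_scaled - adjustment` gives

$$\mu_{\mathrm{rn}} \;=\; \mu_{\mathrm{scaled}} - \bigl(\mu_{\mathrm{scaled}} - (r - \tfrac{1}{2}\sigma_{\mathrm{scaled}}^{2})\sqrt{t}\,\bigr) \;=\; \bigl(r - \tfrac{1}{2}\sigma_{\mathrm{scaled}}^{2}\bigr)\sqrt{t},$$

so the empirical mean cancels entirely and `pdf_lm` is a normal density centred at the risk-neutral drift with the empirical volatility, $\mathcal{N}(\mu_{\mathrm{rn}}, \sigma_{\mathrm{scaled}}^{2})$, evaluated on the log-moneyness grid. (The code scales the drift by $\sqrt{t}$, matching how `mu_scaled` and `sigma_scaled` are scaled by `np.sqrt(n_periods)` above.)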
@@ -724,18 +163,6 @@ def get_hd_surface(model_results: pd.DataFrame,
         (min_log_moneyness, max_log_moneyness, num_points)
     return_domain : str
         Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
-    method : str
-        Method for HD estimation ('garch', 'egarch', 'basic')
-    distribution : str
-        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
-    window_length : str
-        Length of sliding windows for model fitting (e.g., '30d')
-    n_fits : int
-        Number of sliding windows for model fitting
-    simulations : int
-        Number of Monte Carlo simulations
-    bandwidth : str
-        KDE bandwidth method

     Returns:
     --------
@@ -751,46 +178,14 @@ def get_hd_surface(model_results: pd.DataFrame,
     if len(df_hist) < 2:
         raise VolyError("Not enough data points in df_hist")

-    # Determine granularity from data
-    minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
-    minutes_per_period = max(1, int(minutes_diff))
-
-    # Validate method and distribution
-    valid_methods = ['garch', 'egarch', 'basic']
-    valid_distributions = ['normal', 'studentst', 'skewstudent']
-
-    method = method.lower()
-    distribution = distribution.lower()
-
-    if method not in valid_methods:
-        raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
-
-    if method in ['garch', 'egarch'] and distribution not in valid_distributions:
-        raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
-
     # Validate return domain
     valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
     if return_domain not in valid_domains:
         raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")

-    #
-
-
-
-    # Fit volatility model if needed
-    vol_model = None
-    if method in ['garch', 'egarch']:
-        model_type = method
-        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
-
-        vol_model = fit_volatility_model(
-            log_returns=log_returns,
-            df_hist=df_hist,
-            model_type=model_type,
-            distribution=distribution,
-            window_length=window_length,
-            n_fits=n_fits
-        )
+    # Determine granularity from data (minutes between data points)
+    time_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
+    minutes_per_period = max(1, int(time_diff))

     # Initialize result containers
     pdf_surface = {}
@@ -803,67 +198,34 @@ def get_hd_surface(model_results: pd.DataFrame,
         try:
             # Get parameters for this maturity
             s = model_results.loc[i, 's']  # Spot price
-            r = model_results.loc[i, 'r']  # Risk-free rate
             t = model_results.loc[i, 't']  # Time to maturity in years
+            r = model_results.loc[i, 'r']  # Risk-free rate

-            # Calculate
-
-            n_periods = max(1, int(
-
-            logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+            # Calculate relevant periods for this maturity
+            dte = t * 365.25  # Days to expiry
+            n_periods = max(1, int(dte * 24 * 60 / minutes_per_period))

             # Prepare domains
-            domains = prepare_domains(domain_params, s
-
-            # Calculate density
-
-
-
-
-
-
-
-
-
-
-
-
-
-                logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
-                continue
-
-            pdfs, model_params = calculate_volatility_density(
-                vol_model=vol_model,
-                s=s,
-                t=t,
-                r=r,
-                n_periods=n_periods,
-                tau_days=tau_days,
-                domains=domains,
-                simulations=simulations,
-                bandwidth=bandwidth
-            )
-
-            # Get domain arrays for output
-            if return_domain == 'log_moneyness':
-                x = domains['log_moneyness']
-                pdf = pdfs['log_moneyness']
-            elif return_domain == 'moneyness':
-                x = domains['moneyness']
-                pdf = pdfs['moneyness']
-            elif return_domain == 'returns':
-                x = domains['returns']
-                pdf = pdfs['returns']
-            elif return_domain == 'strikes':
-                x = domains['strikes']
-                pdf = pdfs['strikes']
-
-            # Calculate statistical moments
-            moments = get_all_moments(x, pdf, model_params)
+            domains = prepare_domains(domain_params, s)
+
+            # Calculate density
+            pdfs = calculate_normal_hd(
+                df_hist=df_hist,
+                t=t,
+                r=r,
+                n_periods=n_periods,
+                domains=domains
+            )
+
+            # Select results for the requested domain
+            pdf, cdf, x = select_domain_results(pdfs, domains, return_domain)
+
+            # Calculate moments
+            moments = get_all_moments(x, pdf)

             # Store results
             pdf_surface[i] = pdf
-            cdf_surface[i] =
+            cdf_surface[i] = cdf
             x_surface[i] = x
             all_moments[i] = moments

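
Note: `n_periods` above converts time-to-expiry into a number of candles at the data's granularity, so each maturity's density is built from roughly one candle-return per period until expiry. A worked example with hypothetical hourly data:

```python
# Hypothetical inputs: hourly candles, 30 days to expiry.
minutes_per_period = 60
dte = 30.0  # t * 365.25 in the code above
n_periods = max(1, int(dte * 24 * 60 / minutes_per_period))
assert n_periods == 720  # the last 720 hourly log returns feed the density
```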
@@ -877,7 +239,7 @@ def get_hd_surface(model_results: pd.DataFrame,
     # Create DataFrame with moments
     moments = pd.DataFrame(all_moments).T

-    logger.info(
+    logger.info("Historical density calculation complete using normal distribution")

     return {
         'pdf_surface': pdf_surface,