voly 0.0.145__py3-none-any.whl → 0.0.147__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voly/client.py +9 -4
- voly/core/hd.py +647 -393
- voly/core/rnd.py +386 -272
- {voly-0.0.145.dist-info → voly-0.0.147.dist-info}/METADATA +1 -1
- {voly-0.0.145.dist-info → voly-0.0.147.dist-info}/RECORD +8 -8
- {voly-0.0.145.dist-info → voly-0.0.147.dist-info}/WHEEL +0 -0
- {voly-0.0.145.dist-info → voly-0.0.147.dist-info}/licenses/LICENSE +0 -0
- {voly-0.0.145.dist-info → voly-0.0.147.dist-info}/top_level.txt +0 -0
voly/core/hd.py
CHANGED
```diff
@@ -8,7 +8,7 @@ import pandas as pd
 import numpy as np
 import datetime as dt
 from scipy import stats
-from typing import Dict, List, Tuple, Optional, Union, Any
+from typing import Dict, List, Tuple, Optional, Union, Any, Callable
 from voly.utils.logger import logger, catch_exception
 from voly.exceptions import VolyError
 from voly.core.rnd import get_all_moments
```
```diff
@@ -16,29 +16,35 @@ from voly.formulas import iv, get_domain
 from voly.models import SVIModel
 from voly.core.fit import fit_model
 from arch import arch_model
-from arch.univariate import GARCH, EGARCH
 
 
 @catch_exception
-def get_historical_data(currency, lookback_days, granularity, exchange_name):
+def get_historical_data(currency: str,
+                        lookback_days: str,
+                        granularity: str,
+                        exchange_name: str) -> pd.DataFrame:
     """
     Fetch historical OHLCV data for a cryptocurrency.
 
     Parameters:
-
+    -----------
     currency : str
-        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH')
+        The cryptocurrency to fetch data for (e.g., 'BTC', 'ETH')
     lookback_days : str
         The lookback period in days, formatted as '90d', '30d', etc.
     granularity : str
-        The time interval for data points (e.g., '15m', '1h', '1d')
+        The time interval for data points (e.g., '15m', '1h', '1d')
     exchange_name : str
-        The exchange to fetch data from (default: 'binance')
+        The exchange to fetch data from (default: 'binance')
 
     Returns:
-
-
+    --------
+    pd.DataFrame
+        Historical price data with OHLCV columns and datetime index
     """
+    # Validate inputs
+    if not lookback_days.endswith('d'):
+        raise VolyError("lookback_days should be in format '90d', '30d', etc.")
 
     try:
         # Get the exchange class from ccxt
```
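The rewritten `get_historical_data` now rejects a malformed lookback string before any network call. A minimal standalone sketch of that parsing step, not part of the package itself: it assumes the `ccxt` dependency the module already uses, and `binance` is only an example exchange.

```python
import datetime as dt

import ccxt  # assumed dependency; this module fetches OHLCV data through ccxt


def lookback_to_since(lookback_days: str, exchange: ccxt.Exchange) -> int:
    """Hypothetical helper mirroring the validation and parsing in this diff.

    Translates a '90d'-style lookback into a ccxt `since` timestamp (ms).
    """
    if not lookback_days.endswith('d'):
        raise ValueError("lookback_days should be in format '90d', '30d', etc.")
    days_ago = int(lookback_days[:-1])
    date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
    return exchange.parse8601(date_start)


exchange = ccxt.binance()  # example exchange; no network call until data is actually fetched
since = lookback_to_since('90d', exchange)
```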
```diff
@@ -48,252 +54,277 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
             raise VolyError(f"Exchange '{exchange_name}' not found in ccxt. Please check the exchange name.")
 
         # Form the trading pair symbol
-        symbol = currency
+        symbol = f"{currency}/USDT"
 
         # Convert lookback_days to timestamp
-
-
-            date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
-        else:
-            raise VolyError("lookback_days should be in format '90d', '30d', etc.")
-
+        days_ago = int(lookback_days[:-1])
+        date_start = (dt.datetime.now() - dt.timedelta(days=days_ago)).strftime('%Y-%m-%d %H:%M:%S')
         from_ts = exchange.parse8601(date_start)
-
-
-
+
+        # Fetch data with pagination
+        ohlcv = []
+        last_ts = from_ts
+
+        logger.info(f"Fetching {currency} historical data from {exchange_name} for past {days_ago} days")
+
         while True:
-
-
-
-            if len(new_ohlcv) != 1000:
+            batch = exchange.fetch_ohlcv(symbol, granularity, since=last_ts, limit=1000)
+
+            if not batch or len(batch) == 0:
                 break
 
+            if len(ohlcv) > 0 and batch[0][0] == ohlcv[-1][0]:
+                # Skip first element if it's a duplicate of the last from previous batch
+                batch = batch[1:]
+
+                if not batch:
+                    break
+
+            ohlcv.extend(batch)
+            last_ts = batch[-1][0]
+
+            if len(batch) < 1000:
+                break
+
+            logger.debug(f"Fetched {len(batch)} candles, total now: {len(ohlcv)}")
+
         # Convert to DataFrame
         df_hist = pd.DataFrame(ohlcv, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
         df_hist['date'] = pd.to_datetime(df_hist['date'], unit='ms')
         df_hist.set_index('date', inplace=True)
         df_hist = df_hist.sort_index(ascending=True)
 
-
+        logger.info(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")
 
         return df_hist
 
 
 @catch_exception
-def parse_window_length(window_length, df_hist):
+def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
     """
-
+    Convert window length string (e.g., '30d') to number of data points.
 
     Parameters:
     -----------
     window_length : str
         Window length in days, formatted as '7d', '30d', etc.
     df_hist : pd.DataFrame
-        Historical data DataFrame with datetime index
+        Historical data DataFrame with datetime index
 
     Returns:
     --------
     int
-        Number of data points corresponding to the window length
+        Number of data points corresponding to the window length
     """
-
+    # Validate inputs
+    if not isinstance(window_length, str) or not window_length.endswith('d'):
         raise VolyError("window_length should be in format '7d', '30d', etc.")
 
+    if len(df_hist) < 2:
+        raise VolyError("Historical data must contain at least 2 points to calculate granularity")
+
     # Extract number of days
     days = int(window_length[:-1])
 
-    # Calculate time delta between
-
-
-
-
-
-
-
-
-
+    # Calculate average time delta between data points
+    avg_delta = (df_hist.index[-1] - df_hist.index[0]).total_seconds() / (len(df_hist) - 1)
+
+    # Convert to days and calculate points per window
+    days_per_point = avg_delta / (24 * 60 * 60)
+    n_points = int(days / days_per_point)
+
+    # Ensure minimum number of points
+    return max(n_points, 10)
+
+
+def get_param_names(model_type: str, distribution: str) -> List[str]:
+    """
+    Get parameter names for a volatility model and distribution.
+
+    Parameters:
+    -----------
+    model_type : str
+        Type of volatility model ('garch' or 'egarch')
+    distribution : str
+        Distribution type ('normal', 'studentst', or 'skewstudent')
+
+    Returns:
+    --------
+    List[str]
+        List of parameter names
+    """
+    # GARCH(1,1) parameters
+    if model_type.lower() == 'garch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
+
+    # EGARCH(1,1,1) parameters
+    elif model_type.lower() == 'egarch':
+        if distribution.lower() == 'normal':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
+        elif distribution.lower() == 'studentst':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
+        elif distribution.lower() == 'skewstudent':
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
+
+    raise VolyError(f"Invalid model_type '{model_type}' or distribution '{distribution}'")
 
 
 @catch_exception
-def fit_volatility_model(log_returns
-
+def fit_volatility_model(log_returns: np.ndarray,
+                         df_hist: pd.DataFrame,
+                         model_type: str = 'garch',
+                         distribution: str = 'normal',
+                         window_length: str = '30d',
+                         n_fits: int = 400) -> Dict[str, Any]:
     """
-    Fit a volatility model (GARCH or EGARCH) to
+    Fit a volatility model (GARCH or EGARCH) to historical returns.
 
-
-
-
-
-
-
-
+    Parameters:
+    -----------
+    log_returns : np.ndarray
+        Array of log returns (percent)
+    df_hist : pd.DataFrame
+        Historical price data
+    model_type : str
+        Type of volatility model ('garch' or 'egarch')
+    distribution : str
+        Distribution type ('normal', 'studentst', or 'skewstudent')
+    window_length : str
+        Length of sliding window in days (e.g., '30d')
+    n_fits : int
+        Number of sliding windows to fit
 
     Returns:
-
+    --------
+    Dict[str, Any]
+        Dictionary with model parameters and fitting results
     """
     # Parse window length
     window_points = parse_window_length(window_length, df_hist)
 
+    # Validate data
     if len(log_returns) < window_points + n_fits:
         raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
 
-    # Adjust window sizes
-    n_fits = min(n_fits, len(log_returns) // 3)
-    window_points = min(window_points, len(log_returns) // 3)
+    # Adjust window sizes to avoid overfitting
+    n_fits = min(n_fits, max(100, len(log_returns) // 3))
+    window_points = min(window_points, max(20, len(log_returns) // 3))
 
-    start
-
+    # Calculate start and end indices for sliding windows
+    start_idx = window_points + n_fits
+    end_idx = n_fits
 
-    #
-
-
-            n_params = 4  # mu, omega, alpha, beta
-        elif distribution.lower() == 'studentst':
-            n_params = 5  # mu, omega, alpha, beta, nu
-        else:  # skewstudent
-            n_params = 6  # mu, omega, alpha, beta, nu, lambda (skew)
-    else:  # egarch
-        if distribution.lower() == 'normal':
-            n_params = 5  # mu, omega, alpha, gamma, beta
-        elif distribution.lower() == 'studentst':
-            n_params = 6  # mu, omega, alpha, gamma, beta, nu
-        else:  # skewstudent
-            n_params = 7  # mu, omega, alpha, gamma, beta, nu, lambda (skew)
+    # Get parameter names for the model
+    param_names = get_param_names(model_type, distribution)
+    n_params = len(param_names)
 
+    # Initialize arrays for parameters and innovations
     parameters = np.zeros((n_fits, n_params))
     z_process = []
 
-    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution
+    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution "
+                f"using {n_fits} windows of {window_length}")
 
+    # Fit models with sliding windows
     for i in range(n_fits):
-
+        # Log progress
+        if i % (n_fits // 10) == 0:
+            logger.info(f"Fitting progress: {i}/{n_fits}")
+
+        # Check if we have enough data for this window
+        if end_idx - i - 1 < 0 or start_idx - i - 1 > len(log_returns):
+            continue
+
+        # Extract window data
+        window = log_returns[end_idx - i - 1:start_idx - i - 1]
+
+        # Skip invalid windows
+        if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
+            continue
+
+        # Mean-center the data for numerical stability
         data = window - np.mean(window)
 
         try:
-            # Configure
+            # Configure and fit model
             if model_type.lower() == 'garch':
                 model = arch_model(data, vol='GARCH', p=1, q=1, dist=distribution.lower())
             else:  # egarch
                 model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
 
-
+            # Fit with optimization settings
+            fit_result = model.fit(disp='off', options={'maxiter': 1000})
 
-            # Extract parameters
+            # Extract parameters
             params_dict = fit_result.params.to_dict()
+            param_values = [params_dict.get(param, 0) for param in param_names]
+            parameters[i, :] = param_values
 
-
-                mu = params_dict.get("mu", 0)
-                omega = params_dict.get("omega", 0)
-                alpha = params_dict.get("alpha[1]", 0)
-                beta = params_dict.get("beta[1]", 0)
-
-                if distribution.lower() == 'normal':
-                    parameters[i, :] = [mu, omega, alpha, beta]
-                elif distribution.lower() == 'studentst':
-                    nu = params_dict.get("nu", 0)
-                    parameters[i, :] = [mu, omega, alpha, beta, nu]
-                else:  # skewstudent
-                    nu = params_dict.get("nu", 0)
-                    lam = params_dict.get("lambda", 0)
-                    parameters[i, :] = [mu, omega, alpha, beta, nu, lam]
-            else:  # egarch
-                mu = params_dict.get("mu", 0)
-                omega = params_dict.get("omega", 0)
-                alpha = params_dict.get("alpha[1]", 0)
-                gamma = params_dict.get("gamma[1]", 0)
-                beta = params_dict.get("beta[1]", 0)
-
-                if distribution.lower() == 'normal':
-                    parameters[i, :] = [mu, omega, alpha, gamma, beta]
-                elif distribution.lower() == 'studentst':
-                    nu = params_dict.get("nu", 0)
-                    parameters[i, :] = [mu, omega, alpha, gamma, beta, nu]
-                else:  # skewstudent
-                    nu = params_dict.get("nu", 0)
-                    lam = params_dict.get("lambda", 0)
-                    parameters[i, :] = [mu, omega, alpha, gamma, beta, nu, lam]
-
-            # Get last innovation
+            # Extract standardized residuals (innovations)
             residuals = fit_result.resid
             conditional_vol = fit_result.conditional_volatility
-
-
+
+            if len(residuals) > 0 and len(conditional_vol) > 0:
+                z_t = residuals[-1] / conditional_vol[-1]
+                if not np.isnan(z_t) and not np.isinf(z_t):
+                    z_process.append(z_t)
 
         except Exception as e:
             logger.warning(f"Model fit failed for window {i}: {str(e)}")
 
-    #
+    # Check if we have enough successful fits
     if len(z_process) < n_fits / 2:
-        raise VolyError("Too many model fits failed. Check your data.")
+        raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
+
+    # Remove failed fits
+    valid_rows = ~np.all(parameters == 0, axis=1)
+    parameters = parameters[valid_rows]
 
+    # Calculate average parameters and standard deviations
     avg_params = np.mean(parameters, axis=0)
     std_params = np.std(parameters, axis=0)
 
     return {
+        'model_type': model_type,
+        'distribution': distribution,
         'parameters': parameters,
         'avg_params': avg_params,
         'std_params': std_params,
         'z_process': np.array(z_process),
-        '
-        'distribution': distribution,
-        'param_names': get_param_names(model_type, distribution)
+        'param_names': param_names
     }
 
 
-def get_param_names(model_type, distribution):
-    """Get parameter names based on model type and distribution."""
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha', 'beta']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha', 'beta', 'nu']
-        else:  # skewstudent
-            return ['mu', 'omega', 'alpha', 'beta', 'nu', 'lambda']
-    else:  # egarch
-        if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha', 'gamma', 'beta']
-        elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu']
-        else:  # skewstudent
-            return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu', 'lambda']
-
-
 @catch_exception
-def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
+def create_innovation_sampler(vol_model: Dict[str, Any]) -> Callable:
     """
-
+    Create a function to sample innovations based on the volatility model.
 
-
-
-
-
-        variate_parameters: Whether to vary parameters between simulations
+    Parameters:
+    -----------
+    vol_model : Dict[str, Any]
+        Volatility model information from fit_volatility_model()
 
     Returns:
-
+    --------
+    Callable
+        Function that returns random innovations when called
     """
-    parameters = vol_model['parameters']
-    z_process = vol_model['z_process']
-    model_type = vol_model['model_type']
     distribution = vol_model['distribution']
-
-
-    # Use mean parameters as starting point
-    pars = vol_model['avg_params'].copy()
-    bounds = vol_model['std_params'].copy()
-
-    # Log parameters
-    param_str = ", ".join([f"{name}={par:.6f}" for name, par in zip(param_names, pars)])
-    logger.info(f"{model_type.upper()} parameters: {param_str}")
+    z_process = vol_model['z_process']
 
-    # Create KDE for innovations based on distribution
     if distribution.lower() == 'normal':
         # Use standard normal for normal distribution
         def sample_innovation(size=1):
             return np.random.normal(0, 1, size=size)
     else:
         # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')
+        kde = stats.gaussian_kde(z_process, bw_method='silverman')
         z_range = np.linspace(min(z_process), max(z_process), 1000)
         z_prob = kde(z_range)
         z_prob = z_prob / np.sum(z_prob)
```
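Taken together, the helpers reworked in this hunk compose into a short pipeline. Below is a sketch of the intended call sequence, assuming these functions are importable from `voly.core.hd` as the file path suggests; fetching data requires network access and the `ccxt` dependency.

```python
import numpy as np

from voly.core.hd import get_historical_data, fit_volatility_model

# Roughly a year of hourly candles (network access and ccxt required)
df_hist = get_historical_data(currency='BTC', lookback_days='365d',
                              granularity='1h', exchange_name='binance')

# Percent log returns, the convention used throughout this module
log_returns = (np.log(df_hist['close'] / df_hist['close'].shift(1))
               .dropna().values * 100)

vol_model = fit_volatility_model(
    log_returns=log_returns,
    df_hist=df_hist,
    model_type='garch',        # or 'egarch'
    distribution='studentst',  # 'normal', 'studentst', or 'skewstudent'
    window_length='30d',
    n_fits=400,
)

# Parameters averaged over the sliding-window fits
print(dict(zip(vol_model['param_names'], vol_model['avg_params'])))
```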
```diff
@@ -301,52 +332,104 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
         def sample_innovation(size=1):
             return np.random.choice(z_range, size=size, p=z_prob)
 
-
+    return sample_innovation
+
+
+@catch_exception
+def generate_volatility_paths(vol_model: Dict[str, Any],
+                              horizon: int,
+                              simulations: int = 5000) -> Tuple[np.ndarray, float]:
+    """
+    Simulate future price paths using a fitted volatility model.
+
+    Parameters:
+    -----------
+    vol_model : Dict[str, Any]
+        Volatility model information from fit_volatility_model()
+    horizon : int
+        Number of time steps to simulate
+    simulations : int
+        Number of paths to simulate
+
+    Returns:
+    --------
+    Tuple[np.ndarray, float]
+        Array of simulated returns and the drift term
+    """
+    # Extract model information
+    parameters = vol_model['parameters']
+    model_type = vol_model['model_type']
+    distribution = vol_model['distribution']
+    param_names = vol_model['param_names']
+
+    # Get mean parameters
+    pars = vol_model['avg_params'].copy()
+    bounds = vol_model['std_params'].copy()
+
+    # Create parameter dictionary for easier access
+    param_dict = {name: value for name, value in zip(param_names, pars)}
+
+    # Log parameters
+    param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
+    logger.info(f"{model_type.upper()} parameters: {param_str}")
+
+    # Create innovation sampler
+    sample_innovation = create_innovation_sampler(vol_model)
+
+    # Initialize results array
     simulated_returns = np.zeros(simulations)
+    mu = param_dict.get('mu', 0)
+
+    logger.info(f"Simulating {simulations} paths for horizon {horizon}")
 
+    # Simulate paths
     for i in range(simulations):
+        # Log progress
         if (i + 1) % (simulations // 10) == 0:
             logger.info(f"Simulation progress: {i + 1}/{simulations}")
 
-        #
-        if
-
-
-
-
-        #
-
-
-
-
+        # Vary parameters periodically for robustness
+        if (i + 1) % (simulations // 20) == 0:
+            # Create parameter variations based on their estimated distribution
+            sim_params = {}
+            for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
+                var = bound ** 2 / max(len(parameters), 1)
+                # Generate new parameter from normal distribution around the mean
+                new_par = np.random.normal(par, np.sqrt(var))
+
+                # Apply constraints to ensure valid parameters
+                if name == 'omega':
+                    new_par = max(new_par, 1e-6)  # Must be positive
+                elif name in ['alpha[1]', 'beta[1]']:
+                    new_par = max(min(new_par, 0.999), 0.001)  # Between 0 and 1
+                elif name == 'nu':
+                    new_par = max(new_par, 2.1)  # Degrees of freedom > 2
+
+                sim_params[name] = new_par
         else:
-
+            sim_params = param_dict.copy()
 
-        # Initialize
+        # Initialize volatility based on model type
         if model_type.lower() == 'garch':
-
-
-
-
-
-
-
-
-
+            # Extract GARCH parameters
+            omega = sim_params.get('omega', 0)
+            alpha = sim_params.get('alpha[1]', 0)
+            beta = sim_params.get('beta[1]', 0)
+
+            # Initialize with unconditional variance
+            persistence = alpha + beta
+            sigma2 = omega / (1 - persistence) if persistence < 1 else omega / 0.99
+
         else:  # egarch
-
-
-
-
-
-
-
-            sigma2 = np.exp(log_sigma2)
-            else:  # skewstudent
-                mu, omega, alpha, gamma, beta, nu, lam = sim_pars
-                log_sigma2 = omega / (1 - beta)
-                sigma2 = np.exp(log_sigma2)
+            # Extract EGARCH parameters
+            omega = sim_params.get('omega', 0)
+            beta = sim_params.get('beta[1]', 0)
+
+            # Initialize log variance
+            log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
+            sigma2 = np.exp(log_sigma2)
 
+        # Initialize return sum
         returns_sum = 0
 
         # Simulate path
```
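The sampler factored out into `create_innovation_sampler` draws standardized innovations from a Gaussian KDE of the fitted residuals by discretizing the density on a grid. A self-contained sketch of that technique on synthetic data; the Student-t residuals here are illustrative only, standing in for `vol_model['z_process']`.

```python
import numpy as np
from scipy import stats

# Synthetic standardized residuals standing in for vol_model['z_process']
rng = np.random.default_rng(42)
z_process = rng.standard_t(df=5, size=400)

# Fit a KDE, evaluate it on a fixed grid, and normalize into a discrete pmf
kde = stats.gaussian_kde(z_process, bw_method='silverman')
z_range = np.linspace(z_process.min(), z_process.max(), 1000)
z_prob = kde(z_range)
z_prob = z_prob / np.sum(z_prob)


def sample_innovation(size=1):
    # Draw grid points with probability proportional to the KDE density
    return np.random.choice(z_range, size=size, p=z_prob)


draws = sample_innovation(size=10_000)
print(draws.mean(), draws.std())  # close to the empirical moments of z_process
```

Discretizing onto a fixed grid trades a little tail resolution for very cheap sampling, which matters here because the simulator calls the sampler once per time step per path.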
```diff
@@ -354,116 +437,364 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
             # Sample innovation
             z = sample_innovation()
 
-            # Update
+            # Update returns and volatility based on model type
             if model_type.lower() == 'garch':
                 # Calculate return
                 e = z * np.sqrt(sigma2)
                 returns_sum += e + mu
 
                 # Update GARCH volatility
-                sigma2 = omega
+                sigma2 = (sim_params.get('omega', 0) +
+                          sim_params.get('alpha[1]', 0) * e ** 2 +
+                          sim_params.get('beta[1]', 0) * sigma2)
+
             else:  # egarch
                 # Calculate return
                 e = z * np.sqrt(sigma2)
                 returns_sum += e + mu
 
+                # Extract EGARCH parameters
+                gamma = sim_params.get('gamma[1]', 0)
+                alpha = sim_params.get('alpha[1]', 0)
+                beta = sim_params.get('beta[1]', 0)
+                omega = sim_params.get('omega', 0)
+
                 # Update EGARCH volatility
                 abs_z = abs(z)
                 log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
                 sigma2 = np.exp(log_sigma2)
 
+        # Store final return
         simulated_returns[i] = returns_sum
 
     return simulated_returns, mu * horizon
 
 
+@catch_exception
+def prepare_domains(domain_params: Tuple[float, float, int],
+                    s: float,
+                    return_domain: str) -> Dict[str, np.ndarray]:
+    """
+    Prepare domain arrays for different representations.
+
+    Parameters:
+    -----------
+    domain_params : Tuple[float, float, int]
+        (min_log_moneyness, max_log_moneyness, num_points)
+    s : float
+        Spot price
+    return_domain : str
+        Domain for results
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of domain arrays
+    """
+    # Create log-moneyness grid
+    LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
+
+    # Calculate other domains
+    M = np.exp(LM)  # Moneyness
+    R = M - 1  # Returns
+    K = s / M  # Strike prices
+
+    # Calculate grid spacing
+    dx = LM[1] - LM[0]
+
+    return {
+        'log_moneyness': LM,
+        'moneyness': M,
+        'returns': R,
+        'strikes': K,
+        'dx': dx
+    }
+
+
+@catch_exception
+def calculate_basic_density(df_hist: pd.DataFrame,
+                            t: float,
+                            r: float,
+                            n_periods: int,
+                            domains: Dict[str, np.ndarray],
+                            bandwidth: str = 'silverman') -> Dict[str, np.ndarray]:
+    """
+    Calculate historical density using KDE of historical returns.
+
+    Parameters:
+    -----------
+    df_hist : pd.DataFrame
+        Historical price data
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Dict[str, np.ndarray]
+        Dictionary of PDFs in different domains
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Filter historical data for the maturity's lookback period
+    start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
+    maturity_hist = df_hist[df_hist.index >= start_date].copy()
+
+    if len(maturity_hist) < 10:
+        raise VolyError(f"Not enough historical data for maturity (t={t:.4f})")
+
+    # Calculate scaled returns
+    maturity_hist['log_returns'] = np.log(maturity_hist['close'] / maturity_hist['close'].shift(1)) * np.sqrt(n_periods)
+    maturity_hist = maturity_hist.dropna()
+    returns = maturity_hist['log_returns'].values
+
+    if len(returns) < 2:
+        raise VolyError(f"Not enough valid returns for maturity (t={t:.4f})")
+
+    # Girsanov adjustment to shift to risk-neutral measure
+    mu_scaled = returns.mean()
+    sigma_scaled = returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * sigma_scaled ** 2) * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    adj_returns = returns - adjustment
+
+    # Create PDF with KDE
+    kde = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
+    pdf_lm = kde(LM)
+
+    # Normalize the PDF
+    pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)
+
+    # Transform to other domains
+    pdf_m = pdf_lm / M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }
+
+
+@catch_exception
+def calculate_volatility_density(vol_model: Dict[str, Any],
+                                 s: float,
+                                 t: float,
+                                 r: float,
+                                 n_periods: int,
+                                 tau_days: float,
+                                 domains: Dict[str, np.ndarray],
+                                 simulations: int = 5000,
+                                 bandwidth: str = 'silverman') -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
+    """
+    Calculate historical density using volatility model simulation.
+
+    Parameters:
+    -----------
+    vol_model : Dict[str, Any]
+        Volatility model from fit_volatility_model()
+    s : float
+        Spot price
+    t : float
+        Time to maturity in years
+    r : float
+        Risk-free rate
+    n_periods : int
+        Number of periods to scale returns
+    tau_days : float
+        Days to maturity
+    domains : Dict[str, np.ndarray]
+        Domain arrays
+    simulations : int
+        Number of Monte Carlo simulations
+    bandwidth : str
+        KDE bandwidth method
+
+    Returns:
+    --------
+    Tuple[Dict[str, np.ndarray], Dict[str, Any]]
+        Dictionary of PDFs in different domains and model parameters
+    """
+    # Extract domains
+    LM = domains['log_moneyness']
+    M = domains['moneyness']
+    R = domains['returns']
+    K = domains['strikes']
+    dx = domains['dx']
+
+    # Simulate paths with the volatility model
+    horizon = max(1, int(tau_days))
+    simulated_returns, simulated_mu = generate_volatility_paths(
+        vol_model,
+        horizon,
+        simulations
+    )
+
+    # Scale the simulated returns to match target time horizon
+    scaling_factor = np.sqrt(n_periods / tau_days)
+    scaled_returns = simulated_returns * scaling_factor
+
+    # Risk-neutral adjustment
+    mu_scaled = scaled_returns.mean()
+    sigma_scaled = scaled_returns.std()
+    expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
+    adjustment = mu_scaled - expected_risk_neutral_mean
+    risk_neutral_returns = scaled_returns - adjustment
+
+    # Convert to terminal prices
+    simulated_prices = s * np.exp(risk_neutral_returns / 100)
+
+    # Convert to moneyness domain (x-domain)
+    simulated_moneyness = s / simulated_prices
+
+    # Calculate PDF with KDE
+    kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
+    pdf_m = kde(M)
+
+    # Normalize the PDF
+    pdf_m = pdf_m / np.trapz(pdf_m, M)
+
+    # Transform to other domains
+    pdf_lm = pdf_m * M
+    pdf_k = pdf_lm / K
+    pdf_r = pdf_lm / (1 + R)
+
+    # Calculate CDF
+    cdf = np.cumsum(pdf_lm * dx)
+    cdf = cdf / cdf[-1]
+
+    # Prepare model parameters for moments
+    avg_params = vol_model['avg_params']
+    param_names = vol_model['param_names']
+    model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
+    model_params['model_type'] = vol_model['model_type']
+    model_params['distribution'] = vol_model['distribution']
+
+    # Add persistence for GARCH models
+    if vol_model['model_type'] == 'garch':
+        model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
+
+    return {
+        'log_moneyness': pdf_lm,
+        'moneyness': pdf_m,
+        'returns': pdf_r,
+        'strikes': pdf_k,
+        'cdf': cdf
+    }, model_params
+
+
+@catch_exception
 def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
                    return_domain: str = 'log_moneyness',
-                   method: str = '
-                   model_type: str = 'garch',
+                   method: str = 'garch',
                    distribution: str = 'normal',
-
+                   window_length: str = '30d',
+                   n_fits: int = 400,
+                   simulations: int = 5000,
+                   bandwidth: str = 'silverman') -> Dict[str, Any]:
     """
     Generate historical density surface from historical price data.
 
     Parameters:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    -----------
+    model_results : pd.DataFrame
+        DataFrame with model parameters and maturities
+    df_hist : pd.DataFrame
+        DataFrame with historical price data
+    domain_params : Tuple[float, float, int]
+        (min_log_moneyness, max_log_moneyness, num_points)
+    return_domain : str
+        Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes')
+    method : str
+        Method for HD estimation ('garch', 'egarch', 'basic')
+    distribution : str
+        Distribution for volatility models ('normal', 'studentst', 'skewstudent')
+    window_length : str
+        Length of sliding windows for model fitting (e.g., '30d')
+    n_fits : int
+        Number of sliding windows for model fitting
+    simulations : int
+        Number of Monte Carlo simulations
+    bandwidth : str
+        KDE bandwidth method
 
     Returns:
-
+    --------
+    Dict[str, Any]
+        Dictionary with pdf_surface, cdf_surface, x_surface, and moments
     """
-    #
+    # Validate inputs
     required_columns = ['s', 't', 'r']
     missing_columns = [col for col in required_columns if col not in model_results.columns]
     if missing_columns:
         raise VolyError(f"Required columns missing in model_results: {missing_columns}")
 
-
-
-        # Calculate minutes between consecutive timestamps
-        minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
-        minutes_per_period = int(minutes_diff)
-    else:
-        raise VolyError("Cannot determine granularity from df_hist.")
+    if len(df_hist) < 2:
+        raise VolyError("Not enough data points in df_hist")
 
-    #
-
+    # Determine granularity from data
+    minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
+    minutes_per_period = max(1, int(minutes_diff))
+
+    # Validate method and distribution
+    valid_methods = ['garch', 'egarch', 'basic']
     valid_distributions = ['normal', 'studentst', 'skewstudent']
 
-
-
+    method = method.lower()
+    distribution = distribution.lower()
+
+    if method not in valid_methods:
+        raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
 
-    if distribution
+    if method in ['garch', 'egarch'] and distribution not in valid_distributions:
         raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
 
-    #
-
-
-
-        window_length = kwargs.get('window_length', '30d')
-        variate_parameters = kwargs.get('variate_parameters', True)
-        bandwidth = kwargs.get('bandwidth', 'silverman')
-        logger.info(
-            f"Using {model_type.upper()} method with {distribution} distribution, {n_fits} fits, {simulations} simulations")
-    elif method == 'hist_returns':
-        bandwidth = kwargs.get('bandwidth', 'silverman')
-        logger.info(f"Using returns-based KDE method with bandwidth {bandwidth}")
-    else:
-        raise VolyError(f"Unknown method: {method}. Use 'hist_returns', 'arch_returns'.")
+    # Validate return domain
+    valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes']
+    if return_domain not in valid_domains:
+        raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
 
-    # Calculate log returns
+    # Calculate log returns
     log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
     log_returns = log_returns.dropna().values
 
-    # Fit volatility model
+    # Fit volatility model if needed
     vol_model = None
-    if method
+    if method in ['garch', 'egarch']:
+        model_type = method
+        logger.info(f"Using {model_type.upper()} with {distribution} distribution")
+
         vol_model = fit_volatility_model(
-            log_returns,
-            df_hist,
+            log_returns=log_returns,
+            df_hist=df_hist,
             model_type=model_type,
            distribution=distribution,
             window_length=window_length,
             n_fits=n_fits
         )
 
+    # Initialize result containers
     pdf_surface = {}
     cdf_surface = {}
     x_surface = {}
```
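The domain transforms used by both new density calculators are changes of variables on the log-moneyness grid: with m = exp(x), r = m - 1 and k = s/m, each transformed density picks up a Jacobian factor, which is why the code divides by M, (1 + R) and K respectively. A quick numerical check of those identities (the standard normal density here is chosen arbitrarily):

```python
import numpy as np

s = 100.0
LM = np.linspace(-1.5, 1.5, 1000)  # log-moneyness grid, as in domain_params
M, dx = np.exp(LM), LM[1] - LM[0]
R, K = M - 1, s / M

# An arbitrary normalized density on the log-moneyness grid
pdf_lm = np.exp(-0.5 * LM ** 2) / np.sqrt(2 * np.pi)
pdf_lm = pdf_lm / np.trapz(pdf_lm, LM)

# Jacobian-adjusted densities in the other domains, matching the diff
pdf_m = pdf_lm / M        # m = exp(x), so dx/dm = 1/m
pdf_r = pdf_lm / (1 + R)  # r = m - 1, same Jacobian as moneyness
pdf_k = pdf_lm / K        # k = s/m, so |dx/dk| = 1/k

# Each density should still integrate to ~1 over its own domain
# (K decreases along the grid, hence the abs() on its integral)
print(np.trapz(pdf_m, M), np.trapz(pdf_r, R), abs(np.trapz(pdf_k, K)))
```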
```diff
@@ -471,161 +802,84 @@ def get_hd_surface(model_results: pd.DataFrame,
 
     # Process each maturity
     for i in model_results.index:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # Risk-neutral adjustment
-            mu_scaled = scaled_returns.mean()
-            sigma_scaled = scaled_returns.std()
-            expected_risk_neutral_mean = (r - 0.5 * (sigma_scaled / 100) ** 2) * 100 * np.sqrt(t)
-            adjustment = mu_scaled - expected_risk_neutral_mean
-            risk_neutral_returns = scaled_returns - adjustment
-
-            # Convert to terminal prices
-            simulated_prices = s * np.exp(risk_neutral_returns / 100)
-
-            # Convert to moneyness domain
-            simulated_moneyness = s / simulated_prices
-
-            # Perform KDE to get PDF
-            kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
-            pdf_values = kde(M)
-
-            # Include volatility model params in moments
-            avg_params = vol_model['avg_params']
-            param_names = vol_model['param_names']
-            model_params = {name: value for name, value in zip(param_names, avg_params)}
-            model_params['model_type'] = model_type
-            model_params['distribution'] = distribution
-
-            # Add persistence for GARCH-type models
-            if model_type.lower() == 'garch':
-                model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
-        else:
-            continue  # Skip this maturity if method is invalid
-
-        # Ensure density integrates to 1
-        dx = LM[1] - LM[0]
-        total_area = np.sum(pdf_values * dx)
-        if total_area <= 0:
-            logger.warning(f"Invalid density (area <= 0) for maturity {i}, skipping.")
-            continue
+        try:
+            # Get parameters for this maturity
+            s = model_results.loc[i, 's']  # Spot price
+            r = model_results.loc[i, 'r']  # Risk-free rate
+            t = model_results.loc[i, 't']  # Time to maturity in years
+
+            # Calculate time scaling parameters
+            tau_days = t * 365.25  # Days to expiry
+            n_periods = max(1, int(tau_days * 24 * 60 / minutes_per_period))  # Number of periods
+
+            logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days:.2f} days)")
+
+            # Prepare domains
+            domains = prepare_domains(domain_params, s, return_domain)
+
+            # Calculate density based on method
+            if method == 'basic':
+                pdfs = calculate_basic_density(
+                    df_hist=df_hist,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    domains=domains,
+                    bandwidth=bandwidth
+                )
+                model_params = None
+
+            else:  # 'garch' or 'egarch'
+                if vol_model is None:
+                    logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
+                    continue
+
+                pdfs, model_params = calculate_volatility_density(
+                    vol_model=vol_model,
+                    s=s,
+                    t=t,
+                    r=r,
+                    n_periods=n_periods,
+                    tau_days=tau_days,
+                    domains=domains,
+                    simulations=simulations,
+                    bandwidth=bandwidth
+                )
+
+            # Get domain arrays for output
+            if return_domain == 'log_moneyness':
+                x = domains['log_moneyness']
+                pdf = pdfs['log_moneyness']
+            elif return_domain == 'moneyness':
+                x = domains['moneyness']
+                pdf = pdfs['moneyness']
+            elif return_domain == 'returns':
+                x = domains['returns']
+                pdf = pdfs['returns']
+            elif return_domain == 'strikes':
+                x = domains['strikes']
+                pdf = pdfs['strikes']
+
+            # Calculate statistical moments
+            moments = get_all_moments(x, pdf, model_params)
+
+            # Store results
+            pdf_surface[i] = pdf
+            cdf_surface[i] = pdfs['cdf']
+            x_surface[i] = x
+            all_moments[i] = moments
 
-
-
-        # Common processing for both methods
-
-        # Transform densities to various domains
-        if method == 'hist_returns':
-            pdf_lm = pdf_values
-            pdf_m = pdf_lm / M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
-        else:  # volatility models
-            pdf_m = pdf_values
-            pdf_lm = pdf_m * M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
-
-        # Calculate CDF
-        cdf = np.cumsum(pdf_lm * dx)
-        cdf = np.minimum(cdf / cdf[-1], 1.0)
-
-        # Select appropriate domain and calculate moments
-        if return_domain == 'log_moneyness':
-            x = LM
-            pdf = pdf_lm
-            moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
-        elif return_domain == 'moneyness':
-            x = M
-            pdf = pdf_m
-            moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
-        elif return_domain == 'returns':
-            x = R
-            pdf = pdf_r
-            moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
-        elif return_domain == 'strikes':
-            x = K
-            pdf = pdf_k
-            moments = get_all_moments(x, pdf, model_params if method == 'arch_returns' else None)
-        else:
-            raise VolyError(f"Unsupported return_domain: {return_domain}")
+        except Exception as e:
+            logger.warning(f"Failed to calculate HD for maturity {i}: {str(e)}")
 
-
-
-
-        x_surface[i] = x
-        all_moments[i] = moments
+    # Check if we have any valid results
+    if not pdf_surface:
+        raise VolyError("No valid densities could be calculated. Check your input data.")
 
     # Create DataFrame with moments
     moments = pd.DataFrame(all_moments).T
 
-    logger.info(
-        f"Historical density calculation complete using {method} method with {model_type} model and {distribution} distribution")
+    logger.info(f"Historical density calculation complete using {method} method")
 
     return {
         'pdf_surface': pdf_surface,
```