voly 0.0.144__tar.gz → 0.0.146__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voly-0.0.144/src/voly.egg-info → voly-0.0.146}/PKG-INFO +1 -1
- {voly-0.0.144 → voly-0.0.146}/pyproject.toml +2 -2
- {voly-0.0.144 → voly-0.0.146}/src/voly/core/hd.py +256 -217
- {voly-0.0.144 → voly-0.0.146/src/voly.egg-info}/PKG-INFO +1 -1
- {voly-0.0.144 → voly-0.0.146}/LICENSE +0 -0
- {voly-0.0.144 → voly-0.0.146}/README.md +0 -0
- {voly-0.0.144 → voly-0.0.146}/setup.cfg +0 -0
- {voly-0.0.144 → voly-0.0.146}/setup.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/__init__.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/client.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/core/__init__.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/core/charts.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/core/data.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/core/fit.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/core/interpolate.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/core/rnd.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/exceptions.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/formulas.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/models.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/utils/__init__.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly/utils/logger.py +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly.egg-info/SOURCES.txt +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly.egg-info/dependency_links.txt +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly.egg-info/requires.txt +0 -0
- {voly-0.0.144 → voly-0.0.146}/src/voly.egg-info/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "voly"
|
|
7
|
-
version = "0.0.144"
|
|
7
|
+
version = "0.0.146"
|
|
8
8
|
description = "Options & volatility research package"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [
|
|
@@ -60,7 +60,7 @@ line_length = 100
|
|
|
60
60
|
multi_line_output = 3
|
|
61
61
|
|
|
62
62
|
[tool.mypy]
|
|
63
|
-
python_version = "0.0.144"
|
|
63
|
+
python_version = "0.0.146"
|
|
64
64
|
warn_return_any = true
|
|
65
65
|
warn_unused_configs = true
|
|
66
66
|
disallow_untyped_defs = true
|
|
@@ -16,11 +16,13 @@ from voly.formulas import iv, get_domain
|
|
|
16
16
|
from voly.models import SVIModel
|
|
17
17
|
from voly.core.fit import fit_model
|
|
18
18
|
from arch import arch_model
|
|
19
|
-
from arch.univariate import GARCH, EGARCH
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
@catch_exception
|
|
23
|
-
def get_historical_data(currency, lookback_days, granularity, exchange_name):
|
|
22
|
+
def get_historical_data(currency: str,
|
|
23
|
+
lookback_days: str,
|
|
24
|
+
granularity: str,
|
|
25
|
+
exchange_name: str) -> pd.DataFrame:
|
|
24
26
|
"""
|
|
25
27
|
Fetch historical OHLCV data for a cryptocurrency.
|
|
26
28
|
|
|
@@ -37,9 +39,8 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
|
|
|
37
39
|
|
|
38
40
|
Returns:
|
|
39
41
|
-------
|
|
40
|
-
|
|
42
|
+
pd.DataFrame: Historical price data with OHLCV columns.
|
|
41
43
|
"""
|
|
42
|
-
|
|
43
44
|
try:
|
|
44
45
|
# Get the exchange class from ccxt
|
|
45
46
|
exchange_class = getattr(ccxt, exchange_name.lower())
|
|
@@ -61,11 +62,15 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
|
|
|
61
62
|
ohlcv_list = []
|
|
62
63
|
ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
|
|
63
64
|
ohlcv_list.append(ohlcv)
|
|
64
|
-
|
|
65
|
+
|
|
66
|
+
# Fetch all available data within the lookback period
|
|
67
|
+
while len(ohlcv) == 1000:
|
|
65
68
|
from_ts = ohlcv[-1][0]
|
|
66
69
|
new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
if len(new_ohlcv) <= 1:
|
|
71
|
+
break
|
|
72
|
+
ohlcv.extend(new_ohlcv[1:]) # Skip first element to avoid duplication
|
|
73
|
+
if len(new_ohlcv) < 1000:
|
|
69
74
|
break
|
|
70
75
|
|
|
71
76
|
# Convert to DataFrame
|
|
@@ -74,13 +79,13 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
|
|
|
74
79
|
df_hist.set_index('date', inplace=True)
|
|
75
80
|
df_hist = df_hist.sort_index(ascending=True)
|
|
76
81
|
|
|
77
|
-
|
|
82
|
+
logger.info(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")
|
|
78
83
|
|
|
79
84
|
return df_hist
|
|
80
85
|
|
|
81
86
|
|
|
82
87
|
@catch_exception
|
|
83
|
-
def parse_window_length(window_length, df_hist):
|
|
88
|
+
def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
|
|
84
89
|
"""
|
|
85
90
|
Parse window length from string format (e.g., '7d', '30d') to number of data points.
|
|
86
91
|
|
|
@@ -96,7 +101,7 @@ def parse_window_length(window_length, df_hist):
|
|
|
96
101
|
int
|
|
97
102
|
Number of data points corresponding to the window length.
|
|
98
103
|
"""
|
|
99
|
-
if not window_length.endswith('d'):
|
|
104
|
+
if not isinstance(window_length, str) or not window_length.endswith('d'):
|
|
100
105
|
raise VolyError("window_length should be in format '7d', '30d', etc.")
|
|
101
106
|
|
|
102
107
|
# Extract number of days
|
|
@@ -115,21 +120,34 @@ def parse_window_length(window_length, df_hist):
|
|
|
115
120
|
|
|
116
121
|
|
|
117
122
|
@catch_exception
|
|
118
|
-
def fit_volatility_model(log_returns
|
|
119
|
-
|
|
123
|
+
def fit_volatility_model(log_returns: np.ndarray,
|
|
124
|
+
df_hist: pd.DataFrame,
|
|
125
|
+
model_type: str = 'garch',
|
|
126
|
+
distribution: str = 'normal',
|
|
127
|
+
window_length: str = '30d',
|
|
128
|
+
n_fits: int = 400) -> Dict[str, Any]:
|
|
120
129
|
"""
|
|
121
130
|
Fit a volatility model (GARCH or EGARCH) to log returns.
|
|
122
131
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
132
|
+
Parameters:
|
|
133
|
+
-----------
|
|
134
|
+
log_returns : np.ndarray
|
|
135
|
+
Array of log returns
|
|
136
|
+
df_hist : pd.DataFrame
|
|
137
|
+
DataFrame with historical price data
|
|
138
|
+
model_type : str
|
|
139
|
+
Type of volatility model ('garch' or 'egarch')
|
|
140
|
+
distribution : str
|
|
141
|
+
Distribution type ('normal', 'studentst', or 'skewstudent')
|
|
142
|
+
window_length : str
|
|
143
|
+
Length of each window as a string (e.g., '30d')
|
|
144
|
+
n_fits : int
|
|
145
|
+
Number of sliding windows
|
|
130
146
|
|
|
131
147
|
Returns:
|
|
132
|
-
|
|
148
|
+
--------
|
|
149
|
+
Dict[str, Any]
|
|
150
|
+
Dictionary with model parameters and processes
|
|
133
151
|
"""
|
|
134
152
|
# Parse window length
|
|
135
153
|
window_points = parse_window_length(window_length, df_hist)
|
|
@@ -137,36 +155,38 @@ def fit_volatility_model(log_returns, df_hist, model_type='garch', distribution=
|
|
|
137
155
|
if len(log_returns) < window_points + n_fits:
|
|
138
156
|
raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
|
|
139
157
|
|
|
140
|
-
# Adjust window sizes if necessary
|
|
141
|
-
n_fits = min(n_fits, len(log_returns) // 3)
|
|
142
|
-
window_points = min(window_points, len(log_returns) // 3)
|
|
158
|
+
# Adjust window sizes if necessary to avoid over-fitting
|
|
159
|
+
n_fits = min(n_fits, max(100, len(log_returns) // 3))
|
|
160
|
+
window_points = min(window_points, max(20, len(log_returns) // 3))
|
|
143
161
|
|
|
144
162
|
start = window_points + n_fits
|
|
145
163
|
end = n_fits
|
|
146
164
|
|
|
147
165
|
# Different number of parameters based on model type and distribution
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
n_params = 4 # mu, omega, alpha, beta
|
|
151
|
-
elif distribution.lower() == 'studentst':
|
|
152
|
-
n_params = 5 # mu, omega, alpha, beta, nu
|
|
153
|
-
else: # skewstudent
|
|
154
|
-
n_params = 6 # mu, omega, alpha, beta, nu, lambda (skew)
|
|
155
|
-
else: # egarch
|
|
156
|
-
if distribution.lower() == 'normal':
|
|
157
|
-
n_params = 5 # mu, omega, alpha, gamma, beta
|
|
158
|
-
elif distribution.lower() == 'studentst':
|
|
159
|
-
n_params = 6 # mu, omega, alpha, gamma, beta, nu
|
|
160
|
-
else: # skewstudent
|
|
161
|
-
n_params = 7 # mu, omega, alpha, gamma, beta, nu, lambda (skew)
|
|
166
|
+
param_names = get_param_names(model_type, distribution)
|
|
167
|
+
n_params = len(param_names)
|
|
162
168
|
|
|
163
169
|
parameters = np.zeros((n_fits, n_params))
|
|
164
170
|
z_process = []
|
|
165
171
|
|
|
166
|
-
logger.info(
|
|
172
|
+
logger.info(
|
|
173
|
+
f"Fitting {model_type.upper()} model with {distribution} distribution using {n_fits} windows of {window_length}...")
|
|
167
174
|
|
|
168
175
|
for i in range(n_fits):
|
|
176
|
+
if i % (n_fits // 10) == 0:
|
|
177
|
+
logger.info(f"Fitting progress: {i}/{n_fits}")
|
|
178
|
+
|
|
179
|
+
# Skip if we don't have enough data
|
|
180
|
+
if end - i - 1 < 0 or start - i - 1 > len(log_returns):
|
|
181
|
+
continue
|
|
182
|
+
|
|
169
183
|
window = log_returns[end - i - 1:start - i - 1]
|
|
184
|
+
|
|
185
|
+
# Skip windows that are too small or have invalid data
|
|
186
|
+
if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
|
|
187
|
+
continue
|
|
188
|
+
|
|
189
|
+
# Mean-center the data to improve numerical stability
|
|
170
190
|
data = window - np.mean(window)
|
|
171
191
|
|
|
172
192
|
try:
|
|
@@ -176,101 +196,102 @@ def fit_volatility_model(log_returns, df_hist, model_type='garch', distribution=
|
|
|
176
196
|
else: # egarch
|
|
177
197
|
model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
|
|
178
198
|
|
|
179
|
-
fit_result = model.fit(disp='off')
|
|
199
|
+
fit_result = model.fit(disp='off', options={'maxiter': 1000})
|
|
180
200
|
|
|
181
201
|
# Extract parameters based on model type and distribution
|
|
182
202
|
params_dict = fit_result.params.to_dict()
|
|
183
203
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
if distribution.lower() == 'normal':
|
|
191
|
-
parameters[i, :] = [mu, omega, alpha, beta]
|
|
192
|
-
elif distribution.lower() == 'studentst':
|
|
193
|
-
nu = params_dict.get("nu", 0)
|
|
194
|
-
parameters[i, :] = [mu, omega, alpha, beta, nu]
|
|
195
|
-
else: # skewstudent
|
|
196
|
-
nu = params_dict.get("nu", 0)
|
|
197
|
-
lam = params_dict.get("lambda", 0)
|
|
198
|
-
parameters[i, :] = [mu, omega, alpha, beta, nu, lam]
|
|
199
|
-
else: # egarch
|
|
200
|
-
mu = params_dict.get("mu", 0)
|
|
201
|
-
omega = params_dict.get("omega", 0)
|
|
202
|
-
alpha = params_dict.get("alpha[1]", 0)
|
|
203
|
-
gamma = params_dict.get("gamma[1]", 0)
|
|
204
|
-
beta = params_dict.get("beta[1]", 0)
|
|
205
|
-
|
|
206
|
-
if distribution.lower() == 'normal':
|
|
207
|
-
parameters[i, :] = [mu, omega, alpha, gamma, beta]
|
|
208
|
-
elif distribution.lower() == 'studentst':
|
|
209
|
-
nu = params_dict.get("nu", 0)
|
|
210
|
-
parameters[i, :] = [mu, omega, alpha, gamma, beta, nu]
|
|
211
|
-
else: # skewstudent
|
|
212
|
-
nu = params_dict.get("nu", 0)
|
|
213
|
-
lam = params_dict.get("lambda", 0)
|
|
214
|
-
parameters[i, :] = [mu, omega, alpha, gamma, beta, nu, lam]
|
|
215
|
-
|
|
216
|
-
# Get last innovation
|
|
204
|
+
# Extract parameter values in correct order
|
|
205
|
+
param_values = [params_dict.get(param, 0) for param in param_names]
|
|
206
|
+
parameters[i, :] = param_values
|
|
207
|
+
|
|
208
|
+
# Get last innovation (standardized residual)
|
|
217
209
|
residuals = fit_result.resid
|
|
218
210
|
conditional_vol = fit_result.conditional_volatility
|
|
219
|
-
|
|
220
|
-
|
|
211
|
+
|
|
212
|
+
if len(residuals) > 0 and len(conditional_vol) > 0:
|
|
213
|
+
z_t = residuals[-1] / conditional_vol[-1]
|
|
214
|
+
if not np.isnan(z_t) and not np.isinf(z_t):
|
|
215
|
+
z_process.append(z_t)
|
|
221
216
|
|
|
222
217
|
except Exception as e:
|
|
223
218
|
logger.warning(f"Model fit failed for window {i}: {str(e)}")
|
|
224
219
|
|
|
225
220
|
# Clean up any failed fits
|
|
226
221
|
if len(z_process) < n_fits / 2:
|
|
227
|
-
raise VolyError("Too many model fits failed. Check your data.")
|
|
222
|
+
raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
|
|
223
|
+
|
|
224
|
+
# Filter out rows with zeros (failed fits)
|
|
225
|
+
valid_rows = ~np.all(parameters == 0, axis=1)
|
|
226
|
+
parameters = parameters[valid_rows]
|
|
228
227
|
|
|
228
|
+
# Calculate average parameters and standard deviations
|
|
229
229
|
avg_params = np.mean(parameters, axis=0)
|
|
230
230
|
std_params = np.std(parameters, axis=0)
|
|
231
231
|
|
|
232
232
|
return {
|
|
233
|
+
'model_type': model_type,
|
|
234
|
+
'distribution': distribution,
|
|
233
235
|
'parameters': parameters,
|
|
234
236
|
'avg_params': avg_params,
|
|
235
237
|
'std_params': std_params,
|
|
236
238
|
'z_process': np.array(z_process),
|
|
237
|
-
'
|
|
238
|
-
'distribution': distribution,
|
|
239
|
-
'param_names': get_param_names(model_type, distribution)
|
|
239
|
+
'param_names': param_names
|
|
240
240
|
}
|
|
241
241
|
|
|
242
242
|
|
|
243
|
-
def get_param_names(model_type, distribution):
|
|
244
|
-
"""
|
|
243
|
+
def get_param_names(model_type: str, distribution: str) -> List[str]:
|
|
244
|
+
"""
|
|
245
|
+
Get parameter names based on model type and distribution.
|
|
246
|
+
|
|
247
|
+
Parameters:
|
|
248
|
+
-----------
|
|
249
|
+
model_type : str
|
|
250
|
+
Type of volatility model ('garch' or 'egarch')
|
|
251
|
+
distribution : str
|
|
252
|
+
Distribution type ('normal', 'studentst', or 'skewstudent')
|
|
253
|
+
|
|
254
|
+
Returns:
|
|
255
|
+
--------
|
|
256
|
+
List[str]
|
|
257
|
+
List of parameter names
|
|
258
|
+
"""
|
|
245
259
|
if model_type.lower() == 'garch':
|
|
246
260
|
if distribution.lower() == 'normal':
|
|
247
|
-
return ['mu', 'omega', 'alpha', 'beta']
|
|
261
|
+
return ['mu', 'omega', 'alpha[1]', 'beta[1]']
|
|
248
262
|
elif distribution.lower() == 'studentst':
|
|
249
|
-
return ['mu', 'omega', 'alpha', 'beta', 'nu']
|
|
263
|
+
return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
|
|
250
264
|
else: # skewstudent
|
|
251
|
-
return ['mu', 'omega', 'alpha', 'beta', 'nu', 'lambda']
|
|
265
|
+
return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
|
|
252
266
|
else: # egarch
|
|
253
267
|
if distribution.lower() == 'normal':
|
|
254
|
-
return ['mu', 'omega', 'alpha', 'gamma', 'beta']
|
|
268
|
+
return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
|
|
255
269
|
elif distribution.lower() == 'studentst':
|
|
256
|
-
return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu']
|
|
270
|
+
return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
|
|
257
271
|
else: # skewstudent
|
|
258
|
-
return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu', 'lambda']
|
|
272
|
+
return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
|
|
259
273
|
|
|
260
274
|
|
|
261
275
|
@catch_exception
|
|
262
|
-
def simulate_volatility_paths(vol_model
|
|
276
|
+
def simulate_volatility_paths(vol_model: Dict[str, Any],
|
|
277
|
+
horizon: int,
|
|
278
|
+
simulations: int = 5000) -> Tuple[np.ndarray, float]:
|
|
263
279
|
"""
|
|
264
280
|
Simulate future paths using a fitted volatility model.
|
|
265
281
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
282
|
+
Parameters:
|
|
283
|
+
-----------
|
|
284
|
+
vol_model : Dict[str, Any]
|
|
285
|
+
Dict with volatility model parameters
|
|
286
|
+
horizon : int
|
|
287
|
+
Number of steps to simulate
|
|
288
|
+
simulations : int
|
|
289
|
+
Number of paths to simulate
|
|
271
290
|
|
|
272
291
|
Returns:
|
|
273
|
-
|
|
292
|
+
--------
|
|
293
|
+
Tuple[np.ndarray, float]
|
|
294
|
+
Simulated returns and drift
|
|
274
295
|
"""
|
|
275
296
|
parameters = vol_model['parameters']
|
|
276
297
|
z_process = vol_model['z_process']
|
|
@@ -282,18 +303,21 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
|
|
|
282
303
|
pars = vol_model['avg_params'].copy()
|
|
283
304
|
bounds = vol_model['std_params'].copy()
|
|
284
305
|
|
|
285
|
-
#
|
|
286
|
-
|
|
306
|
+
# Create dictionary for easier parameter access
|
|
307
|
+
param_dict = {name: value for name, value in zip(param_names, pars)}
|
|
308
|
+
|
|
309
|
+
# Log parameters in a structured way
|
|
310
|
+
param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
|
|
287
311
|
logger.info(f"{model_type.upper()} parameters: {param_str}")
|
|
288
312
|
|
|
289
|
-
# Create
|
|
313
|
+
# Create sampling function based on distribution
|
|
290
314
|
if distribution.lower() == 'normal':
|
|
291
315
|
# Use standard normal for normal distribution
|
|
292
316
|
def sample_innovation(size=1):
|
|
293
317
|
return np.random.normal(0, 1, size=size)
|
|
294
318
|
else:
|
|
295
319
|
# Use KDE for non-normal distributions to capture empirical distribution
|
|
296
|
-
kde = stats.gaussian_kde(z_process, bw_method='silverman') # original code
|
|
320
|
+
kde = stats.gaussian_kde(z_process, bw_method='silverman') # original code didnt have bw_method
|
|
297
321
|
z_range = np.linspace(min(z_process), max(z_process), 1000)
|
|
298
322
|
z_prob = kde(z_range)
|
|
299
323
|
z_prob = z_prob / np.sum(z_prob)
|
|
@@ -303,65 +327,65 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
|
|
|
303
327
|
|
|
304
328
|
# Simulate paths
|
|
305
329
|
simulated_returns = np.zeros(simulations)
|
|
330
|
+
mu = param_dict.get('mu', 0)
|
|
306
331
|
|
|
307
332
|
for i in range(simulations):
|
|
308
333
|
if (i + 1) % (simulations // 10) == 0:
|
|
309
334
|
logger.info(f"Simulation progress: {i + 1}/{simulations}")
|
|
310
335
|
|
|
311
|
-
# Optionally vary parameters
|
|
312
|
-
if
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
#
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
336
|
+
# Optionally vary parameters between simulations
|
|
337
|
+
if (i + 1) % (simulations // 20) == 0:
|
|
338
|
+
# Create parameter variations based on their estimated distribution
|
|
339
|
+
sim_params = {}
|
|
340
|
+
for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
|
|
341
|
+
var = bound ** 2 / max(len(parameters), 1)
|
|
342
|
+
# Generate new parameter from normal distribution around the mean
|
|
343
|
+
new_par = np.random.normal(par, np.sqrt(var))
|
|
344
|
+
|
|
345
|
+
# Apply constraints to ensure valid parameters
|
|
346
|
+
if name == 'omega':
|
|
347
|
+
new_par = max(new_par, 1e-6) # Must be positive
|
|
348
|
+
elif name in ['alpha[1]', 'beta[1]']:
|
|
349
|
+
new_par = max(min(new_par, 0.999), 0.001) # Between 0 and 1
|
|
350
|
+
elif name == 'nu':
|
|
351
|
+
new_par = max(new_par, 2.1) # Degrees of freedom > 2
|
|
352
|
+
|
|
353
|
+
sim_params[name] = new_par
|
|
322
354
|
else:
|
|
323
|
-
|
|
355
|
+
sim_params = param_dict.copy()
|
|
324
356
|
|
|
325
|
-
# Initialize
|
|
357
|
+
# Initialize volatility based on model type
|
|
326
358
|
if model_type.lower() == 'garch':
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
else: # skewstudent
|
|
334
|
-
mu, omega, alpha, beta, nu, lam = sim_pars
|
|
335
|
-
sigma2 = omega / (1 - alpha - beta)
|
|
359
|
+
omega = sim_params.get('omega', 0)
|
|
360
|
+
alpha = sim_params.get('alpha[1]', 0)
|
|
361
|
+
beta = sim_params.get('beta[1]', 0)
|
|
362
|
+
|
|
363
|
+
# Initialize GARCH volatility (unconditional variance)
|
|
364
|
+
sigma2 = omega / (1 - alpha - beta) if alpha + beta < 1 else omega / 0.99
|
|
336
365
|
else: # egarch
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
log_sigma2 = omega / (1 - beta)
|
|
344
|
-
sigma2 = np.exp(log_sigma2)
|
|
345
|
-
else: # skewstudent
|
|
346
|
-
mu, omega, alpha, gamma, beta, nu, lam = sim_pars
|
|
347
|
-
log_sigma2 = omega / (1 - beta)
|
|
348
|
-
sigma2 = np.exp(log_sigma2)
|
|
366
|
+
omega = sim_params.get('omega', 0)
|
|
367
|
+
beta = sim_params.get('beta[1]', 0)
|
|
368
|
+
|
|
369
|
+
# Initialize EGARCH volatility
|
|
370
|
+
log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
|
|
371
|
+
sigma2 = np.exp(log_sigma2)
|
|
349
372
|
|
|
350
373
|
returns_sum = 0
|
|
351
374
|
|
|
352
|
-
# Simulate path
|
|
375
|
+
# Simulate path step by step
|
|
353
376
|
for _ in range(horizon):
|
|
354
|
-
# Sample innovation
|
|
377
|
+
# Sample a random innovation
|
|
355
378
|
z = sample_innovation()
|
|
356
379
|
|
|
357
|
-
# Update
|
|
380
|
+
# Update returns and volatility based on model type
|
|
358
381
|
if model_type.lower() == 'garch':
|
|
359
382
|
# Calculate return
|
|
360
383
|
e = z * np.sqrt(sigma2)
|
|
361
384
|
returns_sum += e + mu
|
|
362
385
|
|
|
363
386
|
# Update GARCH volatility
|
|
364
|
-
sigma2 = omega + alpha * e ** 2 + beta
|
|
387
|
+
sigma2 = sim_params.get('omega', 0) + sim_params.get('alpha[1]', 0) * e ** 2 + sim_params.get('beta[1]',
|
|
388
|
+
0) * sigma2
|
|
365
389
|
else: # egarch
|
|
366
390
|
# Calculate return
|
|
367
391
|
e = z * np.sqrt(sigma2)
|
|
@@ -369,6 +393,12 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
|
|
|
369
393
|
|
|
370
394
|
# Update EGARCH volatility
|
|
371
395
|
abs_z = abs(z)
|
|
396
|
+
gamma = sim_params.get('gamma[1]', 0)
|
|
397
|
+
alpha = sim_params.get('alpha[1]', 0)
|
|
398
|
+
beta = sim_params.get('beta[1]', 0)
|
|
399
|
+
omega = sim_params.get('omega', 0)
|
|
400
|
+
|
|
401
|
+
# EGARCH update equation
|
|
372
402
|
log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
|
|
373
403
|
sigma2 = np.exp(log_sigma2)
|
|
374
404
|
|
|
@@ -381,89 +411,95 @@ def get_hd_surface(model_results: pd.DataFrame,
|
|
|
381
411
|
df_hist: pd.DataFrame,
|
|
382
412
|
domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
|
|
383
413
|
return_domain: str = 'log_moneyness',
|
|
384
|
-
method: str = '
|
|
385
|
-
model_type: str = 'garch',
|
|
414
|
+
method: str = 'garch',
|
|
386
415
|
distribution: str = 'normal',
|
|
387
|
-
|
|
416
|
+
window_length: str = '30d',
|
|
417
|
+
n_fits: int = 400,
|
|
418
|
+
simulations: int = 5000,
|
|
419
|
+
bandwidth: str = 'silverman') -> Dict[str, Any]:
|
|
388
420
|
"""
|
|
389
421
|
Generate historical density surface from historical price data.
|
|
390
422
|
|
|
391
423
|
Parameters:
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
424
|
+
-----------
|
|
425
|
+
model_results : pd.DataFrame
|
|
426
|
+
DataFrame with model parameters and maturities
|
|
427
|
+
df_hist : pd.DataFrame
|
|
428
|
+
DataFrame with historical price data
|
|
429
|
+
domain_params : Tuple[float, float, int]
|
|
430
|
+
Tuple of (min, max, num_points) for x-domain
|
|
431
|
+
return_domain : str
|
|
432
|
+
Domain for x-axis values ('log_moneyness', 'moneyness', 'returns', 'strikes')
|
|
433
|
+
method : str
|
|
434
|
+
Method to use for HD estimation:
|
|
435
|
+
- 'garch': GARCH(1,1) model
|
|
436
|
+
- 'egarch': EGARCH(1,1,1) model with asymmetry
|
|
437
|
+
- 'basic': Simple histogram/KDE of historical returns
|
|
438
|
+
distribution : str
|
|
439
|
+
Distribution to use for volatility models ('normal', 'studentst', or 'skewstudent')
|
|
440
|
+
window_length : str
|
|
441
|
+
Length of sliding windows as string (e.g., '30d')
|
|
442
|
+
n_fits : int
|
|
443
|
+
Number of sliding windows for volatility model fitting
|
|
444
|
+
simulations : int
|
|
445
|
+
Number of Monte Carlo simulations for volatility models
|
|
446
|
+
bandwidth : str
|
|
447
|
+
KDE bandwidth method (default: 'silverman')
|
|
408
448
|
|
|
409
449
|
Returns:
|
|
450
|
+
--------
|
|
451
|
+
Dict[str, Any]
|
|
410
452
|
Dictionary containing pdf_surface, cdf_surface, x_surface, and moments
|
|
411
453
|
"""
|
|
412
|
-
#
|
|
454
|
+
# Validate inputs
|
|
413
455
|
required_columns = ['s', 't', 'r']
|
|
414
456
|
missing_columns = [col for col in required_columns if col not in model_results.columns]
|
|
415
457
|
if missing_columns:
|
|
416
458
|
raise VolyError(f"Required columns missing in model_results: {missing_columns}")
|
|
417
459
|
|
|
460
|
+
if len(df_hist) < 2:
|
|
461
|
+
raise VolyError("Not enough data points in df_hist")
|
|
462
|
+
|
|
418
463
|
# Determine granularity from df_hist
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
|
|
422
|
-
minutes_per_period = int(minutes_diff)
|
|
423
|
-
else:
|
|
424
|
-
raise VolyError("Cannot determine granularity from df_hist.")
|
|
464
|
+
minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
|
|
465
|
+
minutes_per_period = max(1, int(minutes_diff))
|
|
425
466
|
|
|
426
|
-
# Validate
|
|
427
|
-
|
|
467
|
+
# Validate method and model parameters
|
|
468
|
+
valid_methods = ['garch', 'egarch', 'basic']
|
|
428
469
|
valid_distributions = ['normal', 'studentst', 'skewstudent']
|
|
429
470
|
|
|
430
|
-
|
|
431
|
-
|
|
471
|
+
method = method.lower()
|
|
472
|
+
distribution = distribution.lower()
|
|
432
473
|
|
|
433
|
-
if
|
|
434
|
-
raise VolyError(f"Invalid
|
|
474
|
+
if method not in valid_methods:
|
|
475
|
+
raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
|
|
435
476
|
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
n_fits = kwargs.get('n_fits', 400)
|
|
439
|
-
simulations = kwargs.get('simulations', 5000)
|
|
440
|
-
window_length = kwargs.get('window_length', '30d')
|
|
441
|
-
variate_parameters = kwargs.get('variate_parameters', True)
|
|
442
|
-
bandwidth = kwargs.get('bandwidth', 'silverman')
|
|
443
|
-
logger.info(
|
|
444
|
-
f"Using {model_type.upper()} method with {distribution} distribution, {n_fits} fits, {simulations} simulations")
|
|
445
|
-
elif method == 'hist_returns':
|
|
446
|
-
bandwidth = kwargs.get('bandwidth', 'silverman')
|
|
447
|
-
logger.info(f"Using returns-based KDE method with bandwidth {bandwidth}")
|
|
448
|
-
else:
|
|
449
|
-
raise VolyError(f"Unknown method: {method}. Use 'hist_returns', 'arch_returns'.")
|
|
477
|
+
if method in ['garch', 'egarch'] and distribution not in valid_distributions:
|
|
478
|
+
raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
|
|
450
479
|
|
|
451
480
|
# Calculate log returns from price history
|
|
452
481
|
log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
|
|
453
482
|
log_returns = log_returns.dropna().values
|
|
454
483
|
|
|
455
|
-
# Fit volatility model
|
|
484
|
+
# Fit volatility model if using GARCH or EGARCH
|
|
456
485
|
vol_model = None
|
|
457
|
-
if method == 'arch_returns':
|
|
486
|
+
if method in ['garch', 'egarch']:
|
|
487
|
+
model_type = method # Use method as model_type
|
|
488
|
+
logger.info(
|
|
489
|
+
f"Using {model_type.upper()} with {distribution} distribution, {n_fits} fits, {simulations} simulations")
|
|
490
|
+
|
|
458
491
|
vol_model = fit_volatility_model(
|
|
459
|
-
log_returns,
|
|
460
|
-
df_hist,
|
|
492
|
+
log_returns=log_returns,
|
|
493
|
+
df_hist=df_hist,
|
|
461
494
|
model_type=model_type,
|
|
462
495
|
distribution=distribution,
|
|
463
496
|
window_length=window_length,
|
|
464
497
|
n_fits=n_fits
|
|
465
498
|
)
|
|
499
|
+
elif method == 'basic':
|
|
500
|
+
logger.info(f"Using basic returns-based KDE method with bandwidth {bandwidth}")
|
|
466
501
|
|
|
502
|
+
# Initialize result containers
|
|
467
503
|
pdf_surface = {}
|
|
468
504
|
cdf_surface = {}
|
|
469
505
|
x_surface = {}
|
|
@@ -488,8 +524,8 @@ def get_hd_surface(model_results: pd.DataFrame,
|
|
|
488
524
|
|
|
489
525
|
logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days_float:.2f} days)")
|
|
490
526
|
|
|
491
|
-
if method == 'hist_returns':
|
|
492
|
-
#
|
|
527
|
+
if method == 'basic':
|
|
528
|
+
# Simple returns-based method
|
|
493
529
|
# Filter historical data for this maturity's lookback period
|
|
494
530
|
start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
|
|
495
531
|
maturity_hist = df_hist[df_hist.index >= start_date].copy()
|
|
@@ -519,7 +555,16 @@ def get_hd_surface(model_results: pd.DataFrame,
|
|
|
519
555
|
f = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
|
|
520
556
|
pdf_values = f(LM)
|
|
521
557
|
|
|
522
|
-
|
|
558
|
+
# Transform according to return domain
|
|
559
|
+
pdf_lm = pdf_values
|
|
560
|
+
pdf_m = pdf_lm / M
|
|
561
|
+
pdf_k = pdf_lm / K
|
|
562
|
+
pdf_r = pdf_lm / (1 + R)
|
|
563
|
+
|
|
564
|
+
# No model parameters to include
|
|
565
|
+
model_params = None
|
|
566
|
+
|
|
567
|
+
elif method in ['garch', 'egarch']:
|
|
523
568
|
# Volatility model-based method
|
|
524
569
|
if vol_model is None:
|
|
525
570
|
logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
|
|
@@ -530,8 +575,7 @@ def get_hd_surface(model_results: pd.DataFrame,
|
|
|
530
575
|
simulated_returns, simulated_mu = simulate_volatility_paths(
|
|
531
576
|
vol_model,
|
|
532
577
|
horizon,
|
|
533
|
-
simulations
|
|
534
|
-
variate_parameters
|
|
578
|
+
simulations
|
|
535
579
|
)
|
|
536
580
|
|
|
537
581
|
# Scale the simulated returns to match target time horizon
|
|
@@ -548,25 +592,31 @@ def get_hd_surface(model_results: pd.DataFrame,
|
|
|
548
592
|
# Convert to terminal prices
|
|
549
593
|
simulated_prices = s * np.exp(risk_neutral_returns / 100)
|
|
550
594
|
|
|
551
|
-
# Convert to moneyness domain
|
|
595
|
+
# Convert to moneyness domain (x-domain)
|
|
552
596
|
simulated_moneyness = s / simulated_prices
|
|
553
597
|
|
|
554
598
|
# Perform KDE to get PDF
|
|
555
599
|
kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
|
|
556
600
|
pdf_values = kde(M)
|
|
557
601
|
|
|
602
|
+
# Transform according to return domain
|
|
603
|
+
pdf_m = pdf_values
|
|
604
|
+
pdf_lm = pdf_m * M
|
|
605
|
+
pdf_k = pdf_lm / K
|
|
606
|
+
pdf_r = pdf_lm / (1 + R)
|
|
607
|
+
|
|
558
608
|
# Include volatility model params in moments
|
|
559
609
|
avg_params = vol_model['avg_params']
|
|
560
610
|
param_names = vol_model['param_names']
|
|
561
|
-
model_params = {name: value for name, value in zip(param_names, avg_params)}
|
|
562
|
-
model_params['model_type'] =
|
|
611
|
+
model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
|
|
612
|
+
model_params['model_type'] = method
|
|
563
613
|
model_params['distribution'] = distribution
|
|
564
614
|
|
|
565
|
-
# Add persistence for GARCH
|
|
566
|
-
if
|
|
615
|
+
# Add persistence for GARCH models
|
|
616
|
+
if method == 'garch':
|
|
567
617
|
model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
|
|
568
618
|
else:
|
|
569
|
-
continue # Skip
|
|
619
|
+
continue # Skip if invalid method
|
|
570
620
|
|
|
571
621
|
# Ensure density integrates to 1
|
|
572
622
|
dx = LM[1] - LM[0]
|
|
@@ -577,41 +627,27 @@ def get_hd_surface(model_results: pd.DataFrame,
|
|
|
577
627
|
|
|
578
628
|
pdf_values = pdf_values / total_area
|
|
579
629
|
|
|
580
|
-
# Common processing for both methods
|
|
581
|
-
|
|
582
|
-
# Transform densities to various domains
|
|
583
|
-
if method == 'hist_returns':
|
|
584
|
-
pdf_lm = pdf_values
|
|
585
|
-
pdf_m = pdf_lm / M
|
|
586
|
-
pdf_k = pdf_lm / K
|
|
587
|
-
pdf_r = pdf_lm / (1 + R)
|
|
588
|
-
else: # volatility models
|
|
589
|
-
pdf_m = pdf_values
|
|
590
|
-
pdf_lm = pdf_m * M
|
|
591
|
-
pdf_k = pdf_lm / K
|
|
592
|
-
pdf_r = pdf_lm / (1 + R)
|
|
593
|
-
|
|
594
630
|
# Calculate CDF
|
|
595
631
|
cdf = np.cumsum(pdf_lm * dx)
|
|
596
|
-
cdf = np.minimum(cdf / cdf[-1], 1.0)
|
|
632
|
+
cdf = np.minimum(cdf / cdf[-1], 1.0) # Ensure CDF is between 0 and 1
|
|
597
633
|
|
|
598
634
|
# Select appropriate domain and calculate moments
|
|
599
635
|
if return_domain == 'log_moneyness':
|
|
600
636
|
x = LM
|
|
601
637
|
pdf = pdf_lm
|
|
602
|
-
moments = get_all_moments(x, pdf, model_params
|
|
638
|
+
moments = get_all_moments(x, pdf, model_params)
|
|
603
639
|
elif return_domain == 'moneyness':
|
|
604
640
|
x = M
|
|
605
641
|
pdf = pdf_m
|
|
606
|
-
moments = get_all_moments(x, pdf, model_params
|
|
642
|
+
moments = get_all_moments(x, pdf, model_params)
|
|
607
643
|
elif return_domain == 'returns':
|
|
608
644
|
x = R
|
|
609
645
|
pdf = pdf_r
|
|
610
|
-
moments = get_all_moments(x, pdf, model_params
|
|
646
|
+
moments = get_all_moments(x, pdf, model_params)
|
|
611
647
|
elif return_domain == 'strikes':
|
|
612
648
|
x = K
|
|
613
649
|
pdf = pdf_k
|
|
614
|
-
moments = get_all_moments(x, pdf, model_params
|
|
650
|
+
moments = get_all_moments(x, pdf, model_params)
|
|
615
651
|
else:
|
|
616
652
|
raise VolyError(f"Unsupported return_domain: {return_domain}")
|
|
617
653
|
|
|
@@ -621,11 +657,14 @@ def get_hd_surface(model_results: pd.DataFrame,
|
|
|
621
657
|
x_surface[i] = x
|
|
622
658
|
all_moments[i] = moments
|
|
623
659
|
|
|
660
|
+
# Check if we have any valid results
|
|
661
|
+
if not pdf_surface:
|
|
662
|
+
raise VolyError("No valid densities could be calculated. Check your input data.")
|
|
663
|
+
|
|
624
664
|
# Create DataFrame with moments
|
|
625
665
|
moments = pd.DataFrame(all_moments).T
|
|
626
666
|
|
|
627
|
-
logger.info(
|
|
628
|
-
f"Historical density calculation complete using {method} method with {model_type} model and {distribution} distribution")
|
|
667
|
+
logger.info(f"Historical density calculation complete using {method} method")
|
|
629
668
|
|
|
630
669
|
return {
|
|
631
670
|
'pdf_surface': pdf_surface,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|