voly 0.0.146__py3-none-any.whl → 0.0.147__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
voly/core/rnd.py CHANGED
@@ -5,7 +5,7 @@ fitted volatility models and converting to probability functions.
5
5
 
6
6
  import numpy as np
7
7
  import pandas as pd
8
- from typing import Dict, List, Tuple, Optional, Union, Any
8
+ from typing import Dict, List, Tuple, Optional, Union, Any, Callable
9
9
  from voly.utils.logger import logger, catch_exception
10
10
  from voly.exceptions import VolyError
11
11
  from voly.models import SVIModel
@@ -13,83 +13,240 @@ from voly.formulas import bs, d1, d2, get_domain
13
13
  from scipy import stats
14
14
 
15
15
 
16
- # Breeden-Litzenberger Method
17
16
  @catch_exception
18
- def breeden(domain_params, s, r, o, t, return_domain):
19
- LM = get_domain(domain_params, s, r, o, t, 'log_moneyness')
20
- M = get_domain(domain_params, s, r, o, t, 'moneyness')
21
- R = get_domain(domain_params, s, r, o, t, 'returns')
22
- K = get_domain(domain_params, s, r, o, t, 'strikes')
23
- D = get_domain(domain_params, s, r, o, t, 'delta')
17
+ def _prepare_domains(domain_params, s, r, o, t):
18
+ """
19
+ Calculate domain arrays for different representations (log_moneyness, moneyness, etc.).
24
20
 
25
- c = bs(s, K, r, o, t, option_type='call')
26
- c1 = np.gradient(c, K)
27
- c2 = np.gradient(c1, K)
21
+ Parameters:
22
+ -----------
23
+ domain_params : tuple
24
+ (min_log_moneyness, max_log_moneyness, num_points)
25
+ s : float
26
+ Spot price
27
+ r : float
28
+ Risk-free rate
29
+ o : ndarray
30
+ Implied volatility array
31
+ t : float
32
+ Time to expiry in years
28
33
 
29
- rnd_k = np.maximum(np.exp(r * t) * c2, 0)
30
- rnd_lm = rnd_k * K
34
+ Returns:
35
+ --------
36
+ dict
37
+ Dictionary containing arrays for different domains
38
+ """
39
+ domains = {}
40
+ domains['log_moneyness'] = get_domain(domain_params, s, r, o, t, 'log_moneyness')
41
+ domains['moneyness'] = get_domain(domain_params, s, r, o, t, 'moneyness')
42
+ domains['returns'] = get_domain(domain_params, s, r, o, t, 'returns')
43
+ domains['strikes'] = get_domain(domain_params, s, r, o, t, 'strikes')
44
+ domains['delta'] = get_domain(domain_params, s, r, o, t, 'delta')
45
+
46
+ # Precompute differentials for integration
47
+ domains['dx'] = domains['log_moneyness'][1] - domains['log_moneyness'][0]
48
+
49
+ return domains
50
+
51
+
52
+ @catch_exception
53
+ def _normalize_density(pdf_values, dx):
54
+ """
55
+ Normalize a probability density function to integrate to 1.
56
+
57
+ Parameters:
58
+ -----------
59
+ pdf_values : ndarray
60
+ Array of PDF values
61
+ dx : float
62
+ Grid spacing
63
+
64
+ Returns:
65
+ --------
66
+ ndarray
67
+ Normalized PDF values
68
+ """
69
+ total_area = np.sum(pdf_values * dx)
70
+ if total_area <= 0:
71
+ logger.warning("PDF area is negative or zero, using absolute values")
72
+ total_area = np.sum(np.abs(pdf_values) * dx)
73
+
74
+ return pdf_values / total_area
75
+
76
+
77
+ @catch_exception
78
+ def _transform_to_domains(rnd_k, domains):
79
+ """
80
+ Transform density from strike domain to other domains.
81
+
82
+ Parameters:
83
+ -----------
84
+ rnd_k : ndarray
85
+ PDF in strike domain
86
+ domains : dict
87
+ Domain arrays
88
+
89
+ Returns:
90
+ --------
91
+ dict
92
+ Dictionary of PDFs in different domains
93
+ """
94
+ LM = domains['log_moneyness']
95
+ M = domains['moneyness']
96
+ K = domains['strikes']
97
+ R = domains['returns']
98
+ dx = domains['dx']
99
+
100
+ # Calculate PDF in different domains
101
+ rnd_lm = rnd_k * K # Convert to log-moneyness domain
102
+ pdf_lm = _normalize_density(rnd_lm, dx)
31
103
 
32
- dx = LM[1] - LM[0]
33
- total_area = np.sum(rnd_lm * dx)
34
- pdf_lm = rnd_lm / total_area
104
+ # Transform to other domains
35
105
  pdf_k = pdf_lm / K
36
- pdf_m = pdf_k * s
106
+ pdf_m = pdf_k * domains['strikes'][0] # s = K[0] * M[0]
37
107
  pdf_r = pdf_lm / (1 + R)
38
108
 
39
- pdf_d1 = stats.norm.pdf(d1(s, K, r, o, t, option_type='call'))
40
- dd_dK = pdf_d1 / (o * np.sqrt(t) * K)
109
+ # For delta domain, need special handling due to non-monotonicity
110
+ pdf_d1 = stats.norm.pdf(d1(domains['strikes'][0], K, 0, domains['delta'][0], 1, option_type='call'))
111
+ dd_dK = pdf_d1 / (domains['delta'][0] * np.sqrt(1) * K)
41
112
  pdf_d = pdf_k / dd_dK
42
113
 
43
- cdf = np.cumsum(pdf_lm) * dx
44
- cdf = cdf / cdf[-1]
45
-
46
- if return_domain == 'log_moneyness':
47
- x = LM
48
- pdf = pdf_lm
49
- moments = get_all_moments(x, pdf)
50
- return pdf, cdf, x, moments
51
- elif return_domain == 'moneyness':
52
- x = M
53
- pdf = pdf_m
54
- moments = get_all_moments(x, pdf)
55
- return pdf, cdf, x, moments
56
- elif return_domain == 'returns':
57
- x = R
58
- pdf = pdf_r
59
- moments = get_all_moments(x, pdf)
60
- return pdf, cdf, x, moments
61
- elif return_domain == 'strikes':
62
- x = K
63
- pdf = pdf_k
64
- moments = get_all_moments(x, pdf)
65
- return pdf, cdf, x, moments
66
- elif return_domain == 'delta':
114
+ # Calculate CDF
115
+ cdf = np.cumsum(pdf_lm * dx)
116
+ cdf = np.minimum(cdf / cdf[-1], 1.0) # Ensure max value is 1
117
+
118
+ return {
119
+ 'log_moneyness': pdf_lm,
120
+ 'moneyness': pdf_m,
121
+ 'returns': pdf_r,
122
+ 'strikes': pdf_k,
123
+ 'delta': pdf_d,
124
+ 'cdf': cdf
125
+ }
126
+
127
+
128
+ @catch_exception
129
+ def _select_domain_results(pdfs, domains, return_domain):
130
+ """
131
+ Select results for the requested domain.
132
+
133
+ Parameters:
134
+ -----------
135
+ pdfs : dict
136
+ PDFs in different domains
137
+ domains : dict
138
+ Domain arrays
139
+ return_domain : str
140
+ Requested domain
141
+
142
+ Returns:
143
+ --------
144
+ tuple
145
+ (pdf, cdf, x, moments)
146
+ """
147
+ if return_domain == 'delta':
148
+ # Special handling for delta domain due to potential non-monotonicity
149
+ D = domains['delta']
150
+ pdf_d = pdfs['delta']
67
151
  sort_idx = np.argsort(D)
68
152
  x = D[sort_idx]
69
153
  pdf = pdf_d[sort_idx]
70
- moments = get_all_moments(x, pdf)
71
- return pdf, cdf, x, moments
154
+ else:
155
+ x = domains[return_domain]
156
+ pdf = pdfs[return_domain]
157
+
158
+ moments = get_all_moments(x, pdf)
159
+ return pdf, pdfs['cdf'], x, moments
160
+
161
+
162
+ @catch_exception
163
+ def breeden(domain_params, s, r, o, t, return_domain):
164
+ """
165
+ Breeden-Litzenberger method for RND estimation.
166
+
167
+ Parameters:
168
+ -----------
169
+ domain_params : tuple
170
+ (min_log_moneyness, max_log_moneyness, num_points)
171
+ s : float
172
+ Spot price
173
+ r : float
174
+ Risk-free rate
175
+ o : ndarray
176
+ Implied volatility array
177
+ t : float
178
+ Time to expiry in years
179
+ return_domain : str
180
+ Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes', 'delta')
181
+
182
+ Returns:
183
+ --------
184
+ tuple
185
+ (pdf, cdf, x, moments)
186
+ """
187
+ # Prepare domain arrays
188
+ domains = _prepare_domains(domain_params, s, r, o, t)
189
+ K = domains['strikes']
190
+
191
+ # Calculate option prices and derivatives
192
+ c = bs(s, K, r, o, t, option_type='call')
193
+ c1 = np.gradient(c, K)
194
+ c2 = np.gradient(c1, K)
195
+
196
+ # Calculate RND in strike domain and apply discount factor
197
+ rnd_k = np.maximum(np.exp(r * t) * c2, 0)
198
+
199
+ # Transform to other domains
200
+ pdfs = _transform_to_domains(rnd_k, domains)
201
+
202
+ # Return results for requested domain
203
+ return _select_domain_results(pdfs, domains, return_domain)
72
204
 
73
205
 
74
- # Rookley's Method
75
206
  @catch_exception
76
207
  def rookley(domain_params, s, r, o, t, return_domain):
77
- LM = get_domain(domain_params, s, r, o, t, 'log_moneyness')
78
- M = get_domain(domain_params, s, r, o, t, 'moneyness')
79
- R = get_domain(domain_params, s, r, o, t, 'returns')
80
- K = get_domain(domain_params, s, r, o, t, 'strikes')
81
- D = get_domain(domain_params, s, r, o, t, 'delta')
208
+ """
209
+ Rookley method for RND estimation, using volatility smile derivatives.
210
+
211
+ Parameters:
212
+ -----------
213
+ domain_params : tuple
214
+ (min_log_moneyness, max_log_moneyness, num_points)
215
+ s : float
216
+ Spot price
217
+ r : float
218
+ Risk-free rate
219
+ o : ndarray
220
+ Implied volatility array
221
+ t : float
222
+ Time to expiry in years
223
+ return_domain : str
224
+ Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes', 'delta')
225
+
226
+ Returns:
227
+ --------
228
+ tuple
229
+ (pdf, cdf, x, moments)
230
+ """
231
+ # Prepare domain arrays
232
+ domains = _prepare_domains(domain_params, s, r, o, t)
233
+ M = domains['moneyness']
234
+ K = domains['strikes']
82
235
 
236
+ # Calculate volatility derivatives with respect to moneyness
83
237
  o1 = np.gradient(o, M)
84
238
  o2 = np.gradient(o1, M)
85
239
 
240
+ # Precompute common terms
86
241
  st = np.sqrt(t)
87
242
  rt = r * t
88
243
  ert = np.exp(rt)
89
244
 
90
- n_d1 = (np.log(M) + (r + 1 / 2 * o ** 2) * t) / (o * st)
245
+ # Calculate Black-Scholes d1 and d2 terms
246
+ n_d1 = (np.log(M) + (r + 0.5 * o ** 2) * t) / (o * st)
91
247
  n_d2 = n_d1 - o * st
92
248
 
249
+ # Calculate various derivatives needed for the density
93
250
  del_d1_M = 1 / (M * o * st)
94
251
  del_d2_M = del_d1_M
95
252
  del_d1_o = -(np.log(M) + rt) / (o ** 2 * st) + st / 2
@@ -98,226 +255,163 @@ def rookley(domain_params, s, r, o, t, return_domain):
98
255
  d_d1_M = del_d1_M + del_d1_o * o1
99
256
  d_d2_M = del_d2_M + del_d2_o * o1
100
257
 
258
+ # Complex second derivatives
101
259
  dd_d1_M = (
102
260
  -(1 / (M * o * st)) * (1 / M + o1 / o)
103
261
  + o2 * (st / 2 - (np.log(M) + rt) / (o ** 2 * st))
104
262
  + o1 * (2 * o1 * (np.log(M) + rt) / (o ** 3 * st) - 1 / (M * o ** 2 * st))
105
263
  )
264
+
106
265
  dd_d2_M = (
107
266
  -(1 / (M * o * st)) * (1 / M + o1 / o)
108
267
  - o2 * (st / 2 + (np.log(M) + rt) / (o ** 2 * st))
109
268
  + o1 * (2 * o1 * (np.log(M) + rt) / (o ** 3 * st) - 1 / (M * o ** 2 * st))
110
269
  )
111
270
 
112
- d_c_M = stats.norm.pdf(n_d1) * d_d1_M - 1 / ert * stats.norm.pdf(n_d2) / M * d_d2_M + 1 / ert * stats.norm.cdf(n_d2) / (
113
- M ** 2)
271
+ # Call price derivatives with respect to moneyness
272
+ d_c_M = (
273
+ stats.norm.pdf(n_d1) * d_d1_M
274
+ - (1 / ert) * stats.norm.pdf(n_d2) / M * d_d2_M
275
+ + (1 / ert) * stats.norm.cdf(n_d2) / (M ** 2)
276
+ )
277
+
114
278
  dd_c_M = (
115
279
  stats.norm.pdf(n_d1) * (dd_d1_M - n_d1 * d_d1_M ** 2)
116
280
  - stats.norm.pdf(n_d2) / (ert * M) * (dd_d2_M - 2 / M * d_d2_M - n_d2 * d_d2_M ** 2)
117
281
  - 2 * stats.norm.cdf(n_d2) / (ert * M ** 3)
118
282
  )
119
283
 
284
+ # Convert from moneyness to strike derivatives
120
285
  dd_c_K = dd_c_M * (M / K) ** 2 + 2 * d_c_M * (M / K ** 2)
121
286
 
287
+ # Calculate RND in strike domain and apply discount factor
122
288
  rnd_k = np.maximum(ert * s * dd_c_K, 0)
123
- rnd_lm = rnd_k * K
124
289
 
125
- dx = LM[1] - LM[0]
126
- total_area = np.sum(rnd_lm * dx)
127
- pdf_lm = rnd_lm / total_area
128
- pdf_k = pdf_lm / K
129
- pdf_m = pdf_k * s
130
- pdf_r = pdf_lm / (1 + R)
290
+ # Transform to other domains
291
+ pdfs = _transform_to_domains(rnd_k, domains)
131
292
 
132
- pdf_d1 = stats.norm.pdf(d1(s, K, r, o, t, option_type='call'))
133
- dd_dK = pdf_d1 / (o * np.sqrt(t) * K)
134
- pdf_d = pdf_k / dd_dK
293
+ # Return results for requested domain
294
+ return _select_domain_results(pdfs, domains, return_domain)
135
295
 
136
- cdf = np.cumsum(pdf_lm) * dx
137
- cdf = cdf / cdf[-1]
138
-
139
- if return_domain == 'log_moneyness':
140
- x = LM
141
- pdf = pdf_lm
142
- moments = get_all_moments(x, pdf)
143
- return pdf, cdf, x, moments
144
- elif return_domain == 'moneyness':
145
- x = M
146
- pdf = pdf_m
147
- moments = get_all_moments(x, pdf)
148
- return pdf, cdf, x, moments
149
- elif return_domain == 'returns':
150
- x = R
151
- pdf = pdf_r
152
- moments = get_all_moments(x, pdf)
153
- return pdf, cdf, moments
154
- elif return_domain == 'strikes':
155
- x = K
156
- pdf = pdf_k
157
- moments = get_all_moments(x, pdf)
158
- return pdf, cdf, x, moments
159
- elif return_domain == 'delta':
160
- sort_idx = np.argsort(D)
161
- x = D[sort_idx]
162
- pdf = pdf_d[sort_idx]
163
- moments = get_all_moments(x, pdf)
164
- return pdf, cdf, x, moments
165
296
 
166
- '''
167
297
  @catch_exception
168
298
  def get_all_moments(x, pdf, model_params=None):
169
- mean = np.trapz(x * pdf, x) # E[X]
170
- median = x[np.searchsorted(np.cumsum(pdf * np.diff(x, prepend=x[0])), 0.5)] # Median (50th percentile)
171
- mode = x[np.argmax(pdf)] # Mode (peak of PDF)
172
- variance = np.trapz((x - mean) ** 2 * pdf, x) # Var[X] = E[(X - μ)^2]
173
- std_dev = np.sqrt(variance) # Standard deviation
174
- skewness = np.trapz((x - mean) ** 3 * pdf, x) / std_dev ** 3 # Skewness
175
- kurtosis = np.trapz((x - mean) ** 4 * pdf, x) / std_dev ** 4 # Kurtosis
176
- excess_kurtosis = kurtosis - 3 # Excess kurtosis (relative to normal dist.)
177
- q25 = x[np.searchsorted(np.cumsum(pdf * np.diff(x, prepend=x[0])), 0.25)] # 25th percentile
178
- q75 = x[np.searchsorted(np.cumsum(pdf * np.diff(x, prepend=x[0])), 0.75)] # 75th percentile
179
- iqr = q75 - q25 # Inter-quartile range
180
- entropy = -np.trapz(pdf * np.log(pdf + 1e-10), x) # Differential entropy (avoid log(0))
181
-
182
- # Full Z-score areas
183
- dx = np.diff(x, prepend=x[0])
184
- z = (x - mean) / std_dev
185
- o1p = np.sum(pdf[(z > 0) & (z < 1)] * dx[(z > 0) & (z < 1)])
186
- o2p = np.sum(pdf[(z >= 1) & (z < 2)] * dx[(z >= 1) & (z < 2)])
187
- o3p = np.sum(pdf[(z >= 2) & (z < 3)] * dx[(z >= 2) & (z < 3)])
188
- o4p = np.sum(pdf[z >= 3] * dx[z >= 3])
189
- o1n = np.sum(pdf[(z < 0) & (z > -1)] * dx[(z < 0) & (z > -1)])
190
- o2n = np.sum(pdf[(z <= -1) & (z > -2)] * dx[(z <= -1) & (z > -2)])
191
- o3n = np.sum(pdf[(z <= -2) & (z > -3)] * dx[(z <= -2) & (z > -3)])
192
- o4n = np.sum(pdf[z <= -3] * dx[z <= -3])
193
-
194
- moments = {
195
- 'mean': mean,
196
- 'median': median,
197
- 'mode': mode,
198
- 'variance': variance,
199
- 'std_dev': std_dev,
200
- 'skewness': skewness,
201
- 'kurtosis': kurtosis,
202
- 'excess_kurtosis': excess_kurtosis,
203
- 'q25': q25,
204
- 'q75': q75,
205
- 'iqr': iqr,
206
- 'entropy': entropy,
207
- 'o1p': o1p,
208
- 'o2p': o2p,
209
- 'o3p': o3p,
210
- 'o4p': o4p,
211
- 'o1n': o1n,
212
- 'o2n': o2n,
213
- 'o3n': o3n,
214
- 'o4n': o4n
215
- }
216
-
217
- # Add model parameters if provided
218
- if model_params is not None:
219
- moments.update(model_params)
299
+ """
300
+ Calculate statistical moments and other distributional properties.
220
301
 
221
- return moments
222
- '''
302
+ Parameters:
303
+ -----------
304
+ x : ndarray
305
+ Domain values
306
+ pdf : ndarray
307
+ Probability density values
308
+ model_params : dict, optional
309
+ Additional model parameters to include in the results
223
310
 
311
+ Returns:
312
+ --------
313
+ dict
314
+ Dictionary of calculated moments and properties
315
+ """
316
+ # Skip calculation for invalid inputs
317
+ if len(x) != len(pdf) or len(x) < 3:
318
+ logger.warning("Invalid inputs for moment calculation")
319
+ return {}
224
320
 
225
- @catch_exception
226
- def get_all_moments(x, pdf, model_params=None):
227
- # Precompute dx for integration
321
+ # Compute dx for integration
228
322
  dx = np.diff(x, prepend=x[0])
229
323
 
324
+ # Ensure the PDF integrates to 1
325
+ pdf_normalized = pdf / np.trapz(pdf, x)
326
+
230
327
  # Raw Moments (μ_k = E[X^k])
231
- raw_0 = np.trapz(pdf, x) # Zeroth (~1)
232
- raw_1 = np.trapz(x * pdf, x) # First (mean)
233
- raw_2 = np.trapz(x**2 * pdf, x) # Second
234
- raw_3 = np.trapz(x**3 * pdf, x) # Third
235
- raw_4 = np.trapz(x**4 * pdf, x) # Fourth
236
- raw_5 = np.trapz(x**5 * pdf, x) # Fifth
237
- raw_6 = np.trapz(x**6 * pdf, x) # Sixth
238
-
239
- mean = raw_1
240
- variance = np.trapz((x - mean)**2 * pdf, x) # m_2
241
- std_dev = np.sqrt(variance)
328
+ raw_moments = {
329
+ 'raw_0': np.trapz(pdf_normalized, x), # Zeroth (~1)
330
+ 'raw_1': np.trapz(x * pdf_normalized, x), # First (mean)
331
+ 'raw_2': np.trapz(x ** 2 * pdf_normalized, x), # Second
332
+ 'raw_3': np.trapz(x ** 3 * pdf_normalized, x), # Third
333
+ 'raw_4': np.trapz(x ** 4 * pdf_normalized, x), # Fourth
334
+ 'raw_5': np.trapz(x ** 5 * pdf_normalized, x), # Fifth
335
+ 'raw_6': np.trapz(x ** 6 * pdf_normalized, x), # Sixth
336
+ }
337
+
338
+ # Derived statistics
339
+ mean = raw_moments['raw_1']
340
+ variance = np.trapz((x - mean) ** 2 * pdf_normalized, x)
341
+ std_dev = np.sqrt(max(variance, 1e-10)) # Prevent division by zero
242
342
 
243
343
  # Central Moments (m_k = E[(X - μ)^k])
244
- cent_0 = raw_0 # Zeroth (~1)
245
- cent_1 = np.trapz((x - mean) * pdf, x) # First (~0)
246
- cent_2 = variance # Second (variance)
247
- cent_3 = np.trapz((x - mean)**3 * pdf, x) # Third
248
- cent_4 = np.trapz((x - mean)**4 * pdf, x) # Fourth
249
- cent_5 = np.trapz((x - mean)**5 * pdf, x) # Fifth
250
- cent_6 = np.trapz((x - mean)**6 * pdf, x) # Sixth
344
+ cent_moments = {
345
+ 'cent_1': 0, # Theoretically zero
346
+ 'cent_2': variance, # Second (variance)
347
+ 'cent_3': np.trapz((x - mean) ** 3 * pdf_normalized, x), # Third
348
+ 'cent_4': np.trapz((x - mean) ** 4 * pdf_normalized, x), # Fourth
349
+ 'cent_5': np.trapz((x - mean) ** 5 * pdf_normalized, x), # Fifth
350
+ 'cent_6': np.trapz((x - mean) ** 6 * pdf_normalized, x), # Sixth
351
+ }
251
352
 
252
353
  # Standardized Moments (m̄_k = E[((X - μ)/σ)^k])
253
354
  z = (x - mean) / std_dev
254
- std_0 = np.trapz(pdf, x) # Zeroth (~1)
255
- std_1 = np.trapz(z * pdf, x) # First (~0)
256
- std_2 = np.trapz(z**2 * pdf, x) # Second (~1)
257
- std_3 = np.trapz(z**3 * pdf, x) # Skewness
258
- std_4 = np.trapz(z**4 * pdf, x) # Kurtosis
259
- std_5 = np.trapz(z**5 * pdf, x) # Fifth
260
- std_6 = np.trapz(z**6 * pdf, x) # Sixth
261
-
262
- # Extra statistics
263
- cdf = np.cumsum(pdf * dx)
264
- median = x[np.searchsorted(cdf, 0.5)] # Median
265
- excess_kurtosis = std_4 - 3
266
- q25 = x[np.searchsorted(cdf, 0.25)] # 25th percentile
267
- q75 = x[np.searchsorted(cdf, 0.75)] # 75th percentile
355
+ std_moments = {
356
+ 'std_3': np.trapz(z ** 3 * pdf_normalized, x), # Skewness
357
+ 'std_4': np.trapz(z ** 4 * pdf_normalized, x), # Kurtosis
358
+ 'std_5': np.trapz(z ** 5 * pdf_normalized, x), # Fifth
359
+ 'std_6': np.trapz(z ** 6 * pdf_normalized, x), # Sixth
360
+ }
361
+
362
+ # Calculate CDF for quantiles
363
+ cdf = np.cumsum(pdf_normalized * dx)
364
+ cdf = cdf / cdf[-1] # Normalize
365
+
366
+ # Quantiles and other statistics
367
+ mode_idx = np.argmax(pdf)
368
+ mode = x[mode_idx] if 0 <= mode_idx < len(x) else mean
369
+
370
+ # Find percentiles
371
+ q25_idx = np.searchsorted(cdf, 0.25)
372
+ q50_idx = np.searchsorted(cdf, 0.50)
373
+ q75_idx = np.searchsorted(cdf, 0.75)
374
+
375
+ q25 = x[q25_idx] if 0 <= q25_idx < len(x) else np.nan
376
+ median = x[q50_idx] if 0 <= q50_idx < len(x) else np.nan
377
+ q75 = x[q75_idx] if 0 <= q75_idx < len(x) else np.nan
268
378
  iqr = q75 - q25
269
- entropy = -np.trapz(pdf * np.log(pdf + 1e-10), x)
270
-
271
- # Z-score areas
272
- o1p = np.sum(pdf[(z > 0) & (z < 1)] * dx[(z > 0) & (z < 1)])
273
- o2p = np.sum(pdf[(z >= 1) & (z < 2)] * dx[(z >= 1) & (z < 2)])
274
- o3p = np.sum(pdf[(z >= 2) & (z < 3)] * dx[(z >= 2) & (z < 3)])
275
- o4p = np.sum(pdf[z >= 3] * dx[z >= 3])
276
- o1n = np.sum(pdf[(z < 0) & (z > -1)] * dx[(z < 0) & (z > -1)])
277
- o2n = np.sum(pdf[(z <= -1) & (z > -2)] * dx[(z <= -1) & (z > -2)])
278
- o3n = np.sum(pdf[(z <= -2) & (z > -3)] * dx[(z <= -2) & (z > -3)])
279
- o4n = np.sum(pdf[z <= -3] * dx[z <= -3])
280
-
281
- # Combine results as flat columns
282
- moments = {
283
- 'raw_0': raw_0,
284
- 'raw_1': raw_1,
285
- 'raw_2': raw_2,
286
- 'raw_3': raw_3,
287
- 'raw_4': raw_4,
288
- 'raw_5': raw_5,
289
- 'raw_6': raw_6,
290
- 'cent_0': cent_0,
291
- 'cent_1': cent_1,
292
- 'cent_2': cent_2,
293
- 'cent_3': cent_3,
294
- 'cent_4': cent_4,
295
- 'cent_5': cent_5,
296
- 'cent_6': cent_6,
297
- 'std_0': std_0,
298
- 'std_1': std_1,
299
- 'std_2': std_2,
300
- 'std_3': std_3,
301
- 'std_4': std_4,
302
- 'std_5': std_5,
303
- 'std_6': std_6,
304
- 'median': median,
379
+
380
+ # Information theory measures
381
+ entropy = -np.trapz(pdf_normalized * np.log(pdf_normalized + 1e-10), x)
382
+
383
+ # Z-score areas (probability mass in standard deviation regions)
384
+ z_areas = {
385
+ 'o1p': np.sum(pdf_normalized[(z > 0) & (z < 1)] * dx[(z > 0) & (z < 1)]),
386
+ 'o2p': np.sum(pdf_normalized[(z >= 1) & (z < 2)] * dx[(z >= 1) & (z < 2)]),
387
+ 'o3p': np.sum(pdf_normalized[(z >= 2) & (z < 3)] * dx[(z >= 2) & (z < 3)]),
388
+ 'o4p': np.sum(pdf_normalized[z >= 3] * dx[z >= 3]),
389
+ 'o1n': np.sum(pdf_normalized[(z < 0) & (z > -1)] * dx[(z < 0) & (z > -1)]),
390
+ 'o2n': np.sum(pdf_normalized[(z <= -1) & (z > -2)] * dx[(z <= -1) & (z > -2)]),
391
+ 'o3n': np.sum(pdf_normalized[(z <= -2) & (z > -3)] * dx[(z <= -2) & (z > -3)]),
392
+ 'o4n': np.sum(pdf_normalized[z <= -3] * dx[z <= -3]),
393
+ }
394
+
395
+ # Common statistic names
396
+ common_stats = {
397
+ 'mean': mean,
398
+ 'variance': variance,
305
399
  'std_dev': std_dev,
306
- 'excess_kurtosis': excess_kurtosis,
400
+ 'skewness': std_moments['std_3'],
401
+ 'kurtosis': std_moments['std_4'],
402
+ 'excess_kurtosis': std_moments['std_4'] - 3,
403
+ 'median': median,
404
+ 'mode': mode,
307
405
  'q25': q25,
308
406
  'q75': q75,
309
407
  'iqr': iqr,
310
408
  'entropy': entropy,
311
- 'o1p': o1p,
312
- 'o2p': o2p,
313
- 'o3p': o3p,
314
- 'o4p': o4p,
315
- 'o1n': o1n,
316
- 'o2n': o2n,
317
- 'o3n': o3n,
318
- 'o4n': o4n
319
409
  }
320
410
 
411
+ # Combine all statistics
412
+ moments = {**raw_moments, **cent_moments, **std_moments, **z_areas, **common_stats}
413
+
414
+ # Add model parameters if provided
321
415
  if model_params is not None:
322
416
  moments.update(model_params)
323
417
 
@@ -328,70 +422,90 @@ def get_all_moments(x, pdf, model_params=None):
328
422
  def get_rnd_surface(model_results: pd.DataFrame,
329
423
  domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
330
424
  return_domain: str = 'log_moneyness',
331
- method: str = 'rookley') -> Dict[str, np.ndarray]:
425
+ method: str = 'rookley') -> Dict[str, Any]:
332
426
  """
333
- Generate RND surface from vol smile parameters.
334
-
335
- Works with both regular fit_results and interpolated_results dataframes.
427
+ Generate risk-neutral density surface from volatility surface parameters.
336
428
 
337
429
  Parameters:
338
- - model_results: DataFrame from fit_model() or interpolate_model(). Maturity names or DTM as Index
339
- - domain_params: Tuple of (min, max, num_points) for the x-domain grid
340
- - return_domain: Domain for x-axis values ('log_moneyness', 'moneyness', 'returns', 'strikes', 'delta')
341
- - method: 'rookley' or 'breeden'
430
+ -----------
431
+ model_results : pd.DataFrame
432
+ DataFrame from fit_model() or interpolate_model() with SVI parameters
433
+ domain_params : tuple
434
+ (min_log_moneyness, max_log_moneyness, num_points)
435
+ return_domain : str
436
+ Domain for results ('log_moneyness', 'moneyness', 'returns', 'strikes', 'delta')
437
+ method : str
438
+ Method for RND estimation ('rookley' or 'breeden')
342
439
 
343
440
  Returns:
344
- - Tuple containing:
345
- - pdf_surface: Dictionary mapping maturity/dtm names to PDF arrays of their requested domain
346
- - cdf_surface: Dictionary mapping maturity/dtm names to CDF arrays
347
- - x_surface: Dictionary mapping maturity/dtm names to requested x domain arrays
348
- - moments_df: DataFrame with moments of the distributions using model_results index
441
+ --------
442
+ dict
443
+ Dictionary containing pdf_surface, cdf_surface, x_surface, and moments
349
444
  """
350
- # Check if required columns are present
445
+ # Validate inputs
351
446
  required_columns = ['s', 'a', 'b', 'sigma', 'm', 'rho', 't', 'r']
352
447
  missing_columns = [col for col in required_columns if col not in model_results.columns]
353
448
  if missing_columns:
354
449
  raise VolyError(f"Required columns missing in model_results: {missing_columns}")
355
450
 
451
+ # Validate method
452
+ if method not in ['rookley', 'breeden']:
453
+ raise VolyError(f"Invalid method: {method}. Must be 'rookley' or 'breeden'")
454
+
455
+ # Validate return_domain
456
+ valid_domains = ['log_moneyness', 'moneyness', 'returns', 'strikes', 'delta']
457
+ if return_domain not in valid_domains:
458
+ raise VolyError(f"Invalid return_domain: {return_domain}. Must be one of {valid_domains}")
459
+
460
+ # Select method function
461
+ rnd_method = rookley if method == 'rookley' else breeden
462
+
463
+ # Initialize result containers
356
464
  pdf_surface = {}
357
465
  cdf_surface = {}
358
466
  x_surface = {}
359
467
  all_moments = {}
360
468
 
361
- # Process each maturity/dtm
469
+ # Process each maturity/expiry
362
470
  for i in model_results.index:
363
- # Calculate SVI total implied variance and convert to IV
364
- params = [
365
- model_results.loc[i, 'a'],
366
- model_results.loc[i, 'b'],
367
- model_results.loc[i, 'sigma'],
368
- model_results.loc[i, 'rho'],
369
- model_results.loc[i, 'm']
370
- ]
371
- s = model_results.loc[i, 's']
372
- r = model_results.loc[i, 'r']
373
- t = model_results.loc[i, 't']
374
-
375
- # Calculate implied volatility
376
- LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
377
- w = np.array([SVIModel.svi(x, *params) for x in LM])
378
- o = np.sqrt(w / t)
379
-
380
- if method == 'rookley':
381
- pdf, cdf, x, moments = rookley(domain_params, s, r, o, t, return_domain)
382
- else:
383
- pdf, cdf, x, moments = breeden(domain_params, s, r, o, t, return_domain)
384
-
385
- pdf_surface[i] = pdf
386
- cdf_surface[i] = cdf
387
- x_surface[i] = x
388
- all_moments[i] = moments
389
-
390
- # Create a DataFrame with moments using the same index as model_results
471
+ try:
472
+ # Extract SVI parameters for this maturity
473
+ params = [
474
+ model_results.loc[i, 'a'],
475
+ model_results.loc[i, 'b'],
476
+ model_results.loc[i, 'sigma'],
477
+ model_results.loc[i, 'rho'],
478
+ model_results.loc[i, 'm']
479
+ ]
480
+ s = model_results.loc[i, 's']
481
+ r = model_results.loc[i, 'r']
482
+ t = model_results.loc[i, 't']
483
+
484
+ # Calculate implied volatility surface from SVI parameters
485
+ LM = np.linspace(domain_params[0], domain_params[1], domain_params[2])
486
+ w = np.array([SVIModel.svi(x, *params) for x in LM])
487
+ o = np.sqrt(w / t)
488
+
489
+ # Calculate RND using the selected method
490
+ pdf, cdf, x, moments = rnd_method(domain_params, s, r, o, t, return_domain)
491
+
492
+ # Store results
493
+ pdf_surface[i] = pdf
494
+ cdf_surface[i] = cdf
495
+ x_surface[i] = x
496
+ all_moments[i] = moments
497
+
498
+ except Exception as e:
499
+ logger.warning(f"Failed to calculate RND for maturity {i}: {str(e)}")
500
+
501
+ # Check if we have any valid results
502
+ if not pdf_surface:
503
+ raise VolyError("No valid densities could be calculated. Check your input data.")
504
+
505
+ # Create DataFrame with moments
391
506
  moments = pd.DataFrame(all_moments).T
392
507
 
393
- # Ensure the index matches the model_results index
394
- moments.index = model_results.index
508
+ logger.info(f"RND surface calculation complete using {method} method")
395
509
 
396
510
  return {
397
511
  'pdf_surface': pdf_surface,