PyPI - voly - Versions diffs - 0.0.144__tar.gz → 0.0.146__tar.gz - Mend

voly 0.0.144.tar.gz → 0.0.146.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

{voly-0.0.144/src/voly.egg-info → voly-0.0.146}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voly
-Version: 0.0.144
+Version: 0.0.146
 Summary: Options & volatility research package
 Author-email: Manu de Cara <manu.de.cara@gmail.com>
 License: MIT

{voly-0.0.144 → voly-0.0.146}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "voly"
-version = "0.0.144"
+version = "0.0.146"
 description = "Options & volatility research package"
 readme = "README.md"
 authors = [
@@ -60,7 +60,7 @@ line_length = 100
 multi_line_output = 3
 [tool.mypy]
-python_version = "0.0.144"
+python_version = "0.0.146"
 warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = true

{voly-0.0.144 → voly-0.0.146}/src/voly/core/hd.py RENAMED Viewed

@@ -16,11 +16,13 @@ from voly.formulas import iv, get_domain
 from voly.models import SVIModel
 from voly.core.fit import fit_model
 from arch import arch_model
-from arch.univariate import GARCH, EGARCH
 @catch_exception
-def get_historical_data(currency, lookback_days, granularity, exchange_name):
+def get_historical_data(currency: str,
+                        lookback_days: str,
+                        granularity: str,
+                        exchange_name: str) -> pd.DataFrame:
     """
     Fetch historical OHLCV data for a cryptocurrency.
@@ -37,9 +39,8 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
     Returns:
     -------
-    df_hist : pandas.DataFrame containing the historical price data with OHLCV columns.
+    pd.DataFrame: Historical price data with OHLCV columns.
     """
     try:
         # Get the exchange class from ccxt
         exchange_class = getattr(ccxt, exchange_name.lower())
@@ -61,11 +62,15 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
     ohlcv_list = []
     ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
     ohlcv_list.append(ohlcv)
-    while True:
+    # Fetch all available data within the lookback period
+    while len(ohlcv) == 1000:
         from_ts = ohlcv[-1][0]
         new_ohlcv = exchange.fetch_ohlcv(symbol, granularity, since=from_ts, limit=1000)
-        ohlcv.extend(new_ohlcv)
-        if len(new_ohlcv) != 1000:
+        if len(new_ohlcv) <= 1:
+            break
+        ohlcv.extend(new_ohlcv[1:])  # Skip first element to avoid duplication
+        if len(new_ohlcv) < 1000:
             break
     # Convert to DataFrame
@@ -74,13 +79,13 @@ def get_historical_data(currency, lookback_days, granularity, exchange_name):
     df_hist.set_index('date', inplace=True)
     df_hist = df_hist.sort_index(ascending=True)
-    print(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")
+    logger.info(f"Data fetched successfully: {len(df_hist)} rows from {df_hist.index[0]} to {df_hist.index[-1]}")
     return df_hist
 @catch_exception
-def parse_window_length(window_length, df_hist):
+def parse_window_length(window_length: str, df_hist: pd.DataFrame) -> int:
     """
     Parse window length from string format (e.g., '7d', '30d') to number of data points.
@@ -96,7 +101,7 @@ def parse_window_length(window_length, df_hist):
     int
         Number of data points corresponding to the window length.
     """
-    if not window_length.endswith('d'):
+    if not isinstance(window_length, str) or not window_length.endswith('d'):
         raise VolyError("window_length should be in format '7d', '30d', etc.")
     # Extract number of days
@@ -115,21 +120,34 @@ def parse_window_length(window_length, df_hist):
 @catch_exception
-def fit_volatility_model(log_returns, df_hist, model_type='garch', distribution='normal', window_length='30d',
-                         n_fits=400):
+def fit_volatility_model(log_returns: np.ndarray,
+                         df_hist: pd.DataFrame,
+                         model_type: str = 'garch',
+                         distribution: str = 'normal',
+                         window_length: str = '30d',
+                         n_fits: int = 400) -> Dict[str, Any]:
     """
     Fit a volatility model (GARCH or EGARCH) to log returns.
-    Args:
-        log_returns: Array of log returns
-        df_hist: DataFrame with historical price data
-        model_type: Type of volatility model ('garch' or 'egarch')
-        distribution: Distribution type ('normal', 'studentst', or 'skewstudent')
-        window_length: Length of each window as a string (e.g., '30d')
-        n_fits: Number of sliding windows
+    Parameters:
+    -----------
+    log_returns : np.ndarray
+        Array of log returns
+    df_hist : pd.DataFrame
+        DataFrame with historical price data
+    model_type : str
+        Type of volatility model ('garch' or 'egarch')
+    distribution : str
+        Distribution type ('normal', 'studentst', or 'skewstudent')
+    window_length : str
+        Length of each window as a string (e.g., '30d')
+    n_fits : int
+        Number of sliding windows
     Returns:
-        Dict with model parameters and processes
+    --------
+    Dict[str, Any]
+        Dictionary with model parameters and processes
     """
     # Parse window length
     window_points = parse_window_length(window_length, df_hist)
@@ -137,36 +155,38 @@ def fit_volatility_model(log_returns, df_hist, model_type='garch', distribution=
     if len(log_returns) < window_points + n_fits:
         raise VolyError(f"Not enough data points. Need at least {window_points + n_fits}, got {len(log_returns)}")
-    # Adjust window sizes if necessary
-    n_fits = min(n_fits, len(log_returns) // 3)
-    window_points = min(window_points, len(log_returns) // 3)
+    # Adjust window sizes if necessary to avoid over-fitting
+    n_fits = min(n_fits, max(100, len(log_returns) // 3))
+    window_points = min(window_points, max(20, len(log_returns) // 3))
     start = window_points + n_fits
     end = n_fits
     # Different number of parameters based on model type and distribution
-    if model_type.lower() == 'garch':
-        if distribution.lower() == 'normal':
-            n_params = 4  # mu, omega, alpha, beta
-        elif distribution.lower() == 'studentst':
-            n_params = 5  # mu, omega, alpha, beta, nu
-        else:  # skewstudent
-            n_params = 6  # mu, omega, alpha, beta, nu, lambda (skew)
-    else:  # egarch
-        if distribution.lower() == 'normal':
-            n_params = 5  # mu, omega, alpha, gamma, beta
-        elif distribution.lower() == 'studentst':
-            n_params = 6  # mu, omega, alpha, gamma, beta, nu
-        else:  # skewstudent
-            n_params = 7  # mu, omega, alpha, gamma, beta, nu, lambda (skew)
+    param_names = get_param_names(model_type, distribution)
+    n_params = len(param_names)
     parameters = np.zeros((n_fits, n_params))
     z_process = []
-    logger.info(f"Fitting {model_type.upper()} model with {distribution} distribution using {n_fits} windows...")
+    logger.info(
+        f"Fitting {model_type.upper()} model with {distribution} distribution using {n_fits} windows of {window_length}...")
     for i in range(n_fits):
+        if i % (n_fits // 10) == 0:
+            logger.info(f"Fitting progress: {i}/{n_fits}")
+        # Skip if we don't have enough data
+        if end - i - 1 < 0 or start - i - 1 > len(log_returns):
+            continue
         window = log_returns[end - i - 1:start - i - 1]
+        # Skip windows that are too small or have invalid data
+        if len(window) < 10 or np.isnan(window).any() or np.isinf(window).any():
+            continue
+        # Mean-center the data to improve numerical stability
         data = window - np.mean(window)
         try:
@@ -176,101 +196,102 @@ def fit_volatility_model(log_returns, df_hist, model_type='garch', distribution=
             else:  # egarch
                 model = arch_model(data, vol='EGARCH', p=1, o=1, q=1, dist=distribution.lower())
-            fit_result = model.fit(disp='off')
+            fit_result = model.fit(disp='off', options={'maxiter': 1000})
             # Extract parameters based on model type and distribution
             params_dict = fit_result.params.to_dict()
-            if model_type.lower() == 'garch':
-                mu = params_dict.get("mu", 0)
-                omega = params_dict.get("omega", 0)
-                alpha = params_dict.get("alpha[1]", 0)
-                beta = params_dict.get("beta[1]", 0)
-                if distribution.lower() == 'normal':
-                    parameters[i, :] = [mu, omega, alpha, beta]
-                elif distribution.lower() == 'studentst':
-                    nu = params_dict.get("nu", 0)
-                    parameters[i, :] = [mu, omega, alpha, beta, nu]
-                else:  # skewstudent
-                    nu = params_dict.get("nu", 0)
-                    lam = params_dict.get("lambda", 0)
-                    parameters[i, :] = [mu, omega, alpha, beta, nu, lam]
-            else:  # egarch
-                mu = params_dict.get("mu", 0)
-                omega = params_dict.get("omega", 0)
-                alpha = params_dict.get("alpha[1]", 0)
-                gamma = params_dict.get("gamma[1]", 0)
-                beta = params_dict.get("beta[1]", 0)
-                if distribution.lower() == 'normal':
-                    parameters[i, :] = [mu, omega, alpha, gamma, beta]
-                elif distribution.lower() == 'studentst':
-                    nu = params_dict.get("nu", 0)
-                    parameters[i, :] = [mu, omega, alpha, gamma, beta, nu]
-                else:  # skewstudent
-                    nu = params_dict.get("nu", 0)
-                    lam = params_dict.get("lambda", 0)
-                    parameters[i, :] = [mu, omega, alpha, gamma, beta, nu, lam]
-            # Get last innovation
+            # Extract parameter values in correct order
+            param_values = [params_dict.get(param, 0) for param in param_names]
+            parameters[i, :] = param_values
+            # Get last innovation (standardized residual)
             residuals = fit_result.resid
             conditional_vol = fit_result.conditional_volatility
-            z_t = residuals[-1] / conditional_vol[-1]
-            z_process.append(z_t)
+            if len(residuals) > 0 and len(conditional_vol) > 0:
+                z_t = residuals[-1] / conditional_vol[-1]
+                if not np.isnan(z_t) and not np.isinf(z_t):
+                    z_process.append(z_t)
         except Exception as e:
             logger.warning(f"Model fit failed for window {i}: {str(e)}")
     # Clean up any failed fits
     if len(z_process) < n_fits / 2:
-        raise VolyError("Too many model fits failed. Check your data.")
+        raise VolyError(f"Too many model fits failed ({len(z_process)}/{n_fits}). Check your data.")
+    # Filter out rows with zeros (failed fits)
+    valid_rows = ~np.all(parameters == 0, axis=1)
+    parameters = parameters[valid_rows]
+    # Calculate average parameters and standard deviations
     avg_params = np.mean(parameters, axis=0)
     std_params = np.std(parameters, axis=0)
     return {
+        'model_type': model_type,
+        'distribution': distribution,
         'parameters': parameters,
         'avg_params': avg_params,
         'std_params': std_params,
         'z_process': np.array(z_process),
-        'model_type': model_type,
-        'distribution': distribution,
-        'param_names': get_param_names(model_type, distribution)
+        'param_names': param_names
     }
-def get_param_names(model_type, distribution):
-    """Get parameter names based on model type and distribution."""
+def get_param_names(model_type: str, distribution: str) -> List[str]:
+    """
+    Get parameter names based on model type and distribution.
+    Parameters:
+    -----------
+    model_type : str
+        Type of volatility model ('garch' or 'egarch')
+    distribution : str
+        Distribution type ('normal', 'studentst', or 'skewstudent')
+    Returns:
+    --------
+    List[str]
+        List of parameter names
+    """
     if model_type.lower() == 'garch':
         if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha', 'beta']
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]']
         elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha', 'beta', 'nu']
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu']
         else:  # skewstudent
-            return ['mu', 'omega', 'alpha', 'beta', 'nu', 'lambda']
+            return ['mu', 'omega', 'alpha[1]', 'beta[1]', 'nu', 'lambda']
     else:  # egarch
         if distribution.lower() == 'normal':
-            return ['mu', 'omega', 'alpha', 'gamma', 'beta']
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]']
         elif distribution.lower() == 'studentst':
-            return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu']
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu']
         else:  # skewstudent
-            return ['mu', 'omega', 'alpha', 'gamma', 'beta', 'nu', 'lambda']
+            return ['mu', 'omega', 'alpha[1]', 'gamma[1]', 'beta[1]', 'nu', 'lambda']
 @catch_exception
-def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_parameters=True):
+def simulate_volatility_paths(vol_model: Dict[str, Any],
+                              horizon: int,
+                              simulations: int = 5000) -> Tuple[np.ndarray, float]:
     """
     Simulate future paths using a fitted volatility model.
-    Args:
-        vol_model: Dict with volatility model parameters
-        horizon: Number of steps to simulate
-        simulations: Number of paths to simulate
-        variate_parameters: Whether to vary parameters between simulations
+    Parameters:
+    -----------
+    vol_model : Dict[str, Any]
+        Dict with volatility model parameters
+    horizon : int
+        Number of steps to simulate
+    simulations : int
+        Number of paths to simulate
     Returns:
-        Array of simulated log returns
+    --------
+    Tuple[np.ndarray, float]
+        Simulated returns and drift
     """
     parameters = vol_model['parameters']
     z_process = vol_model['z_process']
@@ -282,18 +303,21 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
     pars = vol_model['avg_params'].copy()
     bounds = vol_model['std_params'].copy()
-    # Log parameters
-    param_str = ", ".join([f"{name}={par:.6f}" for name, par in zip(param_names, pars)])
+    # Create dictionary for easier parameter access
+    param_dict = {name: value for name, value in zip(param_names, pars)}
+    # Log parameters in a structured way
+    param_str = ", ".join([f"{name}={param_dict.get(name, 0):.6f}" for name in param_names])
     logger.info(f"{model_type.upper()} parameters: {param_str}")
-    # Create KDE for innovations based on distribution
+    # Create sampling function based on distribution
     if distribution.lower() == 'normal':
         # Use standard normal for normal distribution
         def sample_innovation(size=1):
             return np.random.normal(0, 1, size=size)
     else:
         # Use KDE for non-normal distributions to capture empirical distribution
-        kde = stats.gaussian_kde(z_process, bw_method='silverman')  # original code doesn't include bw_method
+        kde = stats.gaussian_kde(z_process, bw_method='silverman')  # original code didnt have bw_method
         z_range = np.linspace(min(z_process), max(z_process), 1000)
         z_prob = kde(z_range)
         z_prob = z_prob / np.sum(z_prob)
@@ -303,65 +327,65 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
     # Simulate paths
     simulated_returns = np.zeros(simulations)
+    mu = param_dict.get('mu', 0)
     for i in range(simulations):
         if (i + 1) % (simulations // 10) == 0:
             logger.info(f"Simulation progress: {i + 1}/{simulations}")
-        # Optionally vary parameters
-        if variate_parameters and (i + 1) % (simulations // 20) == 0:
-            new_pars = []
-            for j, (par, bound) in enumerate(zip(pars, bounds)):
-                var = bound ** 2 / len(parameters)
-                new_par = np.random.normal(par, var)
-                # Ensure omega is positive, betas are between 0 and 1, etc.
-                if j >= 1 and new_par <= 0:
-                    new_par = 0.01
-                new_pars.append(new_par)
-            sim_pars = new_pars
+        # Optionally vary parameters between simulations
+        if (i + 1) % (simulations // 20) == 0:
+            # Create parameter variations based on their estimated distribution
+            sim_params = {}
+            for j, (name, par, bound) in enumerate(zip(param_names, pars, bounds)):
+                var = bound ** 2 / max(len(parameters), 1)
+                # Generate new parameter from normal distribution around the mean
+                new_par = np.random.normal(par, np.sqrt(var))
+                # Apply constraints to ensure valid parameters
+                if name == 'omega':
+                    new_par = max(new_par, 1e-6)  # Must be positive
+                elif name in ['alpha[1]', 'beta[1]']:
+                    new_par = max(min(new_par, 0.999), 0.001)  # Between 0 and 1
+                elif name == 'nu':
+                    new_par = max(new_par, 2.1)  # Degrees of freedom > 2
+                sim_params[name] = new_par
         else:
-            sim_pars = pars.copy()
+            sim_params = param_dict.copy()
-        # Initialize variables based on model type
+        # Initialize volatility based on model type
         if model_type.lower() == 'garch':
-            if distribution.lower() == 'normal':
-                mu, omega, alpha, beta = sim_pars
-                sigma2 = omega / (1 - alpha - beta)
-            elif distribution.lower() == 'studentst':
-                mu, omega, alpha, beta, nu = sim_pars
-                sigma2 = omega / (1 - alpha - beta)
-            else:  # skewstudent
-                mu, omega, alpha, beta, nu, lam = sim_pars
-                sigma2 = omega / (1 - alpha - beta)
+            omega = sim_params.get('omega', 0)
+            alpha = sim_params.get('alpha[1]', 0)
+            beta = sim_params.get('beta[1]', 0)
+            # Initialize GARCH volatility (unconditional variance)
+            sigma2 = omega / (1 - alpha - beta) if alpha + beta < 1 else omega / 0.99
         else:  # egarch
-            if distribution.lower() == 'normal':
-                mu, omega, alpha, gamma, beta = sim_pars
-                log_sigma2 = omega / (1 - beta)
-                sigma2 = np.exp(log_sigma2)
-            elif distribution.lower() == 'studentst':
-                mu, omega, alpha, gamma, beta, nu = sim_pars
-                log_sigma2 = omega / (1 - beta)
-                sigma2 = np.exp(log_sigma2)
-            else:  # skewstudent
-                mu, omega, alpha, gamma, beta, nu, lam = sim_pars
-                log_sigma2 = omega / (1 - beta)
-                sigma2 = np.exp(log_sigma2)
+            omega = sim_params.get('omega', 0)
+            beta = sim_params.get('beta[1]', 0)
+            # Initialize EGARCH volatility
+            log_sigma2 = omega / (1 - beta) if beta < 1 else omega / 0.99
+            sigma2 = np.exp(log_sigma2)
         returns_sum = 0
-        # Simulate path
+        # Simulate path step by step
         for _ in range(horizon):
-            # Sample innovation
+            # Sample a random innovation
             z = sample_innovation()
-            # Update volatility and returns based on model type
+            # Update returns and volatility based on model type
             if model_type.lower() == 'garch':
                 # Calculate return
                 e = z * np.sqrt(sigma2)
                 returns_sum += e + mu
                 # Update GARCH volatility
-                sigma2 = omega + alpha * e ** 2 + beta * sigma2
+                sigma2 = sim_params.get('omega', 0) + sim_params.get('alpha[1]', 0) * e ** 2 + sim_params.get('beta[1]',
+                                                                                                              0) * sigma2
             else:  # egarch
                 # Calculate return
                 e = z * np.sqrt(sigma2)
@@ -369,6 +393,12 @@ def simulate_volatility_paths(vol_model, horizon, simulations=5000, variate_para
                 # Update EGARCH volatility
                 abs_z = abs(z)
+                gamma = sim_params.get('gamma[1]', 0)
+                alpha = sim_params.get('alpha[1]', 0)
+                beta = sim_params.get('beta[1]', 0)
+                omega = sim_params.get('omega', 0)
+                # EGARCH update equation
                 log_sigma2 = omega + beta * log_sigma2 + alpha * (abs_z - np.sqrt(2 / np.pi)) + gamma * z
                 sigma2 = np.exp(log_sigma2)
@@ -381,89 +411,95 @@ def get_hd_surface(model_results: pd.DataFrame,
                    df_hist: pd.DataFrame,
                    domain_params: Tuple[float, float, int] = (-1.5, 1.5, 1000),
                    return_domain: str = 'log_moneyness',
-                   method: str = 'arch_returns',
-                   model_type: str = 'garch',
+                   method: str = 'garch',
                    distribution: str = 'normal',
-                   **kwargs) -> Dict[str, Any]:
+                   window_length: str = '30d',
+                   n_fits: int = 400,
+                   simulations: int = 5000,
+                   bandwidth: str = 'silverman') -> Dict[str, Any]:
     """
     Generate historical density surface from historical price data.
     Parameters:
-        model_results: DataFrame with model parameters and maturities
-        df_hist: DataFrame with historical price data
-        domain_params: Tuple of (min, max, num_points) for x-domain
-        return_domain: Domain for x-axis values ('log_moneyness', 'moneyness', 'returns', 'strikes')
-        method: Method to use for HD estimation ('hist_returns' or 'arch_returns')
-        model_type: Type of volatility model to use ('garch' or 'egarch')
-        distribution: Distribution to use ('normal', 'studentst', or 'skewstudent')
-        **kwargs: Additional parameters for specific methods:
-            For volatility models ('garch'/'egarch' method):
-                n_fits: Number of sliding windows (default: 400)
-                simulations: Number of Monte Carlo simulations (default: 5000)
-                window_length: Length of sliding windows as string (default: '30d')
-                variate_parameters: Whether to vary parameters (default: True)
-                bandwidth: KDE bandwidth (default: 'silverman')
-            For 'hist_returns' method:
-                bandwidth: KDE bandwidth (default: 'silverman')
+    -----------
+    model_results : pd.DataFrame
+        DataFrame with model parameters and maturities
+    df_hist : pd.DataFrame
+        DataFrame with historical price data
+    domain_params : Tuple[float, float, int]
+        Tuple of (min, max, num_points) for x-domain
+    return_domain : str
+        Domain for x-axis values ('log_moneyness', 'moneyness', 'returns', 'strikes')
+    method : str
+        Method to use for HD estimation:
+        - 'garch': GARCH(1,1) model
+        - 'egarch': EGARCH(1,1,1) model with asymmetry
+        - 'basic': Simple histogram/KDE of historical returns
+    distribution : str
+        Distribution to use for volatility models ('normal', 'studentst', or 'skewstudent')
+    window_length : str
+        Length of sliding windows as string (e.g., '30d')
+    n_fits : int
+        Number of sliding windows for volatility model fitting
+    simulations : int
+        Number of Monte Carlo simulations for volatility models
+    bandwidth : str
+        KDE bandwidth method (default: 'silverman')
     Returns:
+    --------
+    Dict[str, Any]
         Dictionary containing pdf_surface, cdf_surface, x_surface, and moments
     """
-    # Check if required columns are present
+    # Validate inputs
     required_columns = ['s', 't', 'r']
     missing_columns = [col for col in required_columns if col not in model_results.columns]
     if missing_columns:
         raise VolyError(f"Required columns missing in model_results: {missing_columns}")
+    if len(df_hist) < 2:
+        raise VolyError("Not enough data points in df_hist")
     # Determine granularity from df_hist
-    if len(df_hist) > 1:
-        # Calculate minutes between consecutive timestamps
-        minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
-        minutes_per_period = int(minutes_diff)
-    else:
-        raise VolyError("Cannot determine granularity from df_hist.")
+    minutes_diff = (df_hist.index[1] - df_hist.index[0]).total_seconds() / 60
+    minutes_per_period = max(1, int(minutes_diff))
-    # Validate model_type and distribution
-    valid_model_types = ['garch', 'egarch']
+    # Validate method and model parameters
+    valid_methods = ['garch', 'egarch', 'basic']
     valid_distributions = ['normal', 'studentst', 'skewstudent']
-    if model_type.lower() not in valid_model_types:
-        raise VolyError(f"Invalid model_type: {model_type}. Must be one of {valid_model_types}")
+    method = method.lower()
+    distribution = distribution.lower()
-    if distribution.lower() not in valid_distributions:
-        raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
+    if method not in valid_methods:
+        raise VolyError(f"Invalid method: {method}. Must be one of {valid_methods}")
-    # Get method-specific parameters
-    if method == 'arch_returns':
-        n_fits = kwargs.get('n_fits', 400)
-        simulations = kwargs.get('simulations', 5000)
-        window_length = kwargs.get('window_length', '30d')
-        variate_parameters = kwargs.get('variate_parameters', True)
-        bandwidth = kwargs.get('bandwidth', 'silverman')
-        logger.info(
-            f"Using {model_type.upper()} method with {distribution} distribution, {n_fits} fits, {simulations} simulations")
-    elif method == 'hist_returns':
-        bandwidth = kwargs.get('bandwidth', 'silverman')
-        logger.info(f"Using returns-based KDE method with bandwidth {bandwidth}")
-    else:
-        raise VolyError(f"Unknown method: {method}. Use 'hist_returns', 'arch_returns'.")
+    if method in ['garch', 'egarch'] and distribution not in valid_distributions:
+        raise VolyError(f"Invalid distribution: {distribution}. Must be one of {valid_distributions}")
     # Calculate log returns from price history
     log_returns = np.log(df_hist['close'] / df_hist['close'].shift(1)) * 100
     log_returns = log_returns.dropna().values
-    # Fit volatility model once if using garch/egarch method
+    # Fit volatility model if using GARCH or EGARCH
     vol_model = None
-    if method == 'arch_returns':
+    if method in ['garch', 'egarch']:
+        model_type = method  # Use method as model_type
+        logger.info(
+            f"Using {model_type.upper()} with {distribution} distribution, {n_fits} fits, {simulations} simulations")
         vol_model = fit_volatility_model(
-            log_returns,
-            df_hist,
+            log_returns=log_returns,
+            df_hist=df_hist,
             model_type=model_type,
             distribution=distribution,
             window_length=window_length,
             n_fits=n_fits
         )
+    elif method == 'basic':
+        logger.info(f"Using basic returns-based KDE method with bandwidth {bandwidth}")
+    # Initialize result containers
     pdf_surface = {}
     cdf_surface = {}
     x_surface = {}
@@ -488,8 +524,8 @@ def get_hd_surface(model_results: pd.DataFrame,
         logger.info(f"Processing HD for maturity {i} (t={t:.4f} years, {tau_days_float:.2f} days)")
-        if method == 'hist_returns':
-            # Standard returns-based method
+        if method == 'basic':
+            # Simple returns-based method
             # Filter historical data for this maturity's lookback period
             start_date = pd.Timestamp.now() - pd.Timedelta(days=int(t * 365.25))
             maturity_hist = df_hist[df_hist.index >= start_date].copy()
@@ -519,7 +555,16 @@ def get_hd_surface(model_results: pd.DataFrame,
             f = stats.gaussian_kde(adj_returns, bw_method=bandwidth)
             pdf_values = f(LM)
-        elif method == 'arch_returns':
+            # Transform according to return domain
+            pdf_lm = pdf_values
+            pdf_m = pdf_lm / M
+            pdf_k = pdf_lm / K
+            pdf_r = pdf_lm / (1 + R)
+            # No model parameters to include
+            model_params = None
+        elif method in ['garch', 'egarch']:
             # Volatility model-based method
             if vol_model is None:
                 logger.warning(f"Volatility model fitting failed, skipping maturity {i}")
@@ -530,8 +575,7 @@ def get_hd_surface(model_results: pd.DataFrame,
             simulated_returns, simulated_mu = simulate_volatility_paths(
                 vol_model,
                 horizon,
-                simulations,
-                variate_parameters
+                simulations
             )
             # Scale the simulated returns to match target time horizon
@@ -548,25 +592,31 @@ def get_hd_surface(model_results: pd.DataFrame,
             # Convert to terminal prices
             simulated_prices = s * np.exp(risk_neutral_returns / 100)
-            # Convert to moneyness domain
+            # Convert to moneyness domain (x-domain)
             simulated_moneyness = s / simulated_prices
             # Perform KDE to get PDF
             kde = stats.gaussian_kde(simulated_moneyness, bw_method=bandwidth)
             pdf_values = kde(M)
+            # Transform according to return domain
+            pdf_m = pdf_values
+            pdf_lm = pdf_m * M
+            pdf_k = pdf_lm / K
+            pdf_r = pdf_lm / (1 + R)
             # Include volatility model params in moments
             avg_params = vol_model['avg_params']
             param_names = vol_model['param_names']
-            model_params = {name: value for name, value in zip(param_names, avg_params)}
-            model_params['model_type'] = model_type
+            model_params = {name.replace('[1]', ''): value for name, value in zip(param_names, avg_params)}
+            model_params['model_type'] = method
             model_params['distribution'] = distribution
-            # Add persistence for GARCH-type models
-            if model_type.lower() == 'garch':
+            # Add persistence for GARCH models
+            if method == 'garch':
                 model_params['persistence'] = model_params.get('alpha', 0) + model_params.get('beta', 0)
         else:
-            continue  # Skip this maturity if method is invalid
+            continue  # Skip if invalid method
         # Ensure density integrates to 1
         dx = LM[1] - LM[0]
@@ -577,41 +627,27 @@ def get_hd_surface(model_results: pd.DataFrame,
         pdf_values = pdf_values / total_area
-        # Common processing for both methods
-        # Transform densities to various domains
-        if method == 'hist_returns':
-            pdf_lm = pdf_values
-            pdf_m = pdf_lm / M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
-        else:  # volatility models
-            pdf_m = pdf_values
-            pdf_lm = pdf_m * M
-            pdf_k = pdf_lm / K
-            pdf_r = pdf_lm / (1 + R)
         # Calculate CDF
         cdf = np.cumsum(pdf_lm * dx)
-        cdf = np.minimum(cdf / cdf[-1], 1.0)
+        cdf = np.minimum(cdf / cdf[-1], 1.0)  # Ensure CDF is between 0 and 1
         # Select appropriate domain and calculate moments
         if return_domain == 'log_moneyness':
             x = LM
             pdf = pdf_lm
-            moments = get_all_moments(x, pdf, model_params if method in ['garch', 'egarch'] else None)
+            moments = get_all_moments(x, pdf, model_params)
         elif return_domain == 'moneyness':
             x = M
             pdf = pdf_m
-            moments = get_all_moments(x, pdf, model_params if method in ['garch', 'egarch'] else None)
+            moments = get_all_moments(x, pdf, model_params)
         elif return_domain == 'returns':
             x = R
             pdf = pdf_r
-            moments = get_all_moments(x, pdf, model_params if method in ['garch', 'egarch'] else None)
+            moments = get_all_moments(x, pdf, model_params)
         elif return_domain == 'strikes':
             x = K
             pdf = pdf_k
-            moments = get_all_moments(x, pdf, model_params if method in ['garch', 'egarch'] else None)
+            moments = get_all_moments(x, pdf, model_params)
         else:
             raise VolyError(f"Unsupported return_domain: {return_domain}")
@@ -621,11 +657,14 @@ def get_hd_surface(model_results: pd.DataFrame,
         x_surface[i] = x
         all_moments[i] = moments
+    # Check if we have any valid results
+    if not pdf_surface:
+        raise VolyError("No valid densities could be calculated. Check your input data.")
     # Create DataFrame with moments
     moments = pd.DataFrame(all_moments).T
-    logger.info(
-        f"Historical density calculation complete using {method} method with {model_type} model and {distribution} distribution")
+    logger.info(f"Historical density calculation complete using {method} method")
     return {
         'pdf_surface': pdf_surface,

{voly-0.0.144 → voly-0.0.146/src/voly.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voly
-Version: 0.0.144
+Version: 0.0.146
 Summary: Options & volatility research package
 Author-email: Manu de Cara <manu.de.cara@gmail.com>
 License: MIT