well-log-toolkit 0.1.118__tar.gz → 0.1.119__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/PKG-INFO +1 -1
  2. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/pyproject.toml +1 -1
  3. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/regression.py +16 -6
  4. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/visualization.py +49 -5
  5. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/PKG-INFO +1 -1
  6. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/README.md +0 -0
  7. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/setup.cfg +0 -0
  8. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/__init__.py +0 -0
  9. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/exceptions.py +0 -0
  10. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/las_file.py +0 -0
  11. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/manager.py +0 -0
  12. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/operations.py +0 -0
  13. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/property.py +0 -0
  14. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/statistics.py +0 -0
  15. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/utils.py +0 -0
  16. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/well.py +0 -0
  17. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/SOURCES.txt +0 -0
  18. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/dependency_links.txt +0 -0
  19. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/requires.txt +0 -0
  20. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/top_level.txt +0 -0
--- well_log_toolkit-0.1.118/PKG-INFO
+++ well_log_toolkit-0.1.119/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: well-log-toolkit
-Version: 0.1.118
+Version: 0.1.119
 Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
 Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
 License: MIT
--- well_log_toolkit-0.1.118/pyproject.toml
+++ well_log_toolkit-0.1.119/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "well-log-toolkit"
-version = "0.1.118"
+version = "0.1.119"
 description = "Fast LAS file processing with lazy loading and filtering for well log analysis"
 readme = "README.md"
 requires-python = ">=3.9"
--- well_log_toolkit-0.1.118/well_log_toolkit/regression.py
+++ well_log_toolkit-0.1.119/well_log_toolkit/regression.py
@@ -70,13 +70,14 @@ class RegressionBase(ABC):
         """
         return self.predict(x)
 
-    def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> None:
+    def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray, use_log_space: bool = False) -> None:
         """Calculate R² and RMSE metrics.
 
         Args:
             x: Independent variable values
             y: Actual dependent variable values
             y_pred: Predicted dependent variable values
+            use_log_space: If True, calculate R² in log space (useful when y spans orders of magnitude)
         """
         # Store original data
         self.x_data = x
@@ -85,12 +86,21 @@ class RegressionBase(ABC):
         # Store x-axis range
         self.x_range = (float(np.min(x)), float(np.max(x)))
 
-        # R² calculation
-        ss_res = np.sum((y - y_pred) ** 2)
-        ss_tot = np.sum((y - np.mean(y)) ** 2)
-        self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+        # R² calculation - in log space if requested
+        if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
+            # Calculate in log space for data spanning orders of magnitude
+            log_y = np.log10(y)
+            log_y_pred = np.log10(y_pred)
+            ss_res = np.sum((log_y - log_y_pred) ** 2)
+            ss_tot = np.sum((log_y - np.mean(log_y)) ** 2)
+            self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+        else:
+            # Standard R² in linear space
+            ss_res = np.sum((y - y_pred) ** 2)
+            ss_tot = np.sum((y - np.mean(y)) ** 2)
+            self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
 
-        # RMSE calculation
+        # RMSE calculation (always in linear space)
         self.rmse = np.sqrt(np.mean((y - y_pred) ** 2))
 
     def _prepare_data(self, x: ArrayLike, y: ArrayLike) -> Tuple[np.ndarray, np.ndarray]:
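
The new use_log_space branch matters when y spans several orders of magnitude: linear-space R² is dominated by the largest y values, whereas log-space R² weights every decade equally. A minimal standalone illustration of the two formulas from the hunk above (synthetic data, not part of the package):

    import numpy as np

    rng = np.random.default_rng(0)
    x = np.linspace(1.0, 5.0, 200)
    y = 10.0 ** (0.8 * x) * rng.lognormal(sigma=0.15, size=x.size)  # spans roughly 4 decades
    y_pred = 10.0 ** (0.8 * x)                                      # noise-free model

    def r2(actual, predicted):
        # same formula as _calculate_metrics: 1 - SS_res / SS_tot
        ss_res = np.sum((actual - predicted) ** 2)
        ss_tot = np.sum((actual - np.mean(actual)) ** 2)
        return 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0

    print("linear-space R²:", r2(y, y_pred))                      # driven by the largest y values
    print("log-space R²:   ", r2(np.log10(y), np.log10(y_pred)))  # treats every decade equally
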
--- well_log_toolkit-0.1.118/well_log_toolkit/visualization.py
+++ well_log_toolkit-0.1.119/well_log_toolkit/visualization.py
@@ -3139,6 +3139,27 @@ class Crossplot:
             self._regressions[reg_type] = {}
         self._regressions[reg_type][identifier] = regression_obj
 
+    def _get_group_colors(self, data: pd.DataFrame, group_column: str) -> dict:
+        """Get the color assigned to each group in the plot.
+
+        Args:
+            data: DataFrame with plotting data
+            group_column: Column name used for grouping
+
+        Returns:
+            Dictionary mapping group names to their colors
+        """
+        group_colors = {}
+
+        # Get unique groups in the same order as they'll appear in the plot
+        groups = data.groupby(group_column)
+
+        for idx, (group_name, _) in enumerate(groups):
+            # Use the same color assignment logic as _plot_by_groups
+            group_colors[group_name] = DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
+
+        return group_colors
+
     def _find_best_legend_locations(self, data: pd.DataFrame) -> tuple[str, str]:
         """Find the two best locations for legends based on data density.
 
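
The helper reproduces the scatter-colour assignment: pandas groupby iterates groups in sorted key order, so indexing the palette by enumeration position yields the same colour each group received when it was plotted, provided _plot_by_groups iterates the same way. A small standalone illustration (hypothetical data; DEFAULT_COLORS here is a stand-in for the package's palette):

    import pandas as pd

    DEFAULT_COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c']  # stand-in palette
    data = pd.DataFrame({'zone': ['B', 'A', 'B', 'C'], 'x': [1.0, 2.0, 3.0, 4.0]})

    # groupby sorts keys by default, so enumeration order is A, B, C
    group_colors = {name: DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
                    for idx, (name, _) in enumerate(data.groupby('zone'))}
    print(group_colors)  # {'A': '#1f77b4', 'B': '#ff7f0e', 'C': '#2ca02c'}
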
@@ -3228,7 +3249,12 @@ class Crossplot:
 
         # Add R² on second line if requested (will be styled grey later)
         if include_r2:
-            return f"{first_line}\nR² = {reg.r_squared:.3f}"
+            # Format R² with appropriate note
+            if self.y_log:
+                r2_label = f"R² = {reg.r_squared:.3f} (log)"
+            else:
+                r2_label = f"R² = {reg.r_squared:.3f}"
+            return f"{first_line}\n{r2_label}"
         else:
             return first_line
 
@@ -3371,15 +3397,19 @@ class Crossplot:
                 f"Reduce the number of groups or use a different regression strategy."
             )
 
+        # Get the actual colors used for each group in the plot
+        group_colors_map = self._get_group_colors(data, group_column)
+
         for idx, (group_name, group_data) in enumerate(color_groups):
             x_vals = group_data['x'].values
             y_vals = group_data['y'].values
             mask = np.isfinite(x_vals) & np.isfinite(y_vals)
             if np.sum(mask) >= 2:
-                # Copy config and set default line color if not specified
+                # Copy config and use the same color as the data group
                 group_config = config.copy()
                 if 'line_color' not in group_config:
-                    group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+                    # Use the same color as the data points for this group
+                    group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
 
                 # Skip legend update for all but last regression
                 is_last = (idx == n_groups - 1)
@@ -3412,15 +3442,19 @@ class Crossplot:
                 f"Reduce the number of groups or use a different regression strategy."
             )
 
+        # Get the actual colors used for each group in the plot
+        group_colors_map = self._get_group_colors(data, group_col)
+
         for idx, (group_name, group_data) in enumerate(groups):
             x_vals = group_data['x'].values
             y_vals = group_data['y'].values
             mask = np.isfinite(x_vals) & np.isfinite(y_vals)
             if np.sum(mask) >= 2:
-                # Copy config and set default line color if not specified
+                # Copy config and use the same color as the data group
                 group_config = config.copy()
                 if 'line_color' not in group_config:
-                    group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+                    # Use the same color as the data points for this group
+                    group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
 
                 # Skip legend update for all but last regression
                 is_last = (idx == n_groups - 1)
@@ -3467,6 +3501,11 @@ class Crossplot:
             warnings.warn(f"Failed to fit {regression_type} regression for {name}: {e}")
             return
 
+        # Recalculate R² in log space if y-axis is log scale
+        if self.y_log:
+            y_pred = reg.predict(x_vals)
+            reg._calculate_metrics(x_vals, y_vals, y_pred, use_log_space=True)
+
         # Store regression in nested structure
         self._store_regression(regression_type, name, reg)
 
@@ -3810,6 +3849,11 @@ class Crossplot:
         except ValueError as e:
             raise ValueError(f"Failed to fit {regression_type} regression: {e}")
 
+        # Recalculate R² in log space if y-axis is log scale
+        if self.y_log:
+            y_pred = reg.predict(x_clean)
+            reg._calculate_metrics(x_clean, y_clean, y_pred, use_log_space=True)
+
         # Store regression in nested structure
         reg_name = name if name else regression_type
         self._store_regression(regression_type, reg_name, reg)
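
Taken together, the visualization.py hunks fit the regression as before, then recompute its metrics with use_log_space=True whenever the crossplot's y-axis is logarithmic, so the legend's "R² = ... (log)" matches the axis while RMSE stays in linear units. A self-contained sketch of that flow (StubRegression and the y_log flag are stand-ins, not the package's API):

    import numpy as np

    class StubRegression:
        """Minimal stand-in mirroring RegressionBase's metric logic."""
        def fit(self, x, y):
            # least squares on log10(y) vs x, i.e. an exponential trend line
            self.slope, self.intercept = np.polyfit(x, np.log10(y), 1)
            self._calculate_metrics(x, y, self.predict(x))  # linear-space R² first
            return self

        def predict(self, x):
            return 10.0 ** (self.slope * np.asarray(x) + self.intercept)

        def _calculate_metrics(self, x, y, y_pred, use_log_space=False):
            if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
                y, y_pred = np.log10(y), np.log10(y_pred)
            ss_res = np.sum((y - y_pred) ** 2)
            ss_tot = np.sum((y - np.mean(y)) ** 2)
            self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0

    rng = np.random.default_rng(1)
    x_clean = np.linspace(0.0, 4.0, 200)
    y_clean = 10.0 ** (0.9 * x_clean) * rng.lognormal(sigma=0.2, size=x_clean.size)

    reg = StubRegression().fit(x_clean, y_clean)
    y_log = True                                   # stands in for Crossplot.y_log
    if y_log:
        reg._calculate_metrics(x_clean, y_clean, reg.predict(x_clean), use_log_space=True)
    print(f"R² = {reg.r_squared:.3f} (log)" if y_log else f"R² = {reg.r_squared:.3f}")
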
--- well_log_toolkit-0.1.118/well_log_toolkit.egg-info/PKG-INFO
+++ well_log_toolkit-0.1.119/well_log_toolkit.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: well-log-toolkit
-Version: 0.1.118
+Version: 0.1.119
 Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
 Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
 License: MIT