well-log-toolkit 0.1.117__tar.gz → 0.1.119__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/PKG-INFO +1 -1
  2. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/pyproject.toml +1 -1
  3. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/regression.py +16 -6
  4. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/visualization.py +163 -73
  5. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/PKG-INFO +1 -1
  6. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/README.md +0 -0
  7. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/setup.cfg +0 -0
  8. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/__init__.py +0 -0
  9. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/exceptions.py +0 -0
  10. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/las_file.py +0 -0
  11. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/manager.py +0 -0
  12. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/operations.py +0 -0
  13. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/property.py +0 -0
  14. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/statistics.py +0 -0
  15. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/utils.py +0 -0
  16. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/well.py +0 -0
  17. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/SOURCES.txt +0 -0
  18. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/dependency_links.txt +0 -0
  19. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/requires.txt +0 -0
  20. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/top_level.txt +0 -0
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: well-log-toolkit
- Version: 0.1.117
+ Version: 0.1.119
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
  License: MIT
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "well-log-toolkit"
- version = "0.1.117"
+ version = "0.1.119"
  description = "Fast LAS file processing with lazy loading and filtering for well log analysis"
  readme = "README.md"
  requires-python = ">=3.9"
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/regression.py
@@ -70,13 +70,14 @@ class RegressionBase(ABC):
  """
  return self.predict(x)

- def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> None:
+ def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray, use_log_space: bool = False) -> None:
  """Calculate R² and RMSE metrics.

  Args:
  x: Independent variable values
  y: Actual dependent variable values
  y_pred: Predicted dependent variable values
+ use_log_space: If True, calculate R² in log space (useful when y spans orders of magnitude)
  """
  # Store original data
  self.x_data = x
@@ -85,12 +86,21 @@ class RegressionBase(ABC):
  # Store x-axis range
  self.x_range = (float(np.min(x)), float(np.max(x)))

- # R² calculation
- ss_res = np.sum((y - y_pred) ** 2)
- ss_tot = np.sum((y - np.mean(y)) ** 2)
- self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+ # R² calculation - in log space if requested
+ if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
+ # Calculate in log space for data spanning orders of magnitude
+ log_y = np.log10(y)
+ log_y_pred = np.log10(y_pred)
+ ss_res = np.sum((log_y - log_y_pred) ** 2)
+ ss_tot = np.sum((log_y - np.mean(log_y)) ** 2)
+ self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+ else:
+ # Standard R² in linear space
+ ss_res = np.sum((y - y_pred) ** 2)
+ ss_tot = np.sum((y - np.mean(y)) ** 2)
+ self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0

- # RMSE calculation
+ # RMSE calculation (always in linear space)
  self.rmse = np.sqrt(np.mean((y - y_pred) ** 2))

  def _prepare_data(self, x: ArrayLike, y: ArrayLike) -> Tuple[np.ndarray, np.ndarray]:
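Note on the regression.py change above: the new `use_log_space` branch is the ordinary coefficient-of-determination formula applied to log10-transformed values, so a fit is judged decade by decade rather than being dominated by the largest y values. A minimal standalone sketch of the same calculation in plain NumPy (the `r_squared` helper here is illustrative, not part of the toolkit's API):

```python
import numpy as np

def r_squared(y, y_pred, use_log_space=False):
    """Coefficient of determination, optionally computed on log10-transformed values."""
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Log space only makes sense when every value is strictly positive,
    # e.g. permeability data spanning several orders of magnitude.
    if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
        y, y_pred = np.log10(y), np.log10(y_pred)
    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    return 1 - ss_res / ss_tot if ss_tot != 0 else 0.0

# Predictions that track the data well on a log axis but miss by hundreds in linear units:
y_true = np.array([1.0, 10.0, 100.0, 1000.0])
y_hat = np.array([1.3, 8.0, 130.0, 800.0])
print(r_squared(y_true, y_hat))                      # linear-space R², dominated by the largest values
print(r_squared(y_true, y_hat, use_log_space=True))  # log-space R², weights each decade equally
```

As in the diff, RMSE stays in linear units; only R² switches to log space, and only when all values are positive.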
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/visualization.py
@@ -3139,6 +3139,88 @@ class Crossplot:
  self._regressions[reg_type] = {}
  self._regressions[reg_type][identifier] = regression_obj

+ def _get_group_colors(self, data: pd.DataFrame, group_column: str) -> dict:
+ """Get the color assigned to each group in the plot.
+
+ Args:
+ data: DataFrame with plotting data
+ group_column: Column name used for grouping
+
+ Returns:
+ Dictionary mapping group names to their colors
+ """
+ group_colors = {}
+
+ # Get unique groups in the same order as they'll appear in the plot
+ groups = data.groupby(group_column)
+
+ for idx, (group_name, _) in enumerate(groups):
+ # Use the same color assignment logic as _plot_by_groups
+ group_colors[group_name] = DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
+
+ return group_colors
+
+ def _find_best_legend_locations(self, data: pd.DataFrame) -> tuple[str, str]:
+ """Find the two best locations for legends based on data density.
+
+ Divides the plot into a 3x3 grid and finds the two squares with the least data points.
+
+ Args:
+ data: DataFrame with 'x' and 'y' columns
+
+ Returns:
+ Tuple of (primary_location, secondary_location) as matplotlib location strings
+ """
+ # Get x and y bounds
+ x_vals = data['x'].values
+ y_vals = data['y'].values
+
+ # Handle log scales for binning
+ if self.x_log:
+ x_vals = np.log10(x_vals[x_vals > 0])
+ if self.y_log:
+ y_vals = np.log10(y_vals[y_vals > 0])
+
+ x_min, x_max = np.nanmin(x_vals), np.nanmax(x_vals)
+ y_min, y_max = np.nanmin(y_vals), np.nanmax(y_vals)
+
+ # Create 3x3 grid and count points in each square
+ x_bins = np.linspace(x_min, x_max, 4)
+ y_bins = np.linspace(y_min, y_max, 4)
+
+ # Count points in each of 9 squares
+ counts = {}
+ for i in range(3):
+ for j in range(3):
+ x_mask = (x_vals >= x_bins[i]) & (x_vals < x_bins[i+1])
+ y_mask = (y_vals >= y_bins[j]) & (y_vals < y_bins[j+1])
+ counts[(i, j)] = np.sum(x_mask & y_mask)
+
+ # Map grid positions to matplotlib location strings
+ # Grid: (0,2) (1,2) (2,2) -> upper left, upper center, upper right
+ # (0,1) (1,1) (2,1) -> center left, center, center right
+ # (0,0) (1,0) (2,0) -> lower left, lower center, lower right
+ position_map = {
+ (0, 2): 'upper left',
+ (1, 2): 'upper center',
+ (2, 2): 'upper right',
+ (0, 1): 'center left',
+ (1, 1): 'center',
+ (2, 1): 'center right',
+ (0, 0): 'lower left',
+ (1, 0): 'lower center',
+ (2, 0): 'lower right',
+ }
+
+ # Sort squares by count (ascending)
+ sorted_squares = sorted(counts.items(), key=lambda x: x[1])
+
+ # Get two best locations
+ best_pos = position_map[sorted_squares[0][0]]
+ second_best_pos = position_map[sorted_squares[1][0]]
+
+ return best_pos, second_best_pos
+
  def _format_regression_label(self, name: str, reg, include_equation: bool = None, include_r2: bool = None) -> str:
  """Format a modern, compact regression label.

@@ -3156,26 +3238,25 @@ class Crossplot:
  if include_r2 is None:
  include_r2 = self.show_regression_r2

- # Start with name
- parts = [name]
-
- # Add equation and R² on same line if both shown, more compact
- metrics = []
+ # Format: "Name (equation)" with R² on second line
+ # Equation and R² will be colored grey in the legend update method
+ first_line = name
  if include_equation:
  eq = reg.equation()
- # Shorten equation format for compactness
- eq = eq.replace(' ', '') # Remove spaces
- metrics.append(eq)
+ eq = eq.replace(' ', '') # Remove spaces for compactness
+ # Add equation in parentheses (will be styled grey later)
+ first_line = f"{name} ({eq})"

+ # Add R² on second line if requested (will be styled grey later)
  if include_r2:
- # Use superscript 2 for
- metrics.append(f"R²={reg.r_squared:.3f}")
-
- if metrics:
- # Join equation and with pipe separator for clarity
- parts.append(" | ".join(metrics))
-
- return "\n".join(parts)
+ # Format with appropriate note
+ if self.y_log:
+ r2_label = f"R² = {reg.r_squared:.3f} (log)"
+ else:
+ r2_label = f"= {reg.r_squared:.3f}"
+ return f"{first_line}\n{r2_label}"
+ else:
+ return first_line

  def _update_regression_legend(self) -> None:
  """Create or update the separate regression legend with smart placement."""
@@ -3199,61 +3280,43 @@ class Crossplot:
  regression_labels.append(line.get_label())

  if regression_handles:
- # Smart placement: try these locations in priority order
- # Prefer corners away from main legend and colorbar
- locations = [
- 'lower right', # Primary choice
- 'upper right', # If lower right conflicts with data
- 'lower left', # If right side has colorbar/main legend
- 'center right', # Fallback
- ]
-
- # If main legend is shown, it's likely in upper left
- # If colorbar is shown, it's on the right side
- # Adjust preferences based on what's visible
- if self.show_legend and self.show_colorbar:
- # Both legend and colorbar present - lower left might be better
- locations = ['lower left', 'lower right', 'upper right', 'center left']
- elif self.show_colorbar:
- # Colorbar on right - prefer left side
- locations = ['lower left', 'upper left', 'lower right', 'center left']
-
- # Try to create legend with best location
- # Use 'best' as fallback - matplotlib will find optimal position
- try:
- self.regression_legend = self.ax.legend(
- regression_handles,
- regression_labels,
- loc=locations[0], # Try first preference
- frameon=True,
- framealpha=0.95,
- edgecolor='#cccccc',
- fancybox=False,
- shadow=False,
- fontsize=9,
- title='Regressions',
- title_fontsize=10
- )
- except Exception:
- # Fallback to 'best' if specific location fails
- self.regression_legend = self.ax.legend(
- regression_handles,
- regression_labels,
- loc='best',
- frameon=True,
- framealpha=0.95,
- edgecolor='#cccccc',
- fancybox=False,
- shadow=False,
- fontsize=9,
- title='Regressions',
- title_fontsize=10
- )
+ # Get smart placement based on data density
+ if self._data is not None:
+ _, secondary_loc = self._find_best_legend_locations(self._data)
+ else:
+ # Fallback if data not available
+ secondary_loc = 'lower right'
+
+ # Import legend from matplotlib
+ from matplotlib.legend import Legend
+
+ # Create regression legend at secondary location
+ self.regression_legend = Legend(
+ self.ax,
+ regression_handles,
+ regression_labels,
+ loc=secondary_loc,
+ frameon=True,
+ framealpha=0.95,
+ edgecolor='#cccccc',
+ fancybox=False,
+ shadow=False,
+ fontsize=9,
+ title='Regressions',
+ title_fontsize=10
+ )

- # Modern styling
+ # Modern styling with grey text for equation and R²
  self.regression_legend.get_frame().set_linewidth(0.8)
  self.regression_legend.get_title().set_fontweight('600')

+ # Set text color to grey for all labels
+ for text in self.regression_legend.get_texts():
+ text.set_color('#555555')
+
+ # Add as artist to avoid replacing the primary legend
+ self.ax.add_artist(self.regression_legend)
+
  def _add_automatic_regressions(self, data: pd.DataFrame) -> None:
  """Add automatic regressions based on initialization parameters."""
  if not any([self.regression, self.regression_by_color, self.regression_by_group]):
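The `secondary_loc` used above comes from the `_find_best_legend_locations` helper added earlier in this diff: the axes are split into a 3x3 grid, points are counted per cell, and the two emptiest cells are mapped to matplotlib `loc` strings. A condensed standalone sketch of the same idea using `np.histogram2d` (names here are illustrative, not the toolkit's; log-scaled axes would be log10-transformed first, as the toolkit method does):

```python
import numpy as np

# Grid cell (x_bin, y_bin) -> matplotlib legend location string.
LOC_BY_CELL = {
    (0, 2): 'upper left',  (1, 2): 'upper center', (2, 2): 'upper right',
    (0, 1): 'center left', (1, 1): 'center',       (2, 1): 'center right',
    (0, 0): 'lower left',  (1, 0): 'lower center', (2, 0): 'lower right',
}

def best_legend_locations(x, y):
    """Return the two matplotlib `loc` strings whose 3x3 grid cells hold the fewest points."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # counts[i, j] is the number of points falling in x-bin i and y-bin j of the data extent.
    counts, _, _ = np.histogram2d(x, y, bins=3)
    ranked = sorted(((counts[i, j], (i, j)) for i in range(3) for j in range(3)))
    return LOC_BY_CELL[ranked[0][1]], LOC_BY_CELL[ranked[1][1]]

rng = np.random.default_rng(0)
x, y = rng.random(300), rng.random(300) ** 2   # points are denser near the bottom of the plot
primary_loc, secondary_loc = best_legend_locations(x, y)
```

The diff then places the primary (group) legend in the emptiest cell and the regression legend in the second emptiest.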
@@ -3334,15 +3397,19 @@ class Crossplot:
  f"Reduce the number of groups or use a different regression strategy."
  )

+ # Get the actual colors used for each group in the plot
+ group_colors_map = self._get_group_colors(data, group_column)
+
  for idx, (group_name, group_data) in enumerate(color_groups):
  x_vals = group_data['x'].values
  y_vals = group_data['y'].values
  mask = np.isfinite(x_vals) & np.isfinite(y_vals)
  if np.sum(mask) >= 2:
- # Copy config and set default line color if not specified
+ # Copy config and use the same color as the data group
  group_config = config.copy()
  if 'line_color' not in group_config:
- group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+ # Use the same color as the data points for this group
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])

  # Skip legend update for all but last regression
  is_last = (idx == n_groups - 1)
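The group-color fix above relies on `DataFrame.groupby` iterating groups in a deterministic (sorted) order, so cycling a fixed palette by group index reproduces exactly the colors the scatter points were drawn with. A small sketch of that mapping (the palette below is a stand-in, not necessarily the toolkit's `DEFAULT_COLORS`, and `group_color_map` is an illustrative name):

```python
import pandas as pd

# Stand-in palette; the toolkit's DEFAULT_COLORS may differ.
DEFAULT_COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

def group_color_map(data: pd.DataFrame, group_column: str) -> dict:
    """Map each group to the palette color its scatter points were drawn with."""
    return {name: DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
            for idx, (name, _) in enumerate(data.groupby(group_column))}

df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [2, 4, 6, 8],
                   'zone': ['B', 'A', 'B', 'A']})
colors = group_color_map(df, 'zone')
# {'A': '#1f77b4', 'B': '#ff7f0e'} -- groupby sorts keys, so 'A' gets the first color.
# A per-group regression line can then reuse colors[group_name] as its line_color.
```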
@@ -3375,15 +3442,19 @@ class Crossplot:
  f"Reduce the number of groups or use a different regression strategy."
  )

+ # Get the actual colors used for each group in the plot
+ group_colors_map = self._get_group_colors(data, group_col)
+
  for idx, (group_name, group_data) in enumerate(groups):
  x_vals = group_data['x'].values
  y_vals = group_data['y'].values
  mask = np.isfinite(x_vals) & np.isfinite(y_vals)
  if np.sum(mask) >= 2:
- # Copy config and set default line color if not specified
+ # Copy config and use the same color as the data group
  group_config = config.copy()
  if 'line_color' not in group_config:
- group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+ # Use the same color as the data points for this group
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])

  # Skip legend update for all but last regression
  is_last = (idx == n_groups - 1)
@@ -3430,6 +3501,11 @@ class Crossplot:
  warnings.warn(f"Failed to fit {regression_type} regression for {name}: {e}")
  return

+ # Recalculate R² in log space if y-axis is log scale
+ if self.y_log:
+ y_pred = reg.predict(x_vals)
+ reg._calculate_metrics(x_vals, y_vals, y_pred, use_log_space=True)
+
  # Store regression in nested structure
  self._store_regression(regression_type, name, reg)

@@ -3672,17 +3748,26 @@ class Crossplot:
  if first_scatter is None and self.color:
  first_scatter = scatter

- # Add legend
+ # Add legend with smart placement
  if self.show_legend:
+ # Get best location based on data density
+ if self._data is not None:
+ primary_loc, _ = self._find_best_legend_locations(self._data)
+ else:
+ primary_loc = 'best'
+
  legend = self.ax.legend(
  title=group_label,
- loc='best',
+ loc=primary_loc,
  frameon=True,
  framealpha=0.9,
  edgecolor='black'
  )
  legend.get_title().set_fontweight('bold')

+ # Store the primary legend so it persists when regression legend is added
+ self.ax.add_artist(legend)
+
  # Add colorbar if using color mapping
  if self.color and self.show_colorbar and first_scatter:
  self.colorbar = self.fig.colorbar(first_scatter, ax=self.ax)
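The `self.ax.add_artist(legend)` call above, together with constructing the regression legend directly as a `matplotlib.legend.Legend` in `_update_regression_legend`, follows matplotlib's standard recipe for keeping two legends on one Axes: a later `ax.legend()` call replaces the current legend, whereas legends attached with `add_artist` persist. A minimal generic illustration of the pattern (the labels are invented, not the toolkit's):

```python
import matplotlib.pyplot as plt
from matplotlib.legend import Legend

fig, ax = plt.subplots()
group_handles = [ax.plot([], [], 'o', label=g)[0] for g in ('Sand', 'Shale')]
fit_handles = [ax.plot([], [], '--', label=f)[0] for f in ('Linear fit', 'Power fit')]

# Primary legend for the data groups; pinning it with add_artist keeps it alive
# even if another legend is created on the same Axes later.
primary = ax.legend(handles=group_handles, loc='upper left', title='Groups')
ax.add_artist(primary)

# Secondary legend built directly as a Legend artist, so ax.legend() is never
# called a second time and the primary legend is left untouched.
secondary = Legend(ax, fit_handles, [h.get_label() for h in fit_handles],
                   loc='lower right', title='Regressions', framealpha=0.95)
for text in secondary.get_texts():
    text.set_color('#555555')   # grey label text, matching the new styling
ax.add_artist(secondary)

plt.show()
```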
@@ -3764,6 +3849,11 @@ class Crossplot:
  except ValueError as e:
  raise ValueError(f"Failed to fit {regression_type} regression: {e}")

+ # Recalculate R² in log space if y-axis is log scale
+ if self.y_log:
+ y_pred = reg.predict(x_clean)
+ reg._calculate_metrics(x_clean, y_clean, y_pred, use_log_space=True)
+
  # Store regression in nested structure
  reg_name = name if name else regression_type
  self._store_regression(regression_type, reg_name, reg)
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: well-log-toolkit
- Version: 0.1.117
+ Version: 0.1.119
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
  License: MIT