well-log-toolkit 0.1.117__tar.gz → 0.1.119__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/PKG-INFO +1 -1
  2. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/pyproject.toml +1 -1
  3. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/regression.py +16 -6
  4. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/visualization.py +163 -73
  5. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/PKG-INFO +1 -1
  6. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/README.md +0 -0
  7. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/setup.cfg +0 -0
  8. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/__init__.py +0 -0
  9. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/exceptions.py +0 -0
  10. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/las_file.py +0 -0
  11. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/manager.py +0 -0
  12. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/operations.py +0 -0
  13. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/property.py +0 -0
  14. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/statistics.py +0 -0
  15. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/utils.py +0 -0
  16. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/well.py +0 -0
  17. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/SOURCES.txt +0 -0
  18. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/dependency_links.txt +0 -0
  19. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/requires.txt +0 -0
  20. {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/top_level.txt +0 -0
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: well-log-toolkit
- Version: 0.1.117
+ Version: 0.1.119
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
  License: MIT
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "well-log-toolkit"
- version = "0.1.117"
+ version = "0.1.119"
  description = "Fast LAS file processing with lazy loading and filtering for well log analysis"
  readme = "README.md"
  requires-python = ">=3.9"
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/regression.py
@@ -70,13 +70,14 @@ class RegressionBase(ABC):
  """
  return self.predict(x)

- def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> None:
+ def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray, use_log_space: bool = False) -> None:
  """Calculate R² and RMSE metrics.

  Args:
  x: Independent variable values
  y: Actual dependent variable values
  y_pred: Predicted dependent variable values
+ use_log_space: If True, calculate R² in log space (useful when y spans orders of magnitude)
  """
  # Store original data
  self.x_data = x
@@ -85,12 +86,21 @@ class RegressionBase(ABC):
  # Store x-axis range
  self.x_range = (float(np.min(x)), float(np.max(x)))

- # R² calculation
- ss_res = np.sum((y - y_pred) ** 2)
- ss_tot = np.sum((y - np.mean(y)) ** 2)
- self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+ # R² calculation - in log space if requested
+ if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
+ # Calculate in log space for data spanning orders of magnitude
+ log_y = np.log10(y)
+ log_y_pred = np.log10(y_pred)
+ ss_res = np.sum((log_y - log_y_pred) ** 2)
+ ss_tot = np.sum((log_y - np.mean(log_y)) ** 2)
+ self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+ else:
+ # Standard R² in linear space
+ ss_res = np.sum((y - y_pred) ** 2)
+ ss_tot = np.sum((y - np.mean(y)) ** 2)
+ self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0

- # RMSE calculation
+ # RMSE calculation (always in linear space)
  self.rmse = np.sqrt(np.mean((y - y_pred) ** 2))

  def _prepare_data(self, x: ArrayLike, y: ArrayLike) -> Tuple[np.ndarray, np.ndarray]:
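Note on the regression.py change above: the new `use_log_space` branch is the ordinary coefficient-of-determination formula applied to log10-transformed values, so a fit is judged decade by decade rather than being dominated by the largest y values. A minimal standalone sketch of the same calculation in plain NumPy (the `r_squared` helper here is illustrative, not part of the toolkit's API):

```python
import numpy as np

def r_squared(y, y_pred, use_log_space=False):
    """Coefficient of determination, optionally computed on log10-transformed values."""
    y = np.asarray(y, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Log space only makes sense when every value is strictly positive,
    # e.g. permeability data spanning several orders of magnitude.
    if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
        y, y_pred = np.log10(y), np.log10(y_pred)
    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    return 1 - ss_res / ss_tot if ss_tot != 0 else 0.0

# Predictions that track the data well on a log axis but miss by hundreds in linear units:
y_true = np.array([1.0, 10.0, 100.0, 1000.0])
y_hat = np.array([1.3, 8.0, 130.0, 800.0])
print(r_squared(y_true, y_hat))                      # linear-space R², dominated by the largest values
print(r_squared(y_true, y_hat, use_log_space=True))  # log-space R², weights each decade equally
```

As in the diff, RMSE stays in linear units; only R² switches to log space, and only when all values are positive.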
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/visualization.py
@@ -3139,6 +3139,88 @@ class Crossplot:
  self._regressions[reg_type] = {}
  self._regressions[reg_type][identifier] = regression_obj

+ def _get_group_colors(self, data: pd.DataFrame, group_column: str) -> dict:
+ """Get the color assigned to each group in the plot.
+
+ Args:
+ data: DataFrame with plotting data
+ group_column: Column name used for grouping
+
+ Returns:
+ Dictionary mapping group names to their colors
+ """
+ group_colors = {}
+
+ # Get unique groups in the same order as they'll appear in the plot
+ groups = data.groupby(group_column)
+
+ for idx, (group_name, _) in enumerate(groups):
+ # Use the same color assignment logic as _plot_by_groups
+ group_colors[group_name] = DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
+
+ return group_colors
+
+ def _find_best_legend_locations(self, data: pd.DataFrame) -> tuple[str, str]:
+ """Find the two best locations for legends based on data density.
+
+ Divides the plot into a 3x3 grid and finds the two squares with the least data points.
+
+ Args:
+ data: DataFrame with 'x' and 'y' columns
+
+ Returns:
+ Tuple of (primary_location, secondary_location) as matplotlib location strings
+ """
+ # Get x and y bounds
+ x_vals = data['x'].values
+ y_vals = data['y'].values
+
+ # Handle log scales for binning
+ if self.x_log:
+ x_vals = np.log10(x_vals[x_vals > 0])
+ if self.y_log:
+ y_vals = np.log10(y_vals[y_vals > 0])
+
+ x_min, x_max = np.nanmin(x_vals), np.nanmax(x_vals)
+ y_min, y_max = np.nanmin(y_vals), np.nanmax(y_vals)
+
+ # Create 3x3 grid and count points in each square
+ x_bins = np.linspace(x_min, x_max, 4)
+ y_bins = np.linspace(y_min, y_max, 4)
+
+ # Count points in each of 9 squares
+ counts = {}
+ for i in range(3):
+ for j in range(3):
+ x_mask = (x_vals >= x_bins[i]) & (x_vals < x_bins[i+1])
+ y_mask = (y_vals >= y_bins[j]) & (y_vals < y_bins[j+1])
+ counts[(i, j)] = np.sum(x_mask & y_mask)
+
+ # Map grid positions to matplotlib location strings
+ # Grid: (0,2) (1,2) (2,2) -> upper left, upper center, upper right
+ # (0,1) (1,1) (2,1) -> center left, center, center right
+ # (0,0) (1,0) (2,0) -> lower left, lower center, lower right
+ position_map = {
+ (0, 2): 'upper left',
+ (1, 2): 'upper center',
+ (2, 2): 'upper right',
+ (0, 1): 'center left',
+ (1, 1): 'center',
+ (2, 1): 'center right',
+ (0, 0): 'lower left',
+ (1, 0): 'lower center',
+ (2, 0): 'lower right',
+ }
+
+ # Sort squares by count (ascending)
+ sorted_squares = sorted(counts.items(), key=lambda x: x[1])
+
+ # Get two best locations
+ best_pos = position_map[sorted_squares[0][0]]
+ second_best_pos = position_map[sorted_squares[1][0]]
+
+ return best_pos, second_best_pos
+
  def _format_regression_label(self, name: str, reg, include_equation: bool = None, include_r2: bool = None) -> str:
  """Format a modern, compact regression label.

@@ -3156,26 +3238,25 @@ class Crossplot:
  if include_r2 is None:
  include_r2 = self.show_regression_r2

- # Start with name
- parts = [name]
-
- # Add equation and R² on same line if both shown, more compact
- metrics = []
+ # Format: "Name (equation)" with R² on second line
+ # Equation and R² will be colored grey in the legend update method
+ first_line = name
  if include_equation:
  eq = reg.equation()
- # Shorten equation format for compactness
- eq = eq.replace(' ', '') # Remove spaces
- metrics.append(eq)
+ eq = eq.replace(' ', '') # Remove spaces for compactness
+ # Add equation in parentheses (will be styled grey later)
+ first_line = f"{name} ({eq})"

+ # Add R² on second line if requested (will be styled grey later)
  if include_r2:
- # Use superscript 2 for
- metrics.append(f"R²={reg.r_squared:.3f}")
-
- if metrics:
- # Join equation and with pipe separator for clarity
- parts.append(" | ".join(metrics))
-
- return "\n".join(parts)
+ # Format with appropriate note
+ if self.y_log:
+ r2_label = f"R² = {reg.r_squared:.3f} (log)"
+ else:
+ r2_label = f"= {reg.r_squared:.3f}"
+ return f"{first_line}\n{r2_label}"
+ else:
+ return first_line

  def _update_regression_legend(self) -> None:
  """Create or update the separate regression legend with smart placement."""
@@ -3199,61 +3280,43 @@ class Crossplot:
  regression_labels.append(line.get_label())

  if regression_handles:
- # Smart placement: try these locations in priority order
- # Prefer corners away from main legend and colorbar
- locations = [
- 'lower right', # Primary choice
- 'upper right', # If lower right conflicts with data
- 'lower left', # If right side has colorbar/main legend
- 'center right', # Fallback
- ]
-
- # If main legend is shown, it's likely in upper left
- # If colorbar is shown, it's on the right side
- # Adjust preferences based on what's visible
- if self.show_legend and self.show_colorbar:
- # Both legend and colorbar present - lower left might be better
- locations = ['lower left', 'lower right', 'upper right', 'center left']
- elif self.show_colorbar:
- # Colorbar on right - prefer left side
- locations = ['lower left', 'upper left', 'lower right', 'center left']
-
- # Try to create legend with best location
- # Use 'best' as fallback - matplotlib will find optimal position
- try:
- self.regression_legend = self.ax.legend(
- regression_handles,
- regression_labels,
- loc=locations[0], # Try first preference
- frameon=True,
- framealpha=0.95,
- edgecolor='#cccccc',
- fancybox=False,
- shadow=False,
- fontsize=9,
- title='Regressions',
- title_fontsize=10
- )
- except Exception:
- # Fallback to 'best' if specific location fails
- self.regression_legend = self.ax.legend(
- regression_handles,
- regression_labels,
- loc='best',
- frameon=True,
- framealpha=0.95,
- edgecolor='#cccccc',
- fancybox=False,
- shadow=False,
- fontsize=9,
- title='Regressions',
- title_fontsize=10
- )
+ # Get smart placement based on data density
+ if self._data is not None:
+ _, secondary_loc = self._find_best_legend_locations(self._data)
+ else:
+ # Fallback if data not available
+ secondary_loc = 'lower right'
+
+ # Import legend from matplotlib
+ from matplotlib.legend import Legend
+
+ # Create regression legend at secondary location
+ self.regression_legend = Legend(
+ self.ax,
+ regression_handles,
+ regression_labels,
+ loc=secondary_loc,
+ frameon=True,
+ framealpha=0.95,
+ edgecolor='#cccccc',
+ fancybox=False,
+ shadow=False,
+ fontsize=9,
+ title='Regressions',
+ title_fontsize=10
+ )

- # Modern styling
+ # Modern styling with grey text for equation and R²
  self.regression_legend.get_frame().set_linewidth(0.8)
  self.regression_legend.get_title().set_fontweight('600')

+ # Set text color to grey for all labels
+ for text in self.regression_legend.get_texts():
+ text.set_color('#555555')
+
+ # Add as artist to avoid replacing the primary legend
+ self.ax.add_artist(self.regression_legend)
+
  def _add_automatic_regressions(self, data: pd.DataFrame) -> None:
  """Add automatic regressions based on initialization parameters."""
  if not any([self.regression, self.regression_by_color, self.regression_by_group]):
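The `secondary_loc` used above comes from the `_find_best_legend_locations` helper added earlier in this diff: the axes are split into a 3x3 grid, points are counted per cell, and the two emptiest cells are mapped to matplotlib `loc` strings. A condensed standalone sketch of the same idea using `np.histogram2d` (names here are illustrative, not the toolkit's; log-scaled axes would be log10-transformed first, as the toolkit method does):

```python
import numpy as np

# Grid cell (x_bin, y_bin) -> matplotlib legend location string.
LOC_BY_CELL = {
    (0, 2): 'upper left',  (1, 2): 'upper center', (2, 2): 'upper right',
    (0, 1): 'center left', (1, 1): 'center',       (2, 1): 'center right',
    (0, 0): 'lower left',  (1, 0): 'lower center', (2, 0): 'lower right',
}

def best_legend_locations(x, y):
    """Return the two matplotlib `loc` strings whose 3x3 grid cells hold the fewest points."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    # counts[i, j] is the number of points falling in x-bin i and y-bin j of the data extent.
    counts, _, _ = np.histogram2d(x, y, bins=3)
    ranked = sorted(((counts[i, j], (i, j)) for i in range(3) for j in range(3)))
    return LOC_BY_CELL[ranked[0][1]], LOC_BY_CELL[ranked[1][1]]

rng = np.random.default_rng(0)
x, y = rng.random(300), rng.random(300) ** 2   # points are denser near the bottom of the plot
primary_loc, secondary_loc = best_legend_locations(x, y)
```

The diff then places the primary (group) legend in the emptiest cell and the regression legend in the second emptiest.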
@@ -3334,15 +3397,19 @@ class Crossplot:
  f"Reduce the number of groups or use a different regression strategy."
  )

+ # Get the actual colors used for each group in the plot
+ group_colors_map = self._get_group_colors(data, group_column)
+
  for idx, (group_name, group_data) in enumerate(color_groups):
  x_vals = group_data['x'].values
  y_vals = group_data['y'].values
  mask = np.isfinite(x_vals) & np.isfinite(y_vals)
  if np.sum(mask) >= 2:
- # Copy config and set default line color if not specified
+ # Copy config and use the same color as the data group
  group_config = config.copy()
  if 'line_color' not in group_config:
- group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+ # Use the same color as the data points for this group
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])

  # Skip legend update for all but last regression
  is_last = (idx == n_groups - 1)
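The group-color fix above relies on `DataFrame.groupby` iterating groups in a deterministic (sorted) order, so cycling a fixed palette by group index reproduces exactly the colors the scatter points were drawn with. A small sketch of that mapping (the palette below is a stand-in, not necessarily the toolkit's `DEFAULT_COLORS`, and `group_color_map` is an illustrative name):

```python
import pandas as pd

# Stand-in palette; the toolkit's DEFAULT_COLORS may differ.
DEFAULT_COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

def group_color_map(data: pd.DataFrame, group_column: str) -> dict:
    """Map each group to the palette color its scatter points were drawn with."""
    return {name: DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
            for idx, (name, _) in enumerate(data.groupby(group_column))}

df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [2, 4, 6, 8],
                   'zone': ['B', 'A', 'B', 'A']})
colors = group_color_map(df, 'zone')
# {'A': '#1f77b4', 'B': '#ff7f0e'} -- groupby sorts keys, so 'A' gets the first color.
# A per-group regression line can then reuse colors[group_name] as its line_color.
```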
@@ -3375,15 +3442,19 @@ class Crossplot:
  f"Reduce the number of groups or use a different regression strategy."
  )

+ # Get the actual colors used for each group in the plot
+ group_colors_map = self._get_group_colors(data, group_col)
+
  for idx, (group_name, group_data) in enumerate(groups):
  x_vals = group_data['x'].values
  y_vals = group_data['y'].values
  mask = np.isfinite(x_vals) & np.isfinite(y_vals)
  if np.sum(mask) >= 2:
- # Copy config and set default line color if not specified
+ # Copy config and use the same color as the data group
  group_config = config.copy()
  if 'line_color' not in group_config:
- group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+ # Use the same color as the data points for this group
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])

  # Skip legend update for all but last regression
  is_last = (idx == n_groups - 1)
@@ -3430,6 +3501,11 @@ class Crossplot:
  warnings.warn(f"Failed to fit {regression_type} regression for {name}: {e}")
  return

+ # Recalculate R² in log space if y-axis is log scale
+ if self.y_log:
+ y_pred = reg.predict(x_vals)
+ reg._calculate_metrics(x_vals, y_vals, y_pred, use_log_space=True)
+
  # Store regression in nested structure
  self._store_regression(regression_type, name, reg)

@@ -3672,17 +3748,26 @@ class Crossplot:
  if first_scatter is None and self.color:
  first_scatter = scatter

- # Add legend
+ # Add legend with smart placement
  if self.show_legend:
+ # Get best location based on data density
+ if self._data is not None:
+ primary_loc, _ = self._find_best_legend_locations(self._data)
+ else:
+ primary_loc = 'best'
+
  legend = self.ax.legend(
  title=group_label,
- loc='best',
+ loc=primary_loc,
  frameon=True,
  framealpha=0.9,
  edgecolor='black'
  )
  legend.get_title().set_fontweight('bold')

+ # Store the primary legend so it persists when regression legend is added
+ self.ax.add_artist(legend)
+
  # Add colorbar if using color mapping
  if self.color and self.show_colorbar and first_scatter:
  self.colorbar = self.fig.colorbar(first_scatter, ax=self.ax)
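The `self.ax.add_artist(legend)` call above, together with constructing the regression legend directly as a `matplotlib.legend.Legend` in `_update_regression_legend`, follows matplotlib's standard recipe for keeping two legends on one Axes: a later `ax.legend()` call replaces the current legend, whereas legends attached with `add_artist` persist. A minimal generic illustration of the pattern (the labels are invented, not the toolkit's):

```python
import matplotlib.pyplot as plt
from matplotlib.legend import Legend

fig, ax = plt.subplots()
group_handles = [ax.plot([], [], 'o', label=g)[0] for g in ('Sand', 'Shale')]
fit_handles = [ax.plot([], [], '--', label=f)[0] for f in ('Linear fit', 'Power fit')]

# Primary legend for the data groups; pinning it with add_artist keeps it alive
# even if another legend is created on the same Axes later.
primary = ax.legend(handles=group_handles, loc='upper left', title='Groups')
ax.add_artist(primary)

# Secondary legend built directly as a Legend artist, so ax.legend() is never
# called a second time and the primary legend is left untouched.
secondary = Legend(ax, fit_handles, [h.get_label() for h in fit_handles],
                   loc='lower right', title='Regressions', framealpha=0.95)
for text in secondary.get_texts():
    text.set_color('#555555')   # grey label text, matching the new styling
ax.add_artist(secondary)

plt.show()
```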
@@ -3764,6 +3849,11 @@ class Crossplot:
  except ValueError as e:
  raise ValueError(f"Failed to fit {regression_type} regression: {e}")

+ # Recalculate R² in log space if y-axis is log scale
+ if self.y_log:
+ y_pred = reg.predict(x_clean)
+ reg._calculate_metrics(x_clean, y_clean, y_pred, use_log_space=True)
+
  # Store regression in nested structure
  reg_name = name if name else regression_type
  self._store_regression(regression_type, reg_name, reg)
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: well-log-toolkit
- Version: 0.1.117
+ Version: 0.1.119
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
  License: MIT