well-log-toolkit 0.1.118__tar.gz → 0.1.119__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/PKG-INFO +1 -1
  2. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/pyproject.toml +1 -1
  3. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/regression.py +16 -6
  4. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/visualization.py +49 -5
  5. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/PKG-INFO +1 -1
  6. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/README.md +0 -0
  7. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/setup.cfg +0 -0
  8. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/__init__.py +0 -0
  9. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/exceptions.py +0 -0
  10. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/las_file.py +0 -0
  11. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/manager.py +0 -0
  12. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/operations.py +0 -0
  13. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/property.py +0 -0
  14. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/statistics.py +0 -0
  15. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/utils.py +0 -0
  16. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit/well.py +0 -0
  17. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/SOURCES.txt +0 -0
  18. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/dependency_links.txt +0 -0
  19. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/requires.txt +0 -0
  20. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/top_level.txt +0 -0
--- well_log_toolkit-0.1.118/PKG-INFO
+++ well_log_toolkit-0.1.119/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: well-log-toolkit
-Version: 0.1.118
+Version: 0.1.119
 Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
 Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
 License: MIT
--- well_log_toolkit-0.1.118/pyproject.toml
+++ well_log_toolkit-0.1.119/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "well-log-toolkit"
-version = "0.1.118"
+version = "0.1.119"
 description = "Fast LAS file processing with lazy loading and filtering for well log analysis"
 readme = "README.md"
 requires-python = ">=3.9"
--- well_log_toolkit-0.1.118/well_log_toolkit/regression.py
+++ well_log_toolkit-0.1.119/well_log_toolkit/regression.py
@@ -70,13 +70,14 @@ class RegressionBase(ABC):
         """
         return self.predict(x)
 
-    def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> None:
+    def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray, use_log_space: bool = False) -> None:
         """Calculate R² and RMSE metrics.
 
         Args:
             x: Independent variable values
             y: Actual dependent variable values
             y_pred: Predicted dependent variable values
+            use_log_space: If True, calculate R² in log space (useful when y spans orders of magnitude)
         """
         # Store original data
         self.x_data = x
@@ -85,12 +86,21 @@ class RegressionBase(ABC):
         # Store x-axis range
         self.x_range = (float(np.min(x)), float(np.max(x)))
 
-        # R² calculation
-        ss_res = np.sum((y - y_pred) ** 2)
-        ss_tot = np.sum((y - np.mean(y)) ** 2)
-        self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+        # R² calculation - in log space if requested
+        if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
+            # Calculate in log space for data spanning orders of magnitude
+            log_y = np.log10(y)
+            log_y_pred = np.log10(y_pred)
+            ss_res = np.sum((log_y - log_y_pred) ** 2)
+            ss_tot = np.sum((log_y - np.mean(log_y)) ** 2)
+            self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
+        else:
+            # Standard R² in linear space
+            ss_res = np.sum((y - y_pred) ** 2)
+            ss_tot = np.sum((y - np.mean(y)) ** 2)
+            self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
 
-        # RMSE calculation
+        # RMSE calculation (always in linear space)
         self.rmse = np.sqrt(np.mean((y - y_pred) ** 2))
 
     def _prepare_data(self, x: ArrayLike, y: ArrayLike) -> Tuple[np.ndarray, np.ndarray]:
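
The new use_log_space branch matters when y spans several orders of magnitude: linear-space R² is dominated by the largest y values, whereas log-space R² weights every decade equally. A minimal standalone illustration of the two formulas from the hunk above (synthetic data, not part of the package):

    import numpy as np

    rng = np.random.default_rng(0)
    x = np.linspace(1.0, 5.0, 200)
    y = 10.0 ** (0.8 * x) * rng.lognormal(sigma=0.15, size=x.size)  # spans roughly 4 decades
    y_pred = 10.0 ** (0.8 * x)                                      # noise-free model

    def r2(actual, predicted):
        # same formula as _calculate_metrics: 1 - SS_res / SS_tot
        ss_res = np.sum((actual - predicted) ** 2)
        ss_tot = np.sum((actual - np.mean(actual)) ** 2)
        return 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0

    print("linear-space R²:", r2(y, y_pred))                      # driven by the largest y values
    print("log-space R²:   ", r2(np.log10(y), np.log10(y_pred)))  # treats every decade equally
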
--- well_log_toolkit-0.1.118/well_log_toolkit/visualization.py
+++ well_log_toolkit-0.1.119/well_log_toolkit/visualization.py
@@ -3139,6 +3139,27 @@ class Crossplot:
             self._regressions[reg_type] = {}
         self._regressions[reg_type][identifier] = regression_obj
 
+    def _get_group_colors(self, data: pd.DataFrame, group_column: str) -> dict:
+        """Get the color assigned to each group in the plot.
+
+        Args:
+            data: DataFrame with plotting data
+            group_column: Column name used for grouping
+
+        Returns:
+            Dictionary mapping group names to their colors
+        """
+        group_colors = {}
+
+        # Get unique groups in the same order as they'll appear in the plot
+        groups = data.groupby(group_column)
+
+        for idx, (group_name, _) in enumerate(groups):
+            # Use the same color assignment logic as _plot_by_groups
+            group_colors[group_name] = DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
+
+        return group_colors
+
     def _find_best_legend_locations(self, data: pd.DataFrame) -> tuple[str, str]:
         """Find the two best locations for legends based on data density.
 
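
The helper reproduces the scatter-colour assignment: pandas groupby iterates groups in sorted key order, so indexing the palette by enumeration position yields the same colour each group received when it was plotted, provided _plot_by_groups iterates the same way. A small standalone illustration (hypothetical data; DEFAULT_COLORS here is a stand-in for the package's palette):

    import pandas as pd

    DEFAULT_COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c']  # stand-in palette
    data = pd.DataFrame({'zone': ['B', 'A', 'B', 'C'], 'x': [1.0, 2.0, 3.0, 4.0]})

    # groupby sorts keys by default, so enumeration order is A, B, C
    group_colors = {name: DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
                    for idx, (name, _) in enumerate(data.groupby('zone'))}
    print(group_colors)  # {'A': '#1f77b4', 'B': '#ff7f0e', 'C': '#2ca02c'}
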
@@ -3228,7 +3249,12 @@ class Crossplot:
 
         # Add R² on second line if requested (will be styled grey later)
         if include_r2:
-            return f"{first_line}\nR² = {reg.r_squared:.3f}"
+            # Format R² with appropriate note
+            if self.y_log:
+                r2_label = f"R² = {reg.r_squared:.3f} (log)"
+            else:
+                r2_label = f"R² = {reg.r_squared:.3f}"
+            return f"{first_line}\n{r2_label}"
         else:
             return first_line
 
@@ -3371,15 +3397,19 @@ class Crossplot:
                 f"Reduce the number of groups or use a different regression strategy."
             )
 
+        # Get the actual colors used for each group in the plot
+        group_colors_map = self._get_group_colors(data, group_column)
+
         for idx, (group_name, group_data) in enumerate(color_groups):
             x_vals = group_data['x'].values
             y_vals = group_data['y'].values
             mask = np.isfinite(x_vals) & np.isfinite(y_vals)
             if np.sum(mask) >= 2:
-                # Copy config and set default line color if not specified
+                # Copy config and use the same color as the data group
                 group_config = config.copy()
                 if 'line_color' not in group_config:
-                    group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+                    # Use the same color as the data points for this group
+                    group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
 
                 # Skip legend update for all but last regression
                 is_last = (idx == n_groups - 1)
@@ -3412,15 +3442,19 @@ class Crossplot:
                 f"Reduce the number of groups or use a different regression strategy."
             )
 
+        # Get the actual colors used for each group in the plot
+        group_colors_map = self._get_group_colors(data, group_col)
+
         for idx, (group_name, group_data) in enumerate(groups):
             x_vals = group_data['x'].values
             y_vals = group_data['y'].values
             mask = np.isfinite(x_vals) & np.isfinite(y_vals)
             if np.sum(mask) >= 2:
-                # Copy config and set default line color if not specified
+                # Copy config and use the same color as the data group
                 group_config = config.copy()
                 if 'line_color' not in group_config:
-                    group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
+                    # Use the same color as the data points for this group
+                    group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
 
                 # Skip legend update for all but last regression
                 is_last = (idx == n_groups - 1)
@@ -3467,6 +3501,11 @@ class Crossplot:
             warnings.warn(f"Failed to fit {regression_type} regression for {name}: {e}")
             return
 
+        # Recalculate R² in log space if y-axis is log scale
+        if self.y_log:
+            y_pred = reg.predict(x_vals)
+            reg._calculate_metrics(x_vals, y_vals, y_pred, use_log_space=True)
+
         # Store regression in nested structure
         self._store_regression(regression_type, name, reg)
 
@@ -3810,6 +3849,11 @@ class Crossplot:
         except ValueError as e:
             raise ValueError(f"Failed to fit {regression_type} regression: {e}")
 
+        # Recalculate R² in log space if y-axis is log scale
+        if self.y_log:
+            y_pred = reg.predict(x_clean)
+            reg._calculate_metrics(x_clean, y_clean, y_pred, use_log_space=True)
+
         # Store regression in nested structure
         reg_name = name if name else regression_type
         self._store_regression(regression_type, reg_name, reg)
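
Taken together, the visualization.py hunks fit the regression as before, then recompute its metrics with use_log_space=True whenever the crossplot's y-axis is logarithmic, so the legend's "R² = ... (log)" matches the axis while RMSE stays in linear units. A self-contained sketch of that flow (StubRegression and the y_log flag are stand-ins, not the package's API):

    import numpy as np

    class StubRegression:
        """Minimal stand-in mirroring RegressionBase's metric logic."""
        def fit(self, x, y):
            # least squares on log10(y) vs x, i.e. an exponential trend line
            self.slope, self.intercept = np.polyfit(x, np.log10(y), 1)
            self._calculate_metrics(x, y, self.predict(x))  # linear-space R² first
            return self

        def predict(self, x):
            return 10.0 ** (self.slope * np.asarray(x) + self.intercept)

        def _calculate_metrics(self, x, y, y_pred, use_log_space=False):
            if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
                y, y_pred = np.log10(y), np.log10(y_pred)
            ss_res = np.sum((y - y_pred) ** 2)
            ss_tot = np.sum((y - np.mean(y)) ** 2)
            self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0

    rng = np.random.default_rng(1)
    x_clean = np.linspace(0.0, 4.0, 200)
    y_clean = 10.0 ** (0.9 * x_clean) * rng.lognormal(sigma=0.2, size=x_clean.size)

    reg = StubRegression().fit(x_clean, y_clean)
    y_log = True                                   # stands in for Crossplot.y_log
    if y_log:
        reg._calculate_metrics(x_clean, y_clean, reg.predict(x_clean), use_log_space=True)
    print(f"R² = {reg.r_squared:.3f} (log)" if y_log else f"R² = {reg.r_squared:.3f}")
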
--- well_log_toolkit-0.1.118/well_log_toolkit.egg-info/PKG-INFO
+++ well_log_toolkit-0.1.119/well_log_toolkit.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: well-log-toolkit
-Version: 0.1.118
+Version: 0.1.119
 Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
 Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
 License: MIT