well-log-toolkit 0.1.131__py3-none-any.whl → 0.1.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -642,7 +642,7 @@ class Property(PropertyOperationsMixin):
642
642
 
643
643
  def resample(self, target_depth: Union[np.ndarray, 'Property']) -> 'Property':
644
644
  """
645
- Resample property to a new depth grid using linear interpolation.
645
+ Resample property to a new depth grid using appropriate interpolation.
646
646
 
647
647
  This method creates a new Property object with values interpolated to match
648
648
  the target depth grid. This is required when combining properties with
@@ -663,7 +663,10 @@ class Property(PropertyOperationsMixin):
663
663
  Notes
664
664
  -----
665
665
  - Uses linear interpolation for continuous data
666
- - Uses nearest-neighbor interpolation for discrete data
666
+ - Uses forward-fill (previous) for discrete data - geological zones extend
667
+ from their top/boundary until the next boundary is encountered. For example,
668
+ "Cerisa West top" at 2929.93m remains active until "Cerisa West SST 1 top"
669
+ at 2955.10m is intercepted.
667
670
  - Values outside the original depth range are set to NaN
668
671
  - NaN values in original data are excluded from interpolation
669
672
 
@@ -728,7 +731,11 @@ class Property(PropertyOperationsMixin):
728
731
 
729
732
  # Choose interpolation method based on type
730
733
  if self.type == 'discrete':
731
- kind = 'nearest'
734
+ # Use 'previous' (forward-fill) for discrete properties
735
+ # This ensures geological zones extend from their top/boundary
736
+ # until the next top is encountered (e.g., "Cerisa West top" at 2929.93
737
+ # remains active until "Cerisa West SST 1 top" at 2955.10)
738
+ kind = 'previous'
732
739
  else:
733
740
  kind = 'linear'
734
741
 
@@ -2853,6 +2853,16 @@ class Crossplot:
2853
2853
  Regression type to apply separately for each group (well or shape). Creates
2854
2854
  separate regression lines for each well or shape category. Accepts string or dict.
2855
2855
  Default: None
2856
+ regression_by_color_and_shape : str or dict, optional
2857
+ Regression type to apply separately for each combination of color AND shape groups.
2858
+ Creates separate regression lines for each (color, shape) combination. This is useful
2859
+ for analyzing how the relationship changes across both dimensions simultaneously
2860
+ (e.g., each well in each formation, each layer in each zone). Accepts string or dict.
2861
+ Default: None
2862
+ regression_by_shape_and_color : str or dict, optional
2863
+ Alias for regression_by_color_and_shape. Provided for convenience - both parameters
2864
+ do exactly the same thing. Use whichever order feels more natural.
2865
+ Default: None
2856
2866
 
2857
2867
  Examples
2858
2868
  --------
@@ -2970,6 +2980,8 @@ class Crossplot:
2970
2980
  regression: Optional[Union[str, dict]] = None,
2971
2981
  regression_by_color: Optional[Union[str, dict]] = None,
2972
2982
  regression_by_group: Optional[Union[str, dict]] = None,
2983
+ regression_by_color_and_shape: Optional[Union[str, dict]] = None,
2984
+ regression_by_shape_and_color: Optional[Union[str, dict]] = None,
2973
2985
  ):
2974
2986
  # Store wells as list
2975
2987
  if not isinstance(wells, list):
@@ -3055,6 +3067,19 @@ class Crossplot:
3055
3067
  self.regression_by_color = regression_by_color
3056
3068
  self.regression_by_group = regression_by_group
3057
3069
 
3070
+ # Handle regression_by_shape_and_color as alias for regression_by_color_and_shape
3071
+ if regression_by_shape_and_color is not None and regression_by_color_and_shape is not None:
3072
+ warnings.warn(
3073
+ "Both regression_by_color_and_shape and regression_by_shape_and_color were specified. "
3074
+ "These are aliases for the same feature. Using regression_by_color_and_shape."
3075
+ )
3076
+ self.regression_by_color_and_shape = regression_by_color_and_shape
3077
+ elif regression_by_shape_and_color is not None:
3078
+ # Use the alias
3079
+ self.regression_by_color_and_shape = regression_by_shape_and_color
3080
+ else:
3081
+ self.regression_by_color_and_shape = regression_by_color_and_shape
3082
+
3058
3083
  # Plot objects
3059
3084
  self.fig = None
3060
3085
  self.ax = None
@@ -3072,7 +3097,15 @@ class Crossplot:
3072
3097
  # Data cache
3073
3098
  self._data = None
3074
3099
 
3075
- def add_layer(self, x: str, y: str, label: str):
3100
+ # Discrete property labels storage
3101
+ # Maps property role ('shape', 'color', 'size') to labels dict {0: 'label0', 1: 'label1', ...}
3102
+ self._discrete_labels = {}
3103
+
3104
+ # Legend placement tracking
3105
+ # Maps segment numbers (1-9) to legend type placed there
3106
+ self._occupied_segments = {}
3107
+
3108
+ def add_layer(self, x: str, y: str, label: str) -> 'Crossplot':
3076
3109
  """
3077
3110
  Add a new data layer to the crossplot.
3078
3111
 
@@ -3136,6 +3169,32 @@ class Crossplot:
3136
3169
  # Only do expensive allclose if needed
3137
3170
  return not np.allclose(prop_depth, ref_depth)
3138
3171
 
3172
+ # Helper function to align property values to target depth grid
3173
def align_property(prop, target_depth):
    """
    Project a property's values onto ``target_depth``.

    The strategy depends on ``prop.type``:
    - 'discrete': delegated to ``prop.resample`` and the ``values`` of the
      returned object are handed back unchanged
    - anything else: linear interpolation via ``np.interp``, with NaN for
      target depths outside the property's own depth range

    Args:
        prop: Property object to align (needs ``type``, ``depth``,
            ``values`` and ``resample``)
        target_depth: Target depth array

    Returns:
        Aligned values array
    """
    if prop.type != 'discrete':
        # Continuous data: interpolate linearly, NaN beyond either end
        return np.interp(
            target_depth,
            prop.depth,
            prop.values,
            left=np.nan,
            right=np.nan,
        )

    # Discrete data: the property's own resampling keeps the category codes
    # intact instead of blending neighbouring codes together
    return prop.resample(target_depth).values
3197
+
3139
3198
  # Loop through each layer
3140
3199
  for layer in self._layers:
3141
3200
  layer_x = layer['x']
@@ -3153,9 +3212,9 @@ class Crossplot:
3153
3212
  x_values = x_prop.values
3154
3213
  y_values = y_prop.values
3155
3214
 
3156
- # Align y values to x depth grid if needed
3215
+ # Align y values to x depth grid if needed using appropriate method
3157
3216
  if needs_alignment(y_prop.depth, depths):
3158
- y_values = np.interp(depths, y_prop.depth, y_prop.values, left=np.nan, right=np.nan)
3217
+ y_values = align_property(y_prop, depths)
3159
3218
 
3160
3219
  # Create dataframe for this well and layer
3161
3220
  df = pd.DataFrame({
@@ -3176,10 +3235,14 @@ class Crossplot:
3176
3235
  elif self.color and self.color != "depth":
3177
3236
  try:
3178
3237
  color_prop = well.get_property(self.color)
3179
- color_values = color_prop.values
3180
- # Align to x depth grid
3238
+ # Store labels if discrete property (only once)
3239
+ if 'color' not in self._discrete_labels:
3240
+ self._store_discrete_labels(color_prop, 'color')
3241
+ # Align to x depth grid using appropriate method for property type
3181
3242
  if needs_alignment(color_prop.depth, depths):
3182
- color_values = np.interp(depths, color_prop.depth, color_prop.values, left=np.nan, right=np.nan)
3243
+ color_values = align_property(color_prop, depths)
3244
+ else:
3245
+ color_values = color_prop.values
3183
3246
  df['color_val'] = color_values
3184
3247
  except (AttributeError, KeyError, PropertyNotFoundError):
3185
3248
  # Silently use depth as fallback
@@ -3194,10 +3257,14 @@ class Crossplot:
3194
3257
  elif self.size:
3195
3258
  try:
3196
3259
  size_prop = well.get_property(self.size)
3197
- size_values = size_prop.values
3198
- # Align to x depth grid
3260
+ # Store labels if discrete property (only once)
3261
+ if 'size' not in self._discrete_labels:
3262
+ self._store_discrete_labels(size_prop, 'size')
3263
+ # Align to x depth grid using appropriate method for property type
3199
3264
  if needs_alignment(size_prop.depth, depths):
3200
- size_values = np.interp(depths, size_prop.depth, size_prop.values, left=np.nan, right=np.nan)
3265
+ size_values = align_property(size_prop, depths)
3266
+ else:
3267
+ size_values = size_prop.values
3201
3268
  df['size_val'] = size_values
3202
3269
  except (AttributeError, KeyError, PropertyNotFoundError):
3203
3270
  # Silently skip if size property not found
@@ -3210,10 +3277,14 @@ class Crossplot:
3210
3277
  elif self.shape and self.shape != "well":
3211
3278
  try:
3212
3279
  shape_prop = well.get_property(self.shape)
3213
- shape_values = shape_prop.values
3214
- # Align to x depth grid
3280
+ # Store labels if discrete property (only once)
3281
+ if 'shape' not in self._discrete_labels:
3282
+ self._store_discrete_labels(shape_prop, 'shape')
3283
+ # Align to x depth grid using appropriate method for property type
3215
3284
  if needs_alignment(shape_prop.depth, depths):
3216
- shape_values = np.interp(depths, shape_prop.depth, shape_prop.values, left=np.nan, right=np.nan)
3285
+ shape_values = align_property(shape_prop, depths)
3286
+ else:
3287
+ shape_values = shape_prop.values
3217
3288
  df['shape_val'] = shape_values
3218
3289
  except (AttributeError, KeyError, PropertyNotFoundError):
3219
3290
  # Silently skip if shape property not found
@@ -3377,6 +3448,316 @@ class Crossplot:
3377
3448
 
3378
3449
  return best_pos, second_best_pos
3379
3450
 
3451
+ def _find_optimal_legend_segment(
3452
+ self,
3453
+ data: pd.DataFrame,
3454
+ legend_type: str,
3455
+ is_large: bool = False
3456
+ ) -> tuple[int, str]:
3457
+ """Find optimal segment for legend placement using priority-based algorithm.
3458
+
3459
+ Segments are numbered 1-9:
3460
+ 1 2 3 (upper left, upper center, upper right)
3461
+ 4 5 6 (center left, center, center right)
3462
+ 7 8 9 (lower left, lower center, lower right)
3463
+
3464
+ Checks segments in priority order: 1,9,4,6,3,7,2,8,5
3465
+ A segment is eligible if:
3466
+ - It has <10% of datapoints
3467
+ - It doesn't have a previous legend, EXCEPT:
3468
+ - Shape and color legends can share a segment if neither is very large
3469
+
3470
+ Args:
3471
+ data: DataFrame with 'x' and 'y' columns
3472
+ legend_type: Type of legend ('shape', 'color', 'size', 'regression', etc.)
3473
+ is_large: Whether this legend is considered large (many items)
3474
+
3475
+ Returns:
3476
+ Tuple of (segment_number, matplotlib_location_string)
3477
+ """
3478
+ # Get x and y bounds
3479
+ x_vals = data['x'].values
3480
+ y_vals = data['y'].values
3481
+
3482
+ # Handle log scales for binning
3483
+ if self.x_log:
3484
+ x_vals = np.log10(x_vals[x_vals > 0])
3485
+ if self.y_log:
3486
+ y_vals = np.log10(y_vals[y_vals > 0])
3487
+
3488
+ x_min, x_max = np.nanmin(x_vals), np.nanmax(x_vals)
3489
+ y_min, y_max = np.nanmin(y_vals), np.nanmax(y_vals)
3490
+
3491
+ # Create 3x3 grid and count points in each square
3492
+ x_bins = np.linspace(x_min, x_max, 4)
3493
+ y_bins = np.linspace(y_min, y_max, 4)
3494
+
3495
+ # Map segments 1-9 to grid positions (i, j)
3496
+ # Segment numbering:
3497
+ # 1 2 3
3498
+ # 4 5 6
3499
+ # 7 8 9
3500
+ segment_to_grid = {
3501
+ 1: (0, 2), # upper left
3502
+ 2: (1, 2), # upper center
3503
+ 3: (2, 2), # upper right
3504
+ 4: (0, 1), # center left
3505
+ 5: (1, 1), # center
3506
+ 6: (2, 1), # center right
3507
+ 7: (0, 0), # lower left
3508
+ 8: (1, 0), # lower center
3509
+ 9: (2, 0), # lower right
3510
+ }
3511
+
3512
+ # Map segments to matplotlib location strings
3513
+ segment_to_location = {
3514
+ 1: 'upper left',
3515
+ 2: 'upper center',
3516
+ 3: 'upper right',
3517
+ 4: 'center left',
3518
+ 5: 'center',
3519
+ 6: 'center right',
3520
+ 7: 'lower left',
3521
+ 8: 'lower center',
3522
+ 9: 'lower right',
3523
+ }
3524
+
3525
+ # Count points in each segment
3526
+ total_points = len(x_vals)
3527
+ segment_counts = {}
3528
+ for segment, (i, j) in segment_to_grid.items():
3529
+ x_mask = (x_vals >= x_bins[i]) & (x_vals < x_bins[i+1])
3530
+ y_mask = (y_vals >= y_bins[j]) & (y_vals < y_bins[j+1])
3531
+ count = np.sum(x_mask & y_mask)
3532
+ segment_counts[segment] = count
3533
+
3534
+ # Priority order: 1,9,4,6,3,7,2,8,5
3535
+ priority_order = [1, 9, 4, 6, 3, 7, 2, 8, 5]
3536
+
3537
+ # Check each segment in priority order
3538
+ for segment in priority_order:
3539
+ # Check datapoint percentage
3540
+ if total_points > 0:
3541
+ percentage = segment_counts[segment] / total_points
3542
+ if percentage >= 0.10: # 10% threshold
3543
+ continue
3544
+
3545
+ # Check if segment is occupied
3546
+ if segment in self._occupied_segments:
3547
+ existing_type = self._occupied_segments[segment]
3548
+
3549
+ # Allow shape and color to share if neither is very large
3550
+ shareable = {'shape', 'color'}
3551
+ if (legend_type in shareable and existing_type in shareable
3552
+ and not is_large
3553
+ and not self._occupied_segments.get(f'{segment}_large', False)):
3554
+ # Can share this segment
3555
+ pass
3556
+ else:
3557
+ # Segment occupied, try next
3558
+ continue
3559
+
3560
+ # Found eligible segment
3561
+ return segment, segment_to_location[segment]
3562
+
3563
+ # If no eligible segment found, use fallback (segment 1)
3564
+ return 1, segment_to_location[1]
3565
+
3566
+ def _store_discrete_labels(self, prop, role: str) -> None:
3567
+ """
3568
+ Store labels from a discrete property for later use in legends.
3569
+
3570
+ Args:
3571
+ prop: Property object (must have type and labels attributes)
3572
+ role: Property role - 'shape', 'color', or 'size'
3573
+ """
3574
+ if hasattr(prop, 'type') and prop.type == 'discrete' and hasattr(prop, 'labels') and prop.labels:
3575
+ self._discrete_labels[role] = prop.labels.copy()
3576
+
3577
+ def _get_display_label(self, value, role: str) -> str:
3578
+ """
3579
+ Get display label for a value, using stored labels for discrete properties.
3580
+
3581
+ For discrete properties with labels, converts integer codes to readable names.
3582
+ For continuous properties or discrete without labels, returns string value.
3583
+
3584
+ Args:
3585
+ value: The value to get label for (could be int, float, or string)
3586
+ role: Property role - 'shape', 'color', or 'size'
3587
+
3588
+ Returns:
3589
+ Display label string
3590
+
3591
+ Examples:
3592
+ >>> # For discrete property with labels {0: 'Agat top', 1: 'Cerisa Main top'}
3593
+ >>> self._get_display_label(0.0, 'shape')
3594
+ 'Agat top'
3595
+
3596
+ >>> # For continuous property or no labels
3597
+ >>> self._get_display_label(2.5, 'color')
3598
+ '2.5'
3599
+ """
3600
+ if role in self._discrete_labels:
3601
+ # Try to convert to integer and look up label
3602
+ try:
3603
+ int_val = int(np.round(float(value)))
3604
+ return self._discrete_labels[role].get(int_val, str(value))
3605
+ except (ValueError, TypeError):
3606
+ return str(value)
3607
+ return str(value)
3608
+
3609
+ def _is_edge_location(self, location: str) -> bool:
3610
+ """Check if a legend location is on the left or right edge.
3611
+
3612
+ Args:
3613
+ location: Matplotlib location string
3614
+
3615
+ Returns:
3616
+ True if on left or right edge (for vertical stacking)
3617
+ """
3618
+ edge_locations = ['upper left', 'center left', 'lower left',
3619
+ 'upper right', 'center right', 'lower right']
3620
+ return location in edge_locations
3621
+
3622
+ def _create_grouped_legends(self,
3623
+ shape_handles, shape_title: str,
3624
+ color_handles, color_title: str,
3625
+ location: str) -> None:
3626
+ """Create grouped legends in the same region, stacked or side-by-side.
3627
+
3628
+ When both shape and color legends are needed, this groups them in the same
3629
+ 1/9th section without overlap. Stacks vertically on edges, side-by-side elsewhere.
3630
+
3631
+ Args:
3632
+ shape_handles: List of handles for shape legend
3633
+ shape_title: Title for shape legend
3634
+ color_handles: List of handles for color legend
3635
+ color_title: Title for color legend
3636
+ location: Matplotlib location string for positioning
3637
+ """
3638
+ is_edge = self._is_edge_location(location)
3639
+
3640
+ # Determine base anchor point from location string
3641
+ # Map location to (x, y) coordinates in figure space
3642
+ anchor_map = {
3643
+ 'upper left': (0.02, 0.98),
3644
+ 'upper center': (0.5, 0.98),
3645
+ 'upper right': (0.98, 0.98),
3646
+ 'center left': (0.02, 0.5),
3647
+ 'center': (0.5, 0.5),
3648
+ 'center right': (0.98, 0.5),
3649
+ 'lower left': (0.02, 0.02),
3650
+ 'lower center': (0.5, 0.02),
3651
+ 'lower right': (0.98, 0.02),
3652
+ }
3653
+
3654
+ base_x, base_y = anchor_map.get(location, (0.98, 0.98))
3655
+
3656
+ if is_edge:
3657
+ # Stack vertically on edges
3658
+ # Position shape legend at the top
3659
+ shape_legend = self.ax.legend(
3660
+ handles=shape_handles,
3661
+ title=shape_title,
3662
+ loc=location,
3663
+ frameon=True,
3664
+ framealpha=0.9,
3665
+ edgecolor='black',
3666
+ bbox_to_anchor=(base_x, base_y),
3667
+ bbox_transform=self.fig.transFigure
3668
+ )
3669
+ shape_legend.get_title().set_fontweight('bold')
3670
+ self.ax.add_artist(shape_legend)
3671
+
3672
+ # Calculate offset for color legend below shape legend
3673
+ # Estimate shape legend height and add spacing
3674
+ shape_height = len(shape_handles) * 0.025 + 0.05 # Rough estimate
3675
+
3676
+ # Adjust y position for color legend
3677
+ if 'upper' in location:
3678
+ color_y = base_y - shape_height - 0.02 # Stack below
3679
+ elif 'lower' in location:
3680
+ color_y = base_y + shape_height + 0.02 # Stack above
3681
+ else: # center
3682
+ color_y = base_y - shape_height / 2 - 0.01 # Stack below
3683
+
3684
+ color_legend = self.ax.legend(
3685
+ handles=color_handles,
3686
+ title=color_title,
3687
+ loc=location,
3688
+ frameon=True,
3689
+ framealpha=0.9,
3690
+ edgecolor='black',
3691
+ bbox_to_anchor=(base_x, color_y),
3692
+ bbox_transform=self.fig.transFigure
3693
+ )
3694
+ color_legend.get_title().set_fontweight('bold')
3695
+ else:
3696
+ # Place side by side for non-edge locations (top, bottom, center)
3697
+ # Estimate width of each legend
3698
+ legend_width = 0.15
3699
+
3700
+ if 'center' in location and location != 'center left' and location != 'center right':
3701
+ # For center positions, place them side by side
3702
+ shape_x = base_x - legend_width / 2 - 0.01
3703
+ color_x = base_x + legend_width / 2 + 0.01
3704
+
3705
+ shape_legend = self.ax.legend(
3706
+ handles=shape_handles,
3707
+ title=shape_title,
3708
+ loc='center',
3709
+ frameon=True,
3710
+ framealpha=0.9,
3711
+ edgecolor='black',
3712
+ bbox_to_anchor=(shape_x, base_y),
3713
+ bbox_transform=self.fig.transFigure
3714
+ )
3715
+ shape_legend.get_title().set_fontweight('bold')
3716
+ self.ax.add_artist(shape_legend)
3717
+
3718
+ color_legend = self.ax.legend(
3719
+ handles=color_handles,
3720
+ title=color_title,
3721
+ loc='center',
3722
+ frameon=True,
3723
+ framealpha=0.9,
3724
+ edgecolor='black',
3725
+ bbox_to_anchor=(color_x, base_y),
3726
+ bbox_transform=self.fig.transFigure
3727
+ )
3728
+ color_legend.get_title().set_fontweight('bold')
3729
+ else:
3730
+ # For other positions, fall back to stacking
3731
+ shape_legend = self.ax.legend(
3732
+ handles=shape_handles,
3733
+ title=shape_title,
3734
+ loc=location,
3735
+ frameon=True,
3736
+ framealpha=0.9,
3737
+ edgecolor='black'
3738
+ )
3739
+ shape_legend.get_title().set_fontweight('bold')
3740
+ self.ax.add_artist(shape_legend)
3741
+
3742
+ # Estimate offset
3743
+ shape_height = len(shape_handles) * 0.025 + 0.05
3744
+ if 'upper' in location:
3745
+ color_y = base_y - shape_height - 0.02
3746
+ else:
3747
+ color_y = base_y + shape_height + 0.02
3748
+
3749
+ color_legend = self.ax.legend(
3750
+ handles=color_handles,
3751
+ title=color_title,
3752
+ loc=location,
3753
+ frameon=True,
3754
+ framealpha=0.9,
3755
+ edgecolor='black',
3756
+ bbox_to_anchor=(base_x, color_y),
3757
+ bbox_transform=self.fig.transFigure
3758
+ )
3759
+ color_legend.get_title().set_fontweight('bold')
3760
+
3380
3761
  def _format_regression_label(self, name: str, reg, include_equation: bool = None, include_r2: bool = None) -> str:
3381
3762
  """Format a modern, compact regression label.
3382
3763
 
@@ -3436,9 +3817,18 @@ class Crossplot:
3436
3817
  regression_labels.append(line.get_label())
3437
3818
 
3438
3819
  if regression_handles:
3439
- # Get smart placement based on data density
3820
+ # Get smart placement based on data density using optimized segment algorithm
3440
3821
  if self._data is not None:
3441
- _, secondary_loc = self._find_best_legend_locations(self._data)
3822
+ # Determine if regression legend is large
3823
+ regression_is_large = len(regression_handles) > 5
3824
+ segment, secondary_loc = self._find_optimal_legend_segment(
3825
+ self._data,
3826
+ legend_type='regression',
3827
+ is_large=regression_is_large
3828
+ )
3829
+ # Mark segment as occupied
3830
+ self._occupied_segments[segment] = 'regression'
3831
+ self._occupied_segments[f'{segment}_large'] = regression_is_large
3442
3832
  else:
3443
3833
  # Fallback if data not available
3444
3834
  secondary_loc = 'lower right'
@@ -3475,7 +3865,7 @@ class Crossplot:
3475
3865
 
3476
3866
  def _add_automatic_regressions(self, data: pd.DataFrame) -> None:
3477
3867
  """Add automatic regressions based on initialization parameters."""
3478
- if not any([self.regression, self.regression_by_color, self.regression_by_group]):
3868
+ if not any([self.regression, self.regression_by_color, self.regression_by_group, self.regression_by_color_and_shape]):
3479
3869
  return
3480
3870
 
3481
3871
  total_points = len(data)
@@ -3534,12 +3924,54 @@ class Crossplot:
3534
3924
  )
3535
3925
  else:
3536
3926
  # Check if color is categorical (not continuous like depth)
3537
- if group_column == 'color_val' and (self.color == 'depth' or pd.api.types.is_numeric_dtype(data[group_column])):
3538
- # For continuous values, we can't create separate regressions
3539
- warnings.warn(
3540
- f"regression_by_color requires categorical color mapping, "
3541
- f"but '{self.color}' is continuous. Use regression_by_group instead."
3542
- )
3927
+ # Use _is_categorical_color() to properly handle discrete properties
3928
+ if group_column == 'color_val':
3929
+ is_categorical = self._is_categorical_color(data[group_column].values)
3930
+ if not is_categorical:
3931
+ # For continuous values, we can't create separate regressions
3932
+ warnings.warn(
3933
+ f"regression_by_color requires categorical color mapping, "
3934
+ f"but '{self.color}' is continuous. Use regression_by_group instead."
3935
+ )
3936
+ # Skip this section
3937
+ else:
3938
+ # Categorical values - group and create regressions
3939
+ color_groups = data.groupby(group_column)
3940
+ n_groups = len(color_groups)
3941
+
3942
+ # Validate regression count
3943
+ if regression_count + n_groups > total_points / 2:
3944
+ raise ValueError(
3945
+ f"Too many regression lines requested: {regression_count + n_groups} lines "
3946
+ f"for {total_points} data points (average < 2 points per line). "
3947
+ f"Reduce the number of groups or use a different regression strategy."
3948
+ )
3949
+
3950
+ # Get the actual colors used for each group in the plot
3951
+ group_colors_map = self._get_group_colors(data, group_column)
3952
+
3953
+ for idx, (group_name, group_data) in enumerate(color_groups):
3954
+ x_vals = group_data['x'].values
3955
+ y_vals = group_data['y'].values
3956
+ mask = np.isfinite(x_vals) & np.isfinite(y_vals)
3957
+ if np.sum(mask) >= 2:
3958
+ # Copy config and use the same color as the data group
3959
+ group_config = config.copy()
3960
+ if 'line_color' not in group_config:
3961
+ # Use the same color as the data points for this group
3962
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
3963
+
3964
+ # Skip legend update for all but last regression
3965
+ is_last = (idx == n_groups - 1)
3966
+ self._add_group_regression(
3967
+ x_vals[mask], y_vals[mask],
3968
+ reg_type,
3969
+ name=f"{group_label}={group_name}",
3970
+ config=group_config,
3971
+ update_legend=is_last
3972
+ )
3973
+ regression_count += 1
3974
+ color_idx += 1
3543
3975
  else:
3544
3976
  # Categorical values - group and create regressions
3545
3977
  color_groups = data.groupby(group_column)
@@ -3629,6 +4061,95 @@ class Crossplot:
3629
4061
  "Use shape='well' or set shape to a property name."
3630
4062
  )
3631
4063
 
4064
+ # Add regression by color AND shape combinations
4065
+ if self.regression_by_color_and_shape:
4066
+ config = self._parse_regression_config(self.regression_by_color_and_shape)
4067
+ reg_type = config['type']
4068
+
4069
+ # Determine color and shape columns
4070
+ color_col = None
4071
+ shape_col = None
4072
+ color_label = None
4073
+ shape_label = None
4074
+
4075
+ # Identify color column
4076
+ if self.color and 'color_val' in data.columns:
4077
+ # Check if categorical
4078
+ if self._is_categorical_color(data['color_val'].values):
4079
+ color_col = 'color_val'
4080
+ color_label = self.color
4081
+ elif self.shape == "well" and 'well' in data.columns:
4082
+ # When shape="well", wells provide colors
4083
+ color_col = 'well'
4084
+ color_label = 'well'
4085
+
4086
+ # Identify shape column
4087
+ if self.shape == "well" and 'well' in data.columns:
4088
+ shape_col = 'well'
4089
+ shape_label = 'well'
4090
+ elif self.shape and self.shape != "well" and 'shape_val' in data.columns:
4091
+ shape_col = 'shape_val'
4092
+ shape_label = self.shape
4093
+
4094
+ # Need both color and shape columns for this to work
4095
+ if color_col is None or shape_col is None:
4096
+ warnings.warn(
4097
+ "regression_by_color_and_shape requires both categorical color mapping AND shape/well grouping. "
4098
+ "Set both color and shape parameters, or use regression_by_color or regression_by_group instead."
4099
+ )
4100
+ elif color_col == shape_col:
4101
+ warnings.warn(
4102
+ "regression_by_color_and_shape requires DIFFERENT color and shape mappings. "
4103
+ "Currently both are mapped to the same property. Use regression_by_color or regression_by_group instead."
4104
+ )
4105
+ else:
4106
+ # Group by both color and shape
4107
+ combined_groups = data.groupby([color_col, shape_col])
4108
+ n_groups = len(combined_groups)
4109
+
4110
+ # Validate regression count
4111
+ if regression_count + n_groups > total_points / 2:
4112
+ raise ValueError(
4113
+ f"Too many regression lines requested: {regression_count + n_groups} lines "
4114
+ f"for {total_points} data points (average < 2 points per line). "
4115
+ f"Reduce the number of groups or use a simpler regression strategy."
4116
+ )
4117
+
4118
+ # Get color maps for both dimensions
4119
+ color_colors_map = self._get_group_colors(data, color_col)
4120
+ shape_colors_map = self._get_group_colors(data, shape_col)
4121
+
4122
+ for idx, ((color_val, shape_val), group_data) in enumerate(combined_groups):
4123
+ x_vals = group_data['x'].values
4124
+ y_vals = group_data['y'].values
4125
+ mask = np.isfinite(x_vals) & np.isfinite(y_vals)
4126
+ if np.sum(mask) >= 2:
4127
+ # Copy config and use appropriate color
4128
+ group_config = config.copy()
4129
+ if 'line_color' not in group_config:
4130
+ # Prefer color from color dimension, fallback to shape dimension
4131
+ group_config['line_color'] = color_colors_map.get(
4132
+ color_val,
4133
+ shape_colors_map.get(shape_val, regression_colors[color_idx % len(regression_colors)])
4134
+ )
4135
+
4136
+ # Create descriptive name with both dimensions
4137
+ color_display = self._get_display_label(color_val, 'color')
4138
+ shape_display = self._get_display_label(shape_val, 'shape')
4139
+ name = f"{color_label}={color_display}, {shape_label}={shape_display}"
4140
+
4141
+ # Skip legend update for all but last regression
4142
+ is_last = (idx == n_groups - 1)
4143
+ self._add_group_regression(
4144
+ x_vals[mask], y_vals[mask],
4145
+ reg_type,
4146
+ name=name,
4147
+ config=group_config,
4148
+ update_legend=is_last
4149
+ )
4150
+ regression_count += 1
4151
+ color_idx += 1
4152
+
3632
4153
  def _add_group_regression(
3633
4154
  self,
3634
4155
  x_vals: np.ndarray,
@@ -3724,6 +4245,9 @@ class Crossplot:
3724
4245
 
3725
4246
  def plot(self) -> 'Crossplot':
3726
4247
  """Generate the crossplot figure."""
4248
+ # Reset legend placement tracking for new plot
4249
+ self._occupied_segments = {}
4250
+
3727
4251
  # Prepare data
3728
4252
  data = self._prepare_data()
3729
4253
 
@@ -3898,13 +4422,29 @@ class Crossplot:
3898
4422
  # Create custom legend handles
3899
4423
  legend_elements = [Patch(facecolor=category_colors[cat],
3900
4424
  edgecolor=self.edge_color,
3901
- label=str(cat))
4425
+ label=self._get_display_label(cat, 'color'))
3902
4426
  for cat in unique_categories]
3903
4427
 
4428
+ # Determine if legend is large
4429
+ color_is_large = len(legend_elements) > 5
4430
+
4431
+ # Find optimal segment for color legend
4432
+ if self._data is not None:
4433
+ segment, location = self._find_optimal_legend_segment(
4434
+ self._data,
4435
+ legend_type='color',
4436
+ is_large=color_is_large
4437
+ )
4438
+ # Mark segment as occupied
4439
+ self._occupied_segments[segment] = 'color'
4440
+ self._occupied_segments[f'{segment}_large'] = color_is_large
4441
+ else:
4442
+ location = 'best'
4443
+
3904
4444
  colorbar_label = self.color if self.color != "depth" and self.color != "label" else "Category"
3905
4445
  legend = self.ax.legend(handles=legend_elements,
3906
4446
  title=colorbar_label,
3907
- loc='best',
4447
+ loc=location,
3908
4448
  frameon=True,
3909
4449
  framealpha=0.9,
3910
4450
  edgecolor='black')
@@ -4008,65 +4548,92 @@ class Crossplot:
4008
4548
  edgecolors=self.edge_color,
4009
4549
  linewidths=self.edge_width,
4010
4550
  marker=marker,
4011
- label=str(group_name)
4551
+ label=self._get_display_label(group_name, 'shape')
4012
4552
  )
4013
4553
 
4014
4554
  if first_scatter is None and self.color and not is_categorical:
4015
4555
  first_scatter = scatter
4016
4556
 
4017
- # Add legend with smart placement
4018
- if self.show_legend:
4019
- # Get best location based on data density
4557
+ # Check if we need both shape and color legends (grouped layout)
4558
+ need_shape_legend = self.show_legend
4559
+ need_color_legend = self.color and is_categorical and self.show_legend
4560
+
4561
+ if need_shape_legend and need_color_legend:
4562
+ # Create grouped legends in the same region using optimized placement
4563
+ # Prepare shape legend handles (from scatter plots)
4564
+ shape_handles, _ = self.ax.get_legend_handles_labels()
4565
+
4566
+ # Prepare color legend handles
4567
+ c_vals_all = data['color_val'].values
4568
+ unique_categories = pd.Series(c_vals_all).dropna().unique()
4569
+ color_handles = [Patch(facecolor=category_colors[cat],
4570
+ edgecolor=self.edge_color,
4571
+ label=self._get_display_label(cat, 'color'))
4572
+ for cat in unique_categories]
4573
+
4574
+ # Determine if legends are large (more than 5 items)
4575
+ shape_is_large = len(shape_handles) > 5
4576
+ color_is_large = len(color_handles) > 5
4577
+
4578
+ # Find optimal segment for the grouped legends
4020
4579
  if self._data is not None:
4021
- primary_loc, _ = self._find_best_legend_locations(self._data)
4580
+ segment, location = self._find_optimal_legend_segment(
4581
+ self._data,
4582
+ legend_type='shape',
4583
+ is_large=shape_is_large or color_is_large
4584
+ )
4585
+ # Mark segment as occupied by both shape and color
4586
+ self._occupied_segments[segment] = 'shape'
4587
+ self._occupied_segments[f'{segment}_large'] = shape_is_large or color_is_large
4022
4588
  else:
4023
- primary_loc = 'best'
4589
+ location = 'best'
4590
+
4591
+ colorbar_label = self.color if self.color != "depth" and self.color != "label" else "Category"
4592
+
4593
+ # Create grouped legends
4594
+ self._create_grouped_legends(
4595
+ shape_handles=shape_handles,
4596
+ shape_title=group_label,
4597
+ color_handles=color_handles,
4598
+ color_title=colorbar_label,
4599
+ location=location
4600
+ )
4601
+
4602
+ elif need_shape_legend:
4603
+ # Only shape legend needed
4604
+ shape_handles, _ = self.ax.get_legend_handles_labels()
4605
+ shape_is_large = len(shape_handles) > 5
4606
+
4607
+ # Find optimal segment
4608
+ if self._data is not None:
4609
+ segment, location = self._find_optimal_legend_segment(
4610
+ self._data,
4611
+ legend_type='shape',
4612
+ is_large=shape_is_large
4613
+ )
4614
+ # Mark segment as occupied
4615
+ self._occupied_segments[segment] = 'shape'
4616
+ self._occupied_segments[f'{segment}_large'] = shape_is_large
4617
+ else:
4618
+ location = 'best'
4024
4619
 
4025
4620
  legend = self.ax.legend(
4026
4621
  title=group_label,
4027
- loc=primary_loc,
4622
+ loc=location,
4028
4623
  frameon=True,
4029
4624
  framealpha=0.9,
4030
4625
  edgecolor='black'
4031
4626
  )
4032
4627
  legend.get_title().set_fontweight('bold')
4033
-
4034
4628
  # Store the primary legend so it persists when regression legend is added
4035
4629
  self.ax.add_artist(legend)
4036
4630
 
4037
- # Add colorbar or color legend based on color type
4038
- if self.color:
4039
- if is_categorical and self.show_legend:
4040
- # Create separate legend for categorical colors
4041
- c_vals_all = data['color_val'].values
4042
- unique_categories = pd.Series(c_vals_all).dropna().unique()
4043
-
4044
- # Create custom legend handles
4045
- legend_elements = [Patch(facecolor=category_colors[cat],
4046
- edgecolor=self.edge_color,
4047
- label=str(cat))
4048
- for cat in unique_categories]
4049
-
4050
- colorbar_label = self.color if self.color != "depth" and self.color != "label" else "Category"
4051
-
4052
- # Find a good location for the color legend (opposite corner from shape legend)
4053
- if self._data is not None:
4054
- _, secondary_loc = self._find_best_legend_locations(self._data)
4055
- else:
4056
- secondary_loc = 'upper right'
4057
-
4058
- color_legend = self.ax.legend(handles=legend_elements,
4059
- title=colorbar_label,
4060
- loc=secondary_loc,
4061
- frameon=True,
4062
- framealpha=0.9,
4063
- edgecolor='black')
4064
- color_legend.get_title().set_fontweight('bold')
4065
- elif not is_categorical and self.show_colorbar and first_scatter:
4066
- # Add continuous colorbar
4067
- self.colorbar = self.fig.colorbar(first_scatter, ax=self.ax)
4068
- colorbar_label = self.color if self.color != "depth" else "Depth"
4069
- self.colorbar.set_label(colorbar_label, fontsize=11, fontweight='bold')
4631
+ # Add colorbar for continuous color mapping
4632
+ if self.color and not is_categorical and self.show_colorbar and first_scatter:
4633
+ # Add continuous colorbar
4634
+ self.colorbar = self.fig.colorbar(first_scatter, ax=self.ax)
4635
+ colorbar_label = self.color if self.color != "depth" else "Depth"
4636
+ self.colorbar.set_label(colorbar_label, fontsize=11, fontweight='bold')
4070
4637
 
4071
4638
  def add_regression(
4072
4639
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: well-log-toolkit
3
- Version: 0.1.131
3
+ Version: 0.1.133
4
4
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
5
5
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
6
6
  License: MIT
@@ -3,13 +3,13 @@ well_log_toolkit/exceptions.py,sha256=X_fzC7d4yaBFO9Vx74dEIB6xmI9Agi6_bTU3MPxn6k
3
3
  well_log_toolkit/las_file.py,sha256=Tj0mRfX1aX2s6uug7BBlY1m_mu3G50EGxHGzD0eEedE,53876
4
4
  well_log_toolkit/manager.py,sha256=PuHF8rqypirNIN77STHcvg8WneExikpq6ZvkcRbcQpg,109776
5
5
  well_log_toolkit/operations.py,sha256=z8j8fGBOwoJGUQFy-Vawjq9nm3OD_dUt0oaNh8yuG7o,18515
6
- well_log_toolkit/property.py,sha256=WOzoNQcmHCQ8moIKsnSyLgVC8s4LBu2x5IBXtFzmMe8,76236
6
+ well_log_toolkit/property.py,sha256=B-3mXNJmvIqjjMdsu1kgVSwMgEwbJ36wn_n_oppdJFw,76769
7
7
  well_log_toolkit/regression.py,sha256=7D3oI-1XVlFb-mOoHTxTTtUHERFyvQSBAzJzAGVoZnk,25192
8
8
  well_log_toolkit/statistics.py,sha256=_huPMbv2H3o9ezunjEM94mJknX5wPK8V4nDv2lIZZRw,16814
9
9
  well_log_toolkit/utils.py,sha256=O2KPq4htIoUlL74V2zKftdqqTjRfezU9M-568zPLme0,6866
10
- well_log_toolkit/visualization.py,sha256=MqMK5J3LGCXc2YDT1tlmLC6oMWA-F-YBuLBh5rbrPpI,169985
10
+ well_log_toolkit/visualization.py,sha256=rzwSkqUOEeSnnuvPvNMQ56-F316JBk0XSqAdqyoRjEc,195237
11
11
  well_log_toolkit/well.py,sha256=Aav5Y-rui8YsJdvk7BFndNPUu1O9mcjwDApAGyqV9kw,104535
12
- well_log_toolkit-0.1.131.dist-info/METADATA,sha256=XC_qlZ-SFO5rFspWuxMXfvl1viLa-GpKZXKxWvA-RBc,59810
13
- well_log_toolkit-0.1.131.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- well_log_toolkit-0.1.131.dist-info/top_level.txt,sha256=BMOo7OKLcZEnjo0wOLMclwzwTbYKYh31I8RGDOGSBdE,17
15
- well_log_toolkit-0.1.131.dist-info/RECORD,,
12
+ well_log_toolkit-0.1.133.dist-info/METADATA,sha256=F10zbt2b4FNQY76xM0eWAhG_y_ZABGkmY1nvem8doUY,59810
13
+ well_log_toolkit-0.1.133.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ well_log_toolkit-0.1.133.dist-info/top_level.txt,sha256=BMOo7OKLcZEnjo0wOLMclwzwTbYKYh31I8RGDOGSBdE,17
15
+ well_log_toolkit-0.1.133.dist-info/RECORD,,