well-log-toolkit 0.1.132__py3-none-any.whl → 0.1.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2853,6 +2853,16 @@ class Crossplot:
2853
2853
  Regression type to apply separately for each group (well or shape). Creates
2854
2854
  separate regression lines for each well or shape category. Accepts string or dict.
2855
2855
  Default: None
2856
+ regression_by_color_and_shape : str or dict, optional
2857
+ Regression type to apply separately for each combination of color AND shape groups.
2858
+ Creates separate regression lines for each (color, shape) combination. This is useful
2859
+ for analyzing how the relationship changes across both dimensions simultaneously
2860
+ (e.g., each well in each formation, each layer in each zone). Accepts string or dict.
2861
+ Default: None
2862
+ regression_by_shape_and_color : str or dict, optional
2863
+ Alias for regression_by_color_and_shape. Provided for convenience - both parameters
2864
+ do exactly the same thing. Use whichever order feels more natural.
2865
+ Default: None
2856
2866
 
2857
2867
  Examples
2858
2868
  --------
@@ -2970,6 +2980,8 @@ class Crossplot:
2970
2980
  regression: Optional[Union[str, dict]] = None,
2971
2981
  regression_by_color: Optional[Union[str, dict]] = None,
2972
2982
  regression_by_group: Optional[Union[str, dict]] = None,
2983
+ regression_by_color_and_shape: Optional[Union[str, dict]] = None,
2984
+ regression_by_shape_and_color: Optional[Union[str, dict]] = None,
2973
2985
  ):
2974
2986
  # Store wells as list
2975
2987
  if not isinstance(wells, list):
@@ -3055,6 +3067,19 @@ class Crossplot:
3055
3067
  self.regression_by_color = regression_by_color
3056
3068
  self.regression_by_group = regression_by_group
3057
3069
 
3070
+ # Handle regression_by_shape_and_color as alias for regression_by_color_and_shape
3071
+ if regression_by_shape_and_color is not None and regression_by_color_and_shape is not None:
3072
+ warnings.warn(
3073
+ "Both regression_by_color_and_shape and regression_by_shape_and_color were specified. "
3074
+ "These are aliases for the same feature. Using regression_by_color_and_shape."
3075
+ )
3076
+ self.regression_by_color_and_shape = regression_by_color_and_shape
3077
+ elif regression_by_shape_and_color is not None:
3078
+ # Use the alias
3079
+ self.regression_by_color_and_shape = regression_by_shape_and_color
3080
+ else:
3081
+ self.regression_by_color_and_shape = regression_by_color_and_shape
3082
+
3058
3083
  # Plot objects
3059
3084
  self.fig = None
3060
3085
  self.ax = None
@@ -3076,7 +3101,11 @@ class Crossplot:
3076
3101
  # Maps property role ('shape', 'color', 'size') to labels dict {0: 'label0', 1: 'label1', ...}
3077
3102
  self._discrete_labels = {}
3078
3103
 
3079
- def add_layer(self, x: str, y: str, label: str):
3104
+ # Legend placement tracking
3105
+ # Maps segment numbers (1-9) to legend type placed there
3106
+ self._occupied_segments = {}
3107
+
3108
+ def add_layer(self, x: str, y: str, label: str) -> 'Crossplot':
3080
3109
  """
3081
3110
  Add a new data layer to the crossplot.
3082
3111
 
@@ -3419,6 +3448,121 @@ class Crossplot:
3419
3448
 
3420
3449
  return best_pos, second_best_pos
3421
3450
 
3451
+ def _find_optimal_legend_segment(
3452
+ self,
3453
+ data: pd.DataFrame,
3454
+ legend_type: str,
3455
+ is_large: bool = False
3456
+ ) -> tuple[int, str]:
3457
+ """Find optimal segment for legend placement using priority-based algorithm.
3458
+
3459
+ Segments are numbered 1-9:
3460
+ 1 2 3 (upper left, upper center, upper right)
3461
+ 4 5 6 (center left, center, center right)
3462
+ 7 8 9 (lower left, lower center, lower right)
3463
+
3464
+ Checks segments in priority order: 1,9,4,6,3,7,2,8,5
3465
+ A segment is eligible if:
3466
+ - It has <10% of datapoints
3467
+ - It doesn't have a previous legend, EXCEPT:
3468
+ - Shape and color legends can share a segment if neither is very large
3469
+
3470
+ Args:
3471
+ data: DataFrame with 'x' and 'y' columns
3472
+ legend_type: Type of legend ('shape', 'color', 'size', 'regression', etc.)
3473
+ is_large: Whether this legend is considered large (many items)
3474
+
3475
+ Returns:
3476
+ Tuple of (segment_number, matplotlib_location_string)
3477
+ """
3478
+ # Get x and y bounds
3479
+ x_vals = data['x'].values
3480
+ y_vals = data['y'].values
3481
+
3482
+ # Handle log scales for binning
3483
+ if self.x_log:
3484
+ x_vals = np.log10(x_vals[x_vals > 0])
3485
+ if self.y_log:
3486
+ y_vals = np.log10(y_vals[y_vals > 0])
3487
+
3488
+ x_min, x_max = np.nanmin(x_vals), np.nanmax(x_vals)
3489
+ y_min, y_max = np.nanmin(y_vals), np.nanmax(y_vals)
3490
+
3491
+ # Create 3x3 grid and count points in each square
3492
+ x_bins = np.linspace(x_min, x_max, 4)
3493
+ y_bins = np.linspace(y_min, y_max, 4)
3494
+
3495
+ # Map segments 1-9 to grid positions (i, j)
3496
+ # Segment numbering:
3497
+ # 1 2 3
3498
+ # 4 5 6
3499
+ # 7 8 9
3500
+ segment_to_grid = {
3501
+ 1: (0, 2), # upper left
3502
+ 2: (1, 2), # upper center
3503
+ 3: (2, 2), # upper right
3504
+ 4: (0, 1), # center left
3505
+ 5: (1, 1), # center
3506
+ 6: (2, 1), # center right
3507
+ 7: (0, 0), # lower left
3508
+ 8: (1, 0), # lower center
3509
+ 9: (2, 0), # lower right
3510
+ }
3511
+
3512
+ # Map segments to matplotlib location strings
3513
+ segment_to_location = {
3514
+ 1: 'upper left',
3515
+ 2: 'upper center',
3516
+ 3: 'upper right',
3517
+ 4: 'center left',
3518
+ 5: 'center',
3519
+ 6: 'center right',
3520
+ 7: 'lower left',
3521
+ 8: 'lower center',
3522
+ 9: 'lower right',
3523
+ }
3524
+
3525
+ # Count points in each segment
3526
+ total_points = len(x_vals)
3527
+ segment_counts = {}
3528
+ for segment, (i, j) in segment_to_grid.items():
3529
+ x_mask = (x_vals >= x_bins[i]) & (x_vals < x_bins[i+1])
3530
+ y_mask = (y_vals >= y_bins[j]) & (y_vals < y_bins[j+1])
3531
+ count = np.sum(x_mask & y_mask)
3532
+ segment_counts[segment] = count
3533
+
3534
+ # Priority order: 1,9,4,6,3,7,2,8,5
3535
+ priority_order = [1, 9, 4, 6, 3, 7, 2, 8, 5]
3536
+
3537
+ # Check each segment in priority order
3538
+ for segment in priority_order:
3539
+ # Check datapoint percentage
3540
+ if total_points > 0:
3541
+ percentage = segment_counts[segment] / total_points
3542
+ if percentage >= 0.10: # 10% threshold
3543
+ continue
3544
+
3545
+ # Check if segment is occupied
3546
+ if segment in self._occupied_segments:
3547
+ existing_type = self._occupied_segments[segment]
3548
+
3549
+ # Allow shape and color to share if neither is very large
3550
+ shareable = {'shape', 'color'}
3551
+ if (legend_type in shareable and existing_type in shareable
3552
+ and not is_large
3553
+ and not self._occupied_segments.get(f'{segment}_large', False)):
3554
+ # Can share this segment
3555
+ pass
3556
+ else:
3557
+ # Segment occupied, try next
3558
+ continue
3559
+
3560
+ # Found eligible segment
3561
+ return segment, segment_to_location[segment]
3562
+
3563
+ # If no eligible segment found, use fallback (segment 1)
3564
+ return 1, segment_to_location[1]
3565
+
3422
3566
  def _store_discrete_labels(self, prop, role: str) -> None:
3423
3567
  """
3424
3568
  Store labels from a discrete property for later use in legends.
@@ -3673,9 +3817,18 @@ class Crossplot:
3673
3817
  regression_labels.append(line.get_label())
3674
3818
 
3675
3819
  if regression_handles:
3676
- # Get smart placement based on data density
3820
+ # Get smart placement based on data density using optimized segment algorithm
3677
3821
  if self._data is not None:
3678
- _, secondary_loc = self._find_best_legend_locations(self._data)
3822
+ # Determine if regression legend is large
3823
+ regression_is_large = len(regression_handles) > 5
3824
+ segment, secondary_loc = self._find_optimal_legend_segment(
3825
+ self._data,
3826
+ legend_type='regression',
3827
+ is_large=regression_is_large
3828
+ )
3829
+ # Mark segment as occupied
3830
+ self._occupied_segments[segment] = 'regression'
3831
+ self._occupied_segments[f'{segment}_large'] = regression_is_large
3679
3832
  else:
3680
3833
  # Fallback if data not available
3681
3834
  secondary_loc = 'lower right'
@@ -3712,7 +3865,7 @@ class Crossplot:
3712
3865
 
3713
3866
  def _add_automatic_regressions(self, data: pd.DataFrame) -> None:
3714
3867
  """Add automatic regressions based on initialization parameters."""
3715
- if not any([self.regression, self.regression_by_color, self.regression_by_group]):
3868
+ if not any([self.regression, self.regression_by_color, self.regression_by_group, self.regression_by_color_and_shape]):
3716
3869
  return
3717
3870
 
3718
3871
  total_points = len(data)
@@ -3771,12 +3924,54 @@ class Crossplot:
3771
3924
  )
3772
3925
  else:
3773
3926
  # Check if color is categorical (not continuous like depth)
3774
- if group_column == 'color_val' and (self.color == 'depth' or pd.api.types.is_numeric_dtype(data[group_column])):
3775
- # For continuous values, we can't create separate regressions
3776
- warnings.warn(
3777
- f"regression_by_color requires categorical color mapping, "
3778
- f"but '{self.color}' is continuous. Use regression_by_group instead."
3779
- )
3927
+ # Use _is_categorical_color() to properly handle discrete properties
3928
+ if group_column == 'color_val':
3929
+ is_categorical = self._is_categorical_color(data[group_column].values)
3930
+ if not is_categorical:
3931
+ # For continuous values, we can't create separate regressions
3932
+ warnings.warn(
3933
+ f"regression_by_color requires categorical color mapping, "
3934
+ f"but '{self.color}' is continuous. Use regression_by_group instead."
3935
+ )
3936
+ # Skip this section
3937
+ else:
3938
+ # Categorical values - group and create regressions
3939
+ color_groups = data.groupby(group_column)
3940
+ n_groups = len(color_groups)
3941
+
3942
+ # Validate regression count
3943
+ if regression_count + n_groups > total_points / 2:
3944
+ raise ValueError(
3945
+ f"Too many regression lines requested: {regression_count + n_groups} lines "
3946
+ f"for {total_points} data points (average < 2 points per line). "
3947
+ f"Reduce the number of groups or use a different regression strategy."
3948
+ )
3949
+
3950
+ # Get the actual colors used for each group in the plot
3951
+ group_colors_map = self._get_group_colors(data, group_column)
3952
+
3953
+ for idx, (group_name, group_data) in enumerate(color_groups):
3954
+ x_vals = group_data['x'].values
3955
+ y_vals = group_data['y'].values
3956
+ mask = np.isfinite(x_vals) & np.isfinite(y_vals)
3957
+ if np.sum(mask) >= 2:
3958
+ # Copy config and use the same color as the data group
3959
+ group_config = config.copy()
3960
+ if 'line_color' not in group_config:
3961
+ # Use the same color as the data points for this group
3962
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
3963
+
3964
+ # Skip legend update for all but last regression
3965
+ is_last = (idx == n_groups - 1)
3966
+ self._add_group_regression(
3967
+ x_vals[mask], y_vals[mask],
3968
+ reg_type,
3969
+ name=f"{group_label}={group_name}",
3970
+ config=group_config,
3971
+ update_legend=is_last
3972
+ )
3973
+ regression_count += 1
3974
+ color_idx += 1
3780
3975
  else:
3781
3976
  # Categorical values - group and create regressions
3782
3977
  color_groups = data.groupby(group_column)
@@ -3866,6 +4061,95 @@ class Crossplot:
3866
4061
  "Use shape='well' or set shape to a property name."
3867
4062
  )
3868
4063
 
4064
+ # Add regression by color AND shape combinations
4065
+ if self.regression_by_color_and_shape:
4066
+ config = self._parse_regression_config(self.regression_by_color_and_shape)
4067
+ reg_type = config['type']
4068
+
4069
+ # Determine color and shape columns
4070
+ color_col = None
4071
+ shape_col = None
4072
+ color_label = None
4073
+ shape_label = None
4074
+
4075
+ # Identify color column
4076
+ if self.color and 'color_val' in data.columns:
4077
+ # Check if categorical
4078
+ if self._is_categorical_color(data['color_val'].values):
4079
+ color_col = 'color_val'
4080
+ color_label = self.color
4081
+ elif self.shape == "well" and 'well' in data.columns:
4082
+ # When shape="well", wells provide colors
4083
+ color_col = 'well'
4084
+ color_label = 'well'
4085
+
4086
+ # Identify shape column
4087
+ if self.shape == "well" and 'well' in data.columns:
4088
+ shape_col = 'well'
4089
+ shape_label = 'well'
4090
+ elif self.shape and self.shape != "well" and 'shape_val' in data.columns:
4091
+ shape_col = 'shape_val'
4092
+ shape_label = self.shape
4093
+
4094
+ # Need both color and shape columns for this to work
4095
+ if color_col is None or shape_col is None:
4096
+ warnings.warn(
4097
+ "regression_by_color_and_shape requires both categorical color mapping AND shape/well grouping. "
4098
+ "Set both color and shape parameters, or use regression_by_color or regression_by_group instead."
4099
+ )
4100
+ elif color_col == shape_col:
4101
+ warnings.warn(
4102
+ "regression_by_color_and_shape requires DIFFERENT color and shape mappings. "
4103
+ "Currently both are mapped to the same property. Use regression_by_color or regression_by_group instead."
4104
+ )
4105
+ else:
4106
+ # Group by both color and shape
4107
+ combined_groups = data.groupby([color_col, shape_col])
4108
+ n_groups = len(combined_groups)
4109
+
4110
+ # Validate regression count
4111
+ if regression_count + n_groups > total_points / 2:
4112
+ raise ValueError(
4113
+ f"Too many regression lines requested: {regression_count + n_groups} lines "
4114
+ f"for {total_points} data points (average < 2 points per line). "
4115
+ f"Reduce the number of groups or use a simpler regression strategy."
4116
+ )
4117
+
4118
+ # Get color maps for both dimensions
4119
+ color_colors_map = self._get_group_colors(data, color_col)
4120
+ shape_colors_map = self._get_group_colors(data, shape_col)
4121
+
4122
+ for idx, ((color_val, shape_val), group_data) in enumerate(combined_groups):
4123
+ x_vals = group_data['x'].values
4124
+ y_vals = group_data['y'].values
4125
+ mask = np.isfinite(x_vals) & np.isfinite(y_vals)
4126
+ if np.sum(mask) >= 2:
4127
+ # Copy config and use appropriate color
4128
+ group_config = config.copy()
4129
+ if 'line_color' not in group_config:
4130
+ # Prefer color from color dimension, fallback to shape dimension
4131
+ group_config['line_color'] = color_colors_map.get(
4132
+ color_val,
4133
+ shape_colors_map.get(shape_val, regression_colors[color_idx % len(regression_colors)])
4134
+ )
4135
+
4136
+ # Create descriptive name with both dimensions
4137
+ color_display = self._get_display_label(color_val, 'color')
4138
+ shape_display = self._get_display_label(shape_val, 'shape')
4139
+ name = f"{color_label}={color_display}, {shape_label}={shape_display}"
4140
+
4141
+ # Skip legend update for all but last regression
4142
+ is_last = (idx == n_groups - 1)
4143
+ self._add_group_regression(
4144
+ x_vals[mask], y_vals[mask],
4145
+ reg_type,
4146
+ name=name,
4147
+ config=group_config,
4148
+ update_legend=is_last
4149
+ )
4150
+ regression_count += 1
4151
+ color_idx += 1
4152
+
3869
4153
  def _add_group_regression(
3870
4154
  self,
3871
4155
  x_vals: np.ndarray,
@@ -3961,6 +4245,9 @@ class Crossplot:
3961
4245
 
3962
4246
  def plot(self) -> 'Crossplot':
3963
4247
  """Generate the crossplot figure."""
4248
+ # Reset legend placement tracking for new plot
4249
+ self._occupied_segments = {}
4250
+
3964
4251
  # Prepare data
3965
4252
  data = self._prepare_data()
3966
4253
 
@@ -4138,10 +4425,26 @@ class Crossplot:
4138
4425
  label=self._get_display_label(cat, 'color'))
4139
4426
  for cat in unique_categories]
4140
4427
 
4428
+ # Determine if legend is large
4429
+ color_is_large = len(legend_elements) > 5
4430
+
4431
+ # Find optimal segment for color legend
4432
+ if self._data is not None:
4433
+ segment, location = self._find_optimal_legend_segment(
4434
+ self._data,
4435
+ legend_type='color',
4436
+ is_large=color_is_large
4437
+ )
4438
+ # Mark segment as occupied
4439
+ self._occupied_segments[segment] = 'color'
4440
+ self._occupied_segments[f'{segment}_large'] = color_is_large
4441
+ else:
4442
+ location = 'best'
4443
+
4141
4444
  colorbar_label = self.color if self.color != "depth" and self.color != "label" else "Category"
4142
4445
  legend = self.ax.legend(handles=legend_elements,
4143
4446
  title=colorbar_label,
4144
- loc='best',
4447
+ loc=location,
4145
4448
  frameon=True,
4146
4449
  framealpha=0.9,
4147
4450
  edgecolor='black')
@@ -4256,13 +4559,7 @@ class Crossplot:
4256
4559
  need_color_legend = self.color and is_categorical and self.show_legend
4257
4560
 
4258
4561
  if need_shape_legend and need_color_legend:
4259
- # Create grouped legends in the same region
4260
- # Get best location based on data density
4261
- if self._data is not None:
4262
- primary_loc, _ = self._find_best_legend_locations(self._data)
4263
- else:
4264
- primary_loc = 'best'
4265
-
4562
+ # Create grouped legends in the same region using optimized placement
4266
4563
  # Prepare shape legend handles (from scatter plots)
4267
4564
  shape_handles, _ = self.ax.get_legend_handles_labels()
4268
4565
 
@@ -4274,6 +4571,23 @@ class Crossplot:
4274
4571
  label=self._get_display_label(cat, 'color'))
4275
4572
  for cat in unique_categories]
4276
4573
 
4574
+ # Determine if legends are large (more than 5 items)
4575
+ shape_is_large = len(shape_handles) > 5
4576
+ color_is_large = len(color_handles) > 5
4577
+
4578
+ # Find optimal segment for the grouped legends
4579
+ if self._data is not None:
4580
+ segment, location = self._find_optimal_legend_segment(
4581
+ self._data,
4582
+ legend_type='shape',
4583
+ is_large=shape_is_large or color_is_large
4584
+ )
4585
+ # Mark segment as occupied by both shape and color
4586
+ self._occupied_segments[segment] = 'shape'
4587
+ self._occupied_segments[f'{segment}_large'] = shape_is_large or color_is_large
4588
+ else:
4589
+ location = 'best'
4590
+
4277
4591
  colorbar_label = self.color if self.color != "depth" and self.color != "label" else "Category"
4278
4592
 
4279
4593
  # Create grouped legends
@@ -4282,19 +4596,30 @@ class Crossplot:
4282
4596
  shape_title=group_label,
4283
4597
  color_handles=color_handles,
4284
4598
  color_title=colorbar_label,
4285
- location=primary_loc
4599
+ location=location
4286
4600
  )
4287
4601
 
4288
4602
  elif need_shape_legend:
4289
4603
  # Only shape legend needed
4604
+ shape_handles, _ = self.ax.get_legend_handles_labels()
4605
+ shape_is_large = len(shape_handles) > 5
4606
+
4607
+ # Find optimal segment
4290
4608
  if self._data is not None:
4291
- primary_loc, _ = self._find_best_legend_locations(self._data)
4609
+ segment, location = self._find_optimal_legend_segment(
4610
+ self._data,
4611
+ legend_type='shape',
4612
+ is_large=shape_is_large
4613
+ )
4614
+ # Mark segment as occupied
4615
+ self._occupied_segments[segment] = 'shape'
4616
+ self._occupied_segments[f'{segment}_large'] = shape_is_large
4292
4617
  else:
4293
- primary_loc = 'best'
4618
+ location = 'best'
4294
4619
 
4295
4620
  legend = self.ax.legend(
4296
4621
  title=group_label,
4297
- loc=primary_loc,
4622
+ loc=location,
4298
4623
  frameon=True,
4299
4624
  framealpha=0.9,
4300
4625
  edgecolor='black'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: well-log-toolkit
3
- Version: 0.1.132
3
+ Version: 0.1.133
4
4
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
5
5
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
6
6
  License: MIT
@@ -7,9 +7,9 @@ well_log_toolkit/property.py,sha256=B-3mXNJmvIqjjMdsu1kgVSwMgEwbJ36wn_n_oppdJFw,
7
7
  well_log_toolkit/regression.py,sha256=7D3oI-1XVlFb-mOoHTxTTtUHERFyvQSBAzJzAGVoZnk,25192
8
8
  well_log_toolkit/statistics.py,sha256=_huPMbv2H3o9ezunjEM94mJknX5wPK8V4nDv2lIZZRw,16814
9
9
  well_log_toolkit/utils.py,sha256=O2KPq4htIoUlL74V2zKftdqqTjRfezU9M-568zPLme0,6866
10
- well_log_toolkit/visualization.py,sha256=204l_LFgcSfnpHti1Xc1iGF-KUVEyxkJNS88gkUDeXA,179959
10
+ well_log_toolkit/visualization.py,sha256=rzwSkqUOEeSnnuvPvNMQ56-F316JBk0XSqAdqyoRjEc,195237
11
11
  well_log_toolkit/well.py,sha256=Aav5Y-rui8YsJdvk7BFndNPUu1O9mcjwDApAGyqV9kw,104535
12
- well_log_toolkit-0.1.132.dist-info/METADATA,sha256=5IVBXhf2t9MQW4Gswct9F1IHl5JJnKOWDZxK4g2vUq4,59810
13
- well_log_toolkit-0.1.132.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- well_log_toolkit-0.1.132.dist-info/top_level.txt,sha256=BMOo7OKLcZEnjo0wOLMclwzwTbYKYh31I8RGDOGSBdE,17
15
- well_log_toolkit-0.1.132.dist-info/RECORD,,
12
+ well_log_toolkit-0.1.133.dist-info/METADATA,sha256=F10zbt2b4FNQY76xM0eWAhG_y_ZABGkmY1nvem8doUY,59810
13
+ well_log_toolkit-0.1.133.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ well_log_toolkit-0.1.133.dist-info/top_level.txt,sha256=BMOo7OKLcZEnjo0wOLMclwzwTbYKYh31I8RGDOGSBdE,17
15
+ well_log_toolkit-0.1.133.dist-info/RECORD,,