well-log-toolkit 0.1.118__tar.gz → 0.1.120__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/PKG-INFO +4 -4
  2. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/README.md +3 -3
  3. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/pyproject.toml +1 -1
  4. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/regression.py +16 -6
  5. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/visualization.py +49 -5
  6. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/well.py +280 -93
  7. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit.egg-info/PKG-INFO +4 -4
  8. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/setup.cfg +0 -0
  9. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/__init__.py +0 -0
  10. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/exceptions.py +0 -0
  11. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/las_file.py +0 -0
  12. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/manager.py +0 -0
  13. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/operations.py +0 -0
  14. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/property.py +0 -0
  15. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/statistics.py +0 -0
  16. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit/utils.py +0 -0
  17. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit.egg-info/SOURCES.txt +0 -0
  18. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit.egg-info/dependency_links.txt +0 -0
  19. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit.egg-info/requires.txt +0 -0
  20. {well_log_toolkit-0.1.118 → well_log_toolkit-0.1.120}/well_log_toolkit.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: well-log-toolkit
3
- Version: 0.1.118
3
+ Version: 0.1.120
4
4
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
5
5
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
6
6
  License: MIT
@@ -1704,14 +1704,14 @@ stats = well.PHIE.filter('Zone').sums_avg()
1704
1704
 
1705
1705
  **To DataFrame:**
1706
1706
  ```python
1707
- # All properties
1707
+ # All properties (default: errors if depths don't match exactly)
1708
1708
  df = well.data()
1709
1709
 
1710
1710
  # Specific properties
1711
1711
  df = well.data(include=['PHIE', 'SW', 'PERM'])
1712
1712
 
1713
- # Auto-resample to common depth grid
1714
- df = well.data(auto_resample=True)
1713
+ # Interpolate to common depth grid if depths don't align
1714
+ df = well.data(merge_method='resample')
1715
1715
 
1716
1716
  # Use labels for discrete properties
1717
1717
  df = well.data(discrete_labels=True)
@@ -1666,14 +1666,14 @@ stats = well.PHIE.filter('Zone').sums_avg()
1666
1666
 
1667
1667
  **To DataFrame:**
1668
1668
  ```python
1669
- # All properties
1669
+ # All properties (default: errors if depths don't match exactly)
1670
1670
  df = well.data()
1671
1671
 
1672
1672
  # Specific properties
1673
1673
  df = well.data(include=['PHIE', 'SW', 'PERM'])
1674
1674
 
1675
- # Auto-resample to common depth grid
1676
- df = well.data(auto_resample=True)
1675
+ # Interpolate to common depth grid if depths don't align
1676
+ df = well.data(merge_method='resample')
1677
1677
 
1678
1678
  # Use labels for discrete properties
1679
1679
  df = well.data(discrete_labels=True)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "well-log-toolkit"
7
- version = "0.1.118"
7
+ version = "0.1.120"
8
8
  description = "Fast LAS file processing with lazy loading and filtering for well log analysis"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -70,13 +70,14 @@ class RegressionBase(ABC):
70
70
  """
71
71
  return self.predict(x)
72
72
 
73
- def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> None:
73
+ def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray, use_log_space: bool = False) -> None:
74
74
  """Calculate R² and RMSE metrics.
75
75
 
76
76
  Args:
77
77
  x: Independent variable values
78
78
  y: Actual dependent variable values
79
79
  y_pred: Predicted dependent variable values
80
+ use_log_space: If True, calculate R² in log space (useful when y spans orders of magnitude)
80
81
  """
81
82
  # Store original data
82
83
  self.x_data = x
@@ -85,12 +86,21 @@ class RegressionBase(ABC):
85
86
  # Store x-axis range
86
87
  self.x_range = (float(np.min(x)), float(np.max(x)))
87
88
 
88
- # R² calculation
89
- ss_res = np.sum((y - y_pred) ** 2)
90
- ss_tot = np.sum((y - np.mean(y)) ** 2)
91
- self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
89
+ # R² calculation - in log space if requested
90
+ if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
91
+ # Calculate in log space for data spanning orders of magnitude
92
+ log_y = np.log10(y)
93
+ log_y_pred = np.log10(y_pred)
94
+ ss_res = np.sum((log_y - log_y_pred) ** 2)
95
+ ss_tot = np.sum((log_y - np.mean(log_y)) ** 2)
96
+ self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
97
+ else:
98
+ # Standard R² in linear space
99
+ ss_res = np.sum((y - y_pred) ** 2)
100
+ ss_tot = np.sum((y - np.mean(y)) ** 2)
101
+ self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
92
102
 
93
- # RMSE calculation
103
+ # RMSE calculation (always in linear space)
94
104
  self.rmse = np.sqrt(np.mean((y - y_pred) ** 2))
95
105
 
96
106
  def _prepare_data(self, x: ArrayLike, y: ArrayLike) -> Tuple[np.ndarray, np.ndarray]:
@@ -3139,6 +3139,27 @@ class Crossplot:
3139
3139
  self._regressions[reg_type] = {}
3140
3140
  self._regressions[reg_type][identifier] = regression_obj
3141
3141
 
3142
+ def _get_group_colors(self, data: pd.DataFrame, group_column: str) -> dict:
3143
+ """Get the color assigned to each group in the plot.
3144
+
3145
+ Args:
3146
+ data: DataFrame with plotting data
3147
+ group_column: Column name used for grouping
3148
+
3149
+ Returns:
3150
+ Dictionary mapping group names to their colors
3151
+ """
3152
+ group_colors = {}
3153
+
3154
+ # Get unique groups in the same order as they'll appear in the plot
3155
+ groups = data.groupby(group_column)
3156
+
3157
+ for idx, (group_name, _) in enumerate(groups):
3158
+ # Use the same color assignment logic as _plot_by_groups
3159
+ group_colors[group_name] = DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
3160
+
3161
+ return group_colors
3162
+
3142
3163
  def _find_best_legend_locations(self, data: pd.DataFrame) -> tuple[str, str]:
3143
3164
  """Find the two best locations for legends based on data density.
3144
3165
 
@@ -3228,7 +3249,12 @@ class Crossplot:
3228
3249
 
3229
3250
  # Add R² on second line if requested (will be styled grey later)
3230
3251
  if include_r2:
3231
- return f"{first_line}\nR² = {reg.r_squared:.3f}"
3252
+ # Format R² with appropriate note
3253
+ if self.y_log:
3254
+ r2_label = f"R² = {reg.r_squared:.3f} (log)"
3255
+ else:
3256
+ r2_label = f"R² = {reg.r_squared:.3f}"
3257
+ return f"{first_line}\n{r2_label}"
3232
3258
  else:
3233
3259
  return first_line
3234
3260
 
@@ -3371,15 +3397,19 @@ class Crossplot:
3371
3397
  f"Reduce the number of groups or use a different regression strategy."
3372
3398
  )
3373
3399
 
3400
+ # Get the actual colors used for each group in the plot
3401
+ group_colors_map = self._get_group_colors(data, group_column)
3402
+
3374
3403
  for idx, (group_name, group_data) in enumerate(color_groups):
3375
3404
  x_vals = group_data['x'].values
3376
3405
  y_vals = group_data['y'].values
3377
3406
  mask = np.isfinite(x_vals) & np.isfinite(y_vals)
3378
3407
  if np.sum(mask) >= 2:
3379
- # Copy config and set default line color if not specified
3408
+ # Copy config and use the same color as the data group
3380
3409
  group_config = config.copy()
3381
3410
  if 'line_color' not in group_config:
3382
- group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
3411
+ # Use the same color as the data points for this group
3412
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
3383
3413
 
3384
3414
  # Skip legend update for all but last regression
3385
3415
  is_last = (idx == n_groups - 1)
@@ -3412,15 +3442,19 @@ class Crossplot:
3412
3442
  f"Reduce the number of groups or use a different regression strategy."
3413
3443
  )
3414
3444
 
3445
+ # Get the actual colors used for each group in the plot
3446
+ group_colors_map = self._get_group_colors(data, group_col)
3447
+
3415
3448
  for idx, (group_name, group_data) in enumerate(groups):
3416
3449
  x_vals = group_data['x'].values
3417
3450
  y_vals = group_data['y'].values
3418
3451
  mask = np.isfinite(x_vals) & np.isfinite(y_vals)
3419
3452
  if np.sum(mask) >= 2:
3420
- # Copy config and set default line color if not specified
3453
+ # Copy config and use the same color as the data group
3421
3454
  group_config = config.copy()
3422
3455
  if 'line_color' not in group_config:
3423
- group_config['line_color'] = regression_colors[color_idx % len(regression_colors)]
3456
+ # Use the same color as the data points for this group
3457
+ group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
3424
3458
 
3425
3459
  # Skip legend update for all but last regression
3426
3460
  is_last = (idx == n_groups - 1)
@@ -3467,6 +3501,11 @@ class Crossplot:
3467
3501
  warnings.warn(f"Failed to fit {regression_type} regression for {name}: {e}")
3468
3502
  return
3469
3503
 
3504
+ # Recalculate R² in log space if y-axis is log scale
3505
+ if self.y_log:
3506
+ y_pred = reg.predict(x_vals)
3507
+ reg._calculate_metrics(x_vals, y_vals, y_pred, use_log_space=True)
3508
+
3470
3509
  # Store regression in nested structure
3471
3510
  self._store_regression(regression_type, name, reg)
3472
3511
 
@@ -3810,6 +3849,11 @@ class Crossplot:
3810
3849
  except ValueError as e:
3811
3850
  raise ValueError(f"Failed to fit {regression_type} regression: {e}")
3812
3851
 
3852
+ # Recalculate R² in log space if y-axis is log scale
3853
+ if self.y_log:
3854
+ y_pred = reg.predict(x_clean)
3855
+ reg._calculate_metrics(x_clean, y_clean, y_pred, use_log_space=True)
3856
+
3813
3857
  # Store regression in nested structure
3814
3858
  reg_name = name if name else regression_type
3815
3859
  self._store_regression(regression_type, reg_name, reg)
@@ -1108,9 +1108,43 @@ class Well:
1108
1108
  f"Available properties: {available or 'none'}"
1109
1109
  )
1110
1110
 
1111
+ @staticmethod
1112
+ def _is_regular_grid(depth: np.ndarray, tolerance: float = 1e-6) -> tuple[bool, Optional[float]]:
1113
+ """
1114
+ Check if a depth grid has regular spacing.
1115
+
1116
+ Parameters
1117
+ ----------
1118
+ depth : np.ndarray
1119
+ Depth values to check
1120
+ tolerance : float, default 1e-6
1121
+ Maximum allowed deviation from regular spacing
1122
+
1123
+ Returns
1124
+ -------
1125
+ tuple[bool, Optional[float]]
1126
+ (is_regular, step_size)
1127
+ - is_regular: True if grid is regular
1128
+ - step_size: The step size if regular, None otherwise
1129
+ """
1130
+ if len(depth) < 2:
1131
+ return False, None
1132
+
1133
+ # Calculate differences between consecutive depths
1134
+ diffs = np.diff(depth)
1135
+
1136
+ # Check if all differences are approximately equal
1137
+ mean_diff = np.mean(diffs)
1138
+ max_deviation = np.max(np.abs(diffs - mean_diff))
1139
+
1140
+ is_regular = max_deviation <= tolerance
1141
+ step_size = mean_diff if is_regular else None
1142
+
1143
+ return is_regular, step_size
1144
+
1111
1145
  def _merge_properties(
1112
1146
  self,
1113
- method: str = 'resample',
1147
+ method: str = 'match',
1114
1148
  sources: Optional[list[str]] = None,
1115
1149
  properties: Optional[list[str]] = None,
1116
1150
  depth_step: Optional[float] = None,
@@ -1125,18 +1159,26 @@ class Well:
1125
1159
 
1126
1160
  Parameters
1127
1161
  ----------
1128
- method : {'resample', 'concat'}, default 'resample'
1129
- Merge method
1162
+ method : {'match', 'resample', 'concat'}, default 'match'
1163
+ Merge method:
1164
+ - 'match': Use first source's depth grid. Continuous properties must have
1165
+ exact depth match (errors otherwise). Discrete properties are resampled
1166
+ using interval logic (since they define depth intervals).
1167
+ - 'resample': Use first source's depth grid (or depth_grid if provided),
1168
+ interpolate other sources to this grid. If first source has irregular
1169
+ spacing, errors when other sources extend beyond its range.
1170
+ - 'concat': Merge all unique depths, fill NaN where depth doesn't exist
1130
1171
  sources : list[str], optional
1131
- List of source names to include
1172
+ List of source names to include (required for 'match' and 'resample' methods)
1132
1173
  properties : list[str], optional
1133
1174
  List of property names to include
1134
1175
  depth_step : float, optional
1135
- For 'resample' method: step size for regular grid (default 0.1)
1176
+ Not used (deprecated - kept for backward compatibility)
1136
1177
  depth_range : tuple[float, float], optional
1137
- For 'resample' method: (min_depth, max_depth) for grid
1178
+ Not used (deprecated - kept for backward compatibility)
1138
1179
  depth_grid : np.ndarray, optional
1139
- For 'resample' method: explicit depth grid to use
1180
+ For 'resample' method: explicit depth grid to use. If None, uses first
1181
+ source's depth grid.
1140
1182
  source_name : str, optional
1141
1183
  Source name for merged properties. If None, generates 'merged_{method}'
1142
1184
 
@@ -1150,11 +1192,13 @@ class Well:
1150
1192
  ValueError
1151
1193
  If invalid method specified
1152
1194
  WellError
1153
- If no properties match the filters
1195
+ If no properties match the filters, if 'match' method detects
1196
+ incompatible depths, or if 'resample' with irregular grid detects
1197
+ extrapolation requirements
1154
1198
  """
1155
- if method not in {'resample', 'concat'}:
1199
+ if method not in {'resample', 'concat', 'match'}:
1156
1200
  raise ValueError(
1157
- f"method must be 'resample' or 'concat', got '{method}'"
1201
+ f"method must be 'resample', 'concat', or 'match', got '{method}'"
1158
1202
  )
1159
1203
 
1160
1204
  # Filter properties by sources and/or names
@@ -1197,38 +1241,76 @@ class Well:
1197
1241
  merged_properties = {}
1198
1242
 
1199
1243
  if method == 'resample':
1200
- # Create or use provided depth grid
1244
+ # Determine the reference depth grid
1201
1245
  if depth_grid is None:
1202
- if depth_step is None:
1203
- depth_step = 0.1
1204
-
1205
- if depth_range is None:
1206
- # Use min/max across selected properties
1207
- all_depths = [p.depth for p in props_to_merge.values()]
1208
- depth_range = (
1209
- min(d.min() for d in all_depths),
1210
- max(d.max() for d in all_depths)
1246
+ # Use first source's depth grid as reference
1247
+ if sources is None or len(sources) == 0:
1248
+ raise WellError(
1249
+ "For 'resample' method, you must specify sources list. "
1250
+ "The first source will be used as the reference depth grid."
1211
1251
  )
1212
1252
 
1213
- depth_grid = np.arange(
1214
- depth_range[0],
1215
- depth_range[1] + depth_step/2,
1216
- depth_step
1217
- )
1253
+ first_source_name = sources[0]
1254
+ if first_source_name not in self._sources:
1255
+ available_sources = ', '.join(self._sources.keys())
1256
+ raise WellError(
1257
+ f"First source '{first_source_name}' not found. "
1258
+ f"Available sources: {available_sources}"
1259
+ )
1218
1260
 
1219
- # Create new resampled properties
1261
+ # Get reference depth from first property in first source
1262
+ first_source_props = self._sources[first_source_name]['properties']
1263
+ if not first_source_props:
1264
+ raise WellError(f"First source '{first_source_name}' has no properties")
1265
+
1266
+ reference_depth = next(iter(first_source_props.values())).depth
1267
+
1268
+ # Check if the reference depth grid is regular
1269
+ is_regular, _ = self._is_regular_grid(reference_depth)
1270
+
1271
+ # Check other sources for values outside reference range
1272
+ ref_min, ref_max = reference_depth.min(), reference_depth.max()
1273
+
1274
+ for source in sources[1:]:
1275
+ if source not in self._sources:
1276
+ continue
1277
+
1278
+ source_props = self._sources[source]['properties']
1279
+ for prop_name, prop in source_props.items():
1280
+ # Check property name filter
1281
+ if properties is not None and prop_name not in properties:
1282
+ continue
1283
+
1284
+ # Check if this source has depth values outside reference range
1285
+ prop_min, prop_max = prop.depth.min(), prop.depth.max()
1286
+
1287
+ if prop_min < ref_min or prop_max > ref_max:
1288
+ if not is_regular:
1289
+ raise WellError(
1290
+ f"Cannot resample sources: source '{source}' has depth values "
1291
+ f"outside the range of first source '{first_source_name}' "
1292
+ f"[{ref_min:.2f}, {ref_max:.2f}], but the first source has an "
1293
+ f"irregular depth grid. Extrapolation is only allowed for regular grids. "
1294
+ f"Source '{source}' range: [{prop_min:.2f}, {prop_max:.2f}]. "
1295
+ f"Use method='concat' to merge all depths, or trim '{source}' to fit."
1296
+ )
1297
+ else:
1298
+ # Use explicitly provided depth grid (for backward compatibility)
1299
+ reference_depth = depth_grid
1300
+
1301
+ # Resample all properties to reference depth grid
1220
1302
  for name, prop in props_to_merge.items():
1221
1303
  resampled_values = Property._resample_to_grid(
1222
1304
  prop.depth,
1223
1305
  prop.values,
1224
- depth_grid,
1306
+ reference_depth,
1225
1307
  method='linear' if prop.type == 'continuous' else 'previous'
1226
1308
  )
1227
1309
 
1228
1310
  # Create new property with merged source
1229
1311
  merged_prop = Property(
1230
1312
  name=name,
1231
- depth=depth_grid.copy(),
1313
+ depth=reference_depth.copy(),
1232
1314
  values=resampled_values,
1233
1315
  parent_well=self,
1234
1316
  unit=prop.unit,
@@ -1243,7 +1325,7 @@ class Well:
1243
1325
 
1244
1326
  merged_properties[name] = merged_prop
1245
1327
 
1246
- else: # method == 'concat'
1328
+ elif method == 'concat':
1247
1329
  # Collect all unique depths from selected properties
1248
1330
  all_depths = []
1249
1331
  for prop in props_to_merge.values():
@@ -1280,11 +1362,100 @@ class Well:
1280
1362
 
1281
1363
  merged_properties[name] = merged_prop
1282
1364
 
1365
+ else: # method == 'match'
1366
+ # Get the first source's depth grid as reference
1367
+ if sources is None or len(sources) == 0:
1368
+ raise WellError(
1369
+ "For 'match' method, you must specify sources list. "
1370
+ "The first source will be used as the reference depth grid."
1371
+ )
1372
+
1373
+ first_source_name = sources[0]
1374
+ if first_source_name not in self._sources:
1375
+ available_sources = ', '.join(self._sources.keys())
1376
+ raise WellError(
1377
+ f"First source '{first_source_name}' not found. "
1378
+ f"Available sources: {available_sources}"
1379
+ )
1380
+
1381
+ # Get reference depth from first property in first source
1382
+ first_source_props = self._sources[first_source_name]['properties']
1383
+ if not first_source_props:
1384
+ raise WellError(f"First source '{first_source_name}' has no properties")
1385
+
1386
+ reference_depth = next(iter(first_source_props.values())).depth
1387
+ reference_depth_set = set(reference_depth)
1388
+
1389
+ # Check other sources for incompatible depths
1390
+ # Continuous properties must have exact depth match
1391
+ # Discrete properties can be resampled (they define intervals)
1392
+ for source in sources[1:]:
1393
+ if source not in self._sources:
1394
+ continue
1395
+
1396
+ source_props = self._sources[source]['properties']
1397
+ for prop_name, prop in source_props.items():
1398
+ # Check property name filter
1399
+ if properties is not None and prop_name not in properties:
1400
+ continue
1401
+
1402
+ # Only check continuous properties for exact depth match
1403
+ if prop.type == 'continuous':
1404
+ # Check if this source has depth values not in reference
1405
+ prop_depth_set = set(prop.depth)
1406
+ extra_depths = prop_depth_set - reference_depth_set
1407
+
1408
+ if extra_depths:
1409
+ raise WellError(
1410
+ f"Cannot match sources: continuous property '{prop_name}' from source '{source}' "
1411
+ f"has depth values that don't exist in first source '{first_source_name}'. "
1412
+ f"Found {len(extra_depths)} incompatible depth values. "
1413
+ f"Use method='resample' or method='concat' instead."
1414
+ )
1415
+
1416
+ # Create matched properties using reference depth
1417
+ for name, prop in props_to_merge.items():
1418
+ if prop.type == 'continuous':
1419
+ # Continuous properties: require exact depth match
1420
+ depth_to_value = dict(zip(prop.depth, prop.values))
1421
+
1422
+ # Match values to reference depth (NaN where depth doesn't exist in this property)
1423
+ matched_values = np.array([
1424
+ depth_to_value.get(d, np.nan) for d in reference_depth
1425
+ ])
1426
+ else:
1427
+ # Discrete properties: resample using 'previous' method (interval-based)
1428
+ # This works because discrete properties define intervals
1429
+ matched_values = Property._resample_to_grid(
1430
+ prop.depth,
1431
+ prop.values,
1432
+ reference_depth,
1433
+ method='previous'
1434
+ )
1435
+
1436
+ # Create new property with merged source
1437
+ merged_prop = Property(
1438
+ name=name,
1439
+ depth=reference_depth.copy(),
1440
+ values=matched_values,
1441
+ parent_well=self,
1442
+ unit=prop.unit,
1443
+ prop_type=prop.type,
1444
+ description=prop.description,
1445
+ null_value=-999.25,
1446
+ labels=prop.labels,
1447
+ source_las=None,
1448
+ source_name=source_name,
1449
+ original_name=prop.original_name
1450
+ )
1451
+
1452
+ merged_properties[name] = merged_prop
1453
+
1283
1454
  return merged_properties
1284
1455
 
1285
1456
  def merge(
1286
1457
  self,
1287
- method: str = 'resample',
1458
+ method: str = 'match',
1288
1459
  sources: Optional[list[str]] = None,
1289
1460
  properties: Optional[list[str]] = None,
1290
1461
  depth_step: Optional[float] = None,
@@ -1301,21 +1472,29 @@ class Well:
1301
1472
 
1302
1473
  Parameters
1303
1474
  ----------
1304
- method : {'resample', 'concat'}, default 'resample'
1475
+ method : {'match', 'resample', 'concat'}, default 'match'
1305
1476
  Merge method:
1306
- - 'resample': Interpolate all properties to a common regular depth grid
1477
+ - 'match': Use first source's depth grid as reference. Continuous properties
1478
+ must have exact depth match (errors otherwise). Discrete properties are
1479
+ automatically resampled using interval logic, since they define depth
1480
+ intervals. This is the safest option when depths should already align.
1481
+ - 'resample': Use first source's depth grid, interpolate other sources
1482
+ to match. If first source has regular spacing (e.g., every 0.1m),
1483
+ allows extrapolation for other sources. If irregular spacing, raises
1484
+ error when other sources extend beyond first source's range.
1307
1485
  - 'concat': Merge all unique depths, fill NaN where depth doesn't exist
1308
1486
  sources : list[str], optional
1309
- List of source names to include (e.g., ['log1.las', 'core.las'])
1310
- If None, includes properties from all sources
1487
+ List of source names to include (e.g., ['CorePerm', 'CorePor'])
1488
+ Required for 'match' and 'resample' methods. The first source is used
1489
+ as the reference depth grid.
1311
1490
  properties : list[str], optional
1312
1491
  List of property names to include. If None, includes all properties
1313
1492
  depth_step : float, optional
1314
- For 'resample' method: step size for regular grid (default 0.1)
1493
+ Not used (deprecated - kept for backward compatibility)
1315
1494
  depth_range : tuple[float, float], optional
1316
- For 'resample' method: (min_depth, max_depth) for grid
1495
+ Not used (deprecated - kept for backward compatibility)
1317
1496
  depth_grid : np.ndarray, optional
1318
- For 'resample' method: explicit depth grid to use
1497
+ Not used (deprecated - kept for backward compatibility)
1319
1498
  source_name : str, optional
1320
1499
  Custom source name for merged properties. If None, uses 'merged_{method}'
1321
1500
 
@@ -1329,25 +1508,37 @@ class Well:
1329
1508
  ValueError
1330
1509
  If invalid method specified
1331
1510
  WellError
1332
- If no properties match the filters
1511
+ If no properties match the filters, if 'match' method detects
1512
+ incompatible depths, or if 'resample' with irregular grid detects
1513
+ extrapolation requirements
1333
1514
 
1334
1515
  Examples
1335
1516
  --------
1336
- >>> # Resample log and core data to common 0.1m grid
1337
- >>> well.merge(method='resample', depth_step=0.1)
1517
+ >>> # Match sources with compatible depths (default, safest)
1518
+ >>> well.merge(
1519
+ ... sources=['CorePerm', 'CorePor'],
1520
+ ... source_name='CorePlugs'
1521
+ ... )
1522
+
1523
+ >>> # Resample to first source's grid (allows interpolation)
1524
+ >>> well.merge(
1525
+ ... method='resample',
1526
+ ... sources=['CompLogs', 'CorePerm'],
1527
+ ... source_name='Resampled'
1528
+ ... )
1338
1529
 
1339
1530
  >>> # Concatenate specific sources with custom name
1340
1531
  >>> well.merge(
1341
1532
  ... method='concat',
1342
- ... sources=['log1.las', 'log2.las'],
1533
+ ... sources=['log1', 'log2'],
1343
1534
  ... source_name='combined_logs'
1344
1535
  ... )
1345
1536
 
1346
- >>> # Resample only specific properties
1537
+ >>> # Match only specific properties
1347
1538
  >>> well.merge(
1348
- ... method='resample',
1349
- ... properties=['PHIE', 'CorePor'],
1350
- ... depth_step=0.05
1539
+ ... sources=['source1', 'source2'],
1540
+ ... properties=['PHIE', 'SW'],
1541
+ ... source_name='selected_props'
1351
1542
  ... )
1352
1543
  """
1353
1544
  # Get merged properties using internal method
@@ -1378,26 +1569,25 @@ class Well:
1378
1569
  reference_property: Optional[str] = None,
1379
1570
  include: Optional[Union[str, list[str]]] = None,
1380
1571
  exclude: Optional[Union[str, list[str]]] = None,
1381
- auto_resample: bool = True,
1382
- merge_method: str = 'resample',
1572
+ merge_method: str = 'match',
1383
1573
  discrete_labels: bool = True,
1384
1574
  clip_edges: bool = True,
1385
1575
  clip_to_property: Optional[str] = None
1386
1576
  ) -> pd.DataFrame:
1387
1577
  """
1388
- Export properties as DataFrame with optional resampling and filtering.
1578
+ Export properties as DataFrame with optional merging and filtering.
1389
1579
 
1390
- This method does NOT modify the original property depth grids. If auto_resample
1391
- is True, properties are temporarily merged for the DataFrame output only using
1392
- the specified merge method.
1580
+ This method does NOT modify the original property depth grids. Properties
1581
+ are temporarily aligned using the specified merge method for DataFrame
1582
+ output only.
1393
1583
 
1394
1584
  Parameters
1395
1585
  ----------
1396
1586
  reference_property : str, optional
1397
- Property to use as depth reference for resampling. If auto_resample
1398
- is True, all properties will be merged to this property's depth
1399
- grid. If not specified, defaults to the first property that was added
1400
- (typically the first property from the first LAS file loaded).
1587
+ Property to use as depth reference. All properties will be aligned
1588
+ to this property's depth grid using the merge_method. If not specified,
1589
+ defaults to the first property that was added (typically the first
1590
+ property from the first LAS file loaded).
1401
1591
  include : str or list[str], optional
1402
1592
  Property name(s) to include. If None, includes all properties.
1403
1593
  Can be a single string or a list of strings.
@@ -1405,12 +1595,11 @@ class Well:
1405
1595
  Property name(s) to exclude. If both include and exclude are
1406
1596
  specified, exclude overrides (removes properties from include list).
1407
1597
  Can be a single string or a list of strings.
1408
- auto_resample : bool, default True
1409
- If True, automatically merge all properties to the reference
1410
- property's depth grid (uses first property if not specified).
1411
- Set to False only if properties are already aligned.
1412
- merge_method : {'resample', 'concat'}, default 'resample'
1413
- Merge method to use when auto_resample is True:
1598
+ merge_method : {'match', 'resample', 'concat'}, default 'match'
1599
+ Method to align properties to reference depth grid:
1600
+ - 'match': Require exact depth match for continuous properties (errors if
1601
+ not aligned). Discrete properties are automatically resampled using
1602
+ interval logic. Safest option.
1414
1603
  - 'resample': Interpolate properties to reference depth grid
1415
1604
  - 'concat': Merge unique depths, fill NaN where missing
1416
1605
  discrete_labels : bool, default True
@@ -1431,17 +1620,21 @@ class Well:
1431
1620
  Raises
1432
1621
  ------
1433
1622
  WellError
1434
- If properties have different depth grids and auto_resample is False
1623
+ If properties have different depth grids and merge_method is 'match',
1624
+ or if merge requirements fail
1435
1625
  PropertyNotFoundError
1436
1626
  If reference_property or included properties are not found
1437
1627
 
1438
1628
  Examples
1439
1629
  --------
1440
- >>> # Export all properties (auto-resamples to first property's grid)
1630
+ >>> # Export all properties (errors if depths don't match exactly)
1441
1631
  >>> df = well.data()
1442
1632
 
1633
+ >>> # Export with interpolation if depths don't align
1634
+ >>> df = well.data(merge_method='resample')
1635
+
1443
1636
  >>> # Export with specific reference property
1444
- >>> df = well.data(reference_property='DEPT')
1637
+ >>> df = well.data(reference_property='PHIE')
1445
1638
 
1446
1639
  >>> # Include only specific properties
1447
1640
  >>> df = well.data(include=['PHIE', 'SW', 'PERM'])
@@ -1458,7 +1651,7 @@ class Well:
1458
1651
  >>> # Include with exclusions (exclude overrides)
1459
1652
  >>> df = well.data(include=['PHIE', 'SW', 'PERM', 'Zone'], exclude=['Zone'])
1460
1653
 
1461
- >>> # Use concat merge method instead of resample
1654
+ >>> # Use concat merge method to include all unique depths
1462
1655
  >>> df = well.data(merge_method='concat')
1463
1656
 
1464
1657
  >>> # Disable label mapping for discrete properties
@@ -1520,33 +1713,17 @@ class Well:
1520
1713
 
1521
1714
  depth = ref_prop.depth
1522
1715
 
1523
- # Get properties to export (either merged or original)
1524
- if auto_resample:
1525
- # Use merge method to get temporary merged properties
1526
- props_to_export = self._merge_properties(
1527
- method=merge_method,
1528
- properties=properties_filter,
1529
- depth_grid=depth,
1530
- source_name='temp_dataframe'
1531
- )
1532
- else:
1533
- # Use original properties (only unique ones)
1534
- if properties_filter is not None:
1535
- props_to_export = {
1536
- name: prop for name, (_, prop) in all_properties.items()
1537
- if name in properties_filter
1538
- }
1539
- else:
1540
- props_to_export = {name: prop for name, (_, prop) in all_properties.items()}
1541
-
1542
- # Verify all properties on same grid
1543
- for name, prop in props_to_export.items():
1544
- if not np.array_equal(prop.depth, depth):
1545
- raise WellError(
1546
- f"Cannot export to DataFrame: property '{name}' has different "
1547
- f"depth grid than reference '{ref_prop_name}'. Either set auto_resample=True or "
1548
- f"call well.merge() first to align all properties."
1549
- )
1716
+ # Always use merge method to align properties
1717
+ # The merge_method parameter controls the behavior:
1718
+ # - 'match': errors if grids don't align (safe default)
1719
+ # - 'resample': interpolates to reference grid
1720
+ # - 'concat': merges all unique depths
1721
+ props_to_export = self._merge_properties(
1722
+ method=merge_method,
1723
+ properties=properties_filter,
1724
+ depth_grid=depth,
1725
+ source_name='temp_dataframe'
1726
+ )
1550
1727
 
1551
1728
  # Build DataFrame
1552
1729
  data = {'DEPT': depth}
@@ -1625,7 +1802,13 @@ class Well:
1625
1802
  Convert well properties to a LasFile object.
1626
1803
 
1627
1804
  This creates a LasFile object from the well's properties. If properties have
1628
- different depth grids, they will be automatically merged using the resample method.
1805
+ different depth grids, they will be automatically aligned using interpolation
1806
+ (resample method) to the first property's depth grid. This ensures the exported
1807
+ LAS file has a consistent depth grid.
1808
+
1809
+ Note: Unlike the data() method which defaults to 'match' for safety, this method
1810
+ always resamples to ensure LAS export succeeds. If you need strict depth alignment
1811
+ checking, use well.merge(method='match') before calling to_las().
1629
1812
 
1630
1813
  Parameters
1631
1814
  ----------
@@ -1667,6 +1850,10 @@ class Well:
1667
1850
  >>> las = well.to_las()
1668
1851
  >>> # ... modify via las.set_data(df) if needed ...
1669
1852
  >>> las.export('output.las')
1853
+
1854
+ >>> # If strict depth alignment is required, merge first
1855
+ >>> well.merge(method='match', sources=['source1', 'source2'])
1856
+ >>> las = well.to_las()
1670
1857
  """
1671
1858
  if not self._sources:
1672
1859
  raise WellError("No properties to export")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: well-log-toolkit
3
- Version: 0.1.118
3
+ Version: 0.1.120
4
4
  Summary: Fast LAS file processing with lazy loading and filtering for well log analysis
5
5
  Author-email: Kristian dF Kollsgård <kkollsg@gmail.com>
6
6
  License: MIT
@@ -1704,14 +1704,14 @@ stats = well.PHIE.filter('Zone').sums_avg()
1704
1704
 
1705
1705
  **To DataFrame:**
1706
1706
  ```python
1707
- # All properties
1707
+ # All properties (default: errors if depths don't match exactly)
1708
1708
  df = well.data()
1709
1709
 
1710
1710
  # Specific properties
1711
1711
  df = well.data(include=['PHIE', 'SW', 'PERM'])
1712
1712
 
1713
- # Auto-resample to common depth grid
1714
- df = well.data(auto_resample=True)
1713
+ # Interpolate to common depth grid if depths don't align
1714
+ df = well.data(merge_method='resample')
1715
1715
 
1716
1716
  # Use labels for discrete properties
1717
1717
  df = well.data(discrete_labels=True)