well-log-toolkit 0.1.117__tar.gz → 0.1.119__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/PKG-INFO +1 -1
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/pyproject.toml +1 -1
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/regression.py +16 -6
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/visualization.py +163 -73
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/PKG-INFO +1 -1
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/README.md +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/setup.cfg +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/__init__.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/exceptions.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/las_file.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/manager.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/operations.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/property.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/statistics.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/utils.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit/well.py +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/SOURCES.txt +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/dependency_links.txt +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/requires.txt +0 -0
- {well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "well-log-toolkit"
|
|
7
|
-
version = "0.1.117"
|
|
7
|
+
version = "0.1.119"
|
|
8
8
|
description = "Fast LAS file processing with lazy loading and filtering for well log analysis"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -70,13 +70,14 @@ class RegressionBase(ABC):
|
|
|
70
70
|
"""
|
|
71
71
|
return self.predict(x)
|
|
72
72
|
|
|
73
|
-
def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> None:
|
|
73
|
+
def _calculate_metrics(self, x: np.ndarray, y: np.ndarray, y_pred: np.ndarray, use_log_space: bool = False) -> None:
|
|
74
74
|
"""Calculate R² and RMSE metrics.
|
|
75
75
|
|
|
76
76
|
Args:
|
|
77
77
|
x: Independent variable values
|
|
78
78
|
y: Actual dependent variable values
|
|
79
79
|
y_pred: Predicted dependent variable values
|
|
80
|
+
use_log_space: If True, calculate R² in log space (useful when y spans orders of magnitude)
|
|
80
81
|
"""
|
|
81
82
|
# Store original data
|
|
82
83
|
self.x_data = x
|
|
@@ -85,12 +86,21 @@ class RegressionBase(ABC):
|
|
|
85
86
|
# Store x-axis range
|
|
86
87
|
self.x_range = (float(np.min(x)), float(np.max(x)))
|
|
87
88
|
|
|
88
|
-
# R² calculation
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
89
|
+
# R² calculation - in log space if requested
|
|
90
|
+
if use_log_space and np.all(y > 0) and np.all(y_pred > 0):
|
|
91
|
+
# Calculate R² in log space for data spanning orders of magnitude
|
|
92
|
+
log_y = np.log10(y)
|
|
93
|
+
log_y_pred = np.log10(y_pred)
|
|
94
|
+
ss_res = np.sum((log_y - log_y_pred) ** 2)
|
|
95
|
+
ss_tot = np.sum((log_y - np.mean(log_y)) ** 2)
|
|
96
|
+
self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
|
|
97
|
+
else:
|
|
98
|
+
# Standard R² in linear space
|
|
99
|
+
ss_res = np.sum((y - y_pred) ** 2)
|
|
100
|
+
ss_tot = np.sum((y - np.mean(y)) ** 2)
|
|
101
|
+
self.r_squared = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0.0
|
|
92
102
|
|
|
93
|
-
# RMSE calculation
|
|
103
|
+
# RMSE calculation (always in linear space)
|
|
94
104
|
self.rmse = np.sqrt(np.mean((y - y_pred) ** 2))
|
|
95
105
|
|
|
96
106
|
def _prepare_data(self, x: ArrayLike, y: ArrayLike) -> Tuple[np.ndarray, np.ndarray]:
|
|
@@ -3139,6 +3139,88 @@ class Crossplot:
|
|
|
3139
3139
|
self._regressions[reg_type] = {}
|
|
3140
3140
|
self._regressions[reg_type][identifier] = regression_obj
|
|
3141
3141
|
|
|
3142
|
+
def _get_group_colors(self, data: pd.DataFrame, group_column: str) -> dict:
|
|
3143
|
+
"""Get the color assigned to each group in the plot.
|
|
3144
|
+
|
|
3145
|
+
Args:
|
|
3146
|
+
data: DataFrame with plotting data
|
|
3147
|
+
group_column: Column name used for grouping
|
|
3148
|
+
|
|
3149
|
+
Returns:
|
|
3150
|
+
Dictionary mapping group names to their colors
|
|
3151
|
+
"""
|
|
3152
|
+
group_colors = {}
|
|
3153
|
+
|
|
3154
|
+
# Get unique groups in the same order as they'll appear in the plot
|
|
3155
|
+
groups = data.groupby(group_column)
|
|
3156
|
+
|
|
3157
|
+
for idx, (group_name, _) in enumerate(groups):
|
|
3158
|
+
# Use the same color assignment logic as _plot_by_groups
|
|
3159
|
+
group_colors[group_name] = DEFAULT_COLORS[idx % len(DEFAULT_COLORS)]
|
|
3160
|
+
|
|
3161
|
+
return group_colors
|
|
3162
|
+
|
|
3163
|
+
def _find_best_legend_locations(self, data: pd.DataFrame) -> tuple[str, str]:
|
|
3164
|
+
"""Find the two best locations for legends based on data density.
|
|
3165
|
+
|
|
3166
|
+
Divides the plot into a 3x3 grid and finds the two squares with the least data points.
|
|
3167
|
+
|
|
3168
|
+
Args:
|
|
3169
|
+
data: DataFrame with 'x' and 'y' columns
|
|
3170
|
+
|
|
3171
|
+
Returns:
|
|
3172
|
+
Tuple of (primary_location, secondary_location) as matplotlib location strings
|
|
3173
|
+
"""
|
|
3174
|
+
# Get x and y bounds
|
|
3175
|
+
x_vals = data['x'].values
|
|
3176
|
+
y_vals = data['y'].values
|
|
3177
|
+
|
|
3178
|
+
# Handle log scales for binning
|
|
3179
|
+
if self.x_log:
|
|
3180
|
+
x_vals = np.log10(x_vals[x_vals > 0])
|
|
3181
|
+
if self.y_log:
|
|
3182
|
+
y_vals = np.log10(y_vals[y_vals > 0])
|
|
3183
|
+
|
|
3184
|
+
x_min, x_max = np.nanmin(x_vals), np.nanmax(x_vals)
|
|
3185
|
+
y_min, y_max = np.nanmin(y_vals), np.nanmax(y_vals)
|
|
3186
|
+
|
|
3187
|
+
# Create 3x3 grid and count points in each square
|
|
3188
|
+
x_bins = np.linspace(x_min, x_max, 4)
|
|
3189
|
+
y_bins = np.linspace(y_min, y_max, 4)
|
|
3190
|
+
|
|
3191
|
+
# Count points in each of 9 squares
|
|
3192
|
+
counts = {}
|
|
3193
|
+
for i in range(3):
|
|
3194
|
+
for j in range(3):
|
|
3195
|
+
x_mask = (x_vals >= x_bins[i]) & (x_vals < x_bins[i+1])
|
|
3196
|
+
y_mask = (y_vals >= y_bins[j]) & (y_vals < y_bins[j+1])
|
|
3197
|
+
counts[(i, j)] = np.sum(x_mask & y_mask)
|
|
3198
|
+
|
|
3199
|
+
# Map grid positions to matplotlib location strings
|
|
3200
|
+
# Grid: (0,2) (1,2) (2,2) -> upper left, upper center, upper right
|
|
3201
|
+
# (0,1) (1,1) (2,1) -> center left, center, center right
|
|
3202
|
+
# (0,0) (1,0) (2,0) -> lower left, lower center, lower right
|
|
3203
|
+
position_map = {
|
|
3204
|
+
(0, 2): 'upper left',
|
|
3205
|
+
(1, 2): 'upper center',
|
|
3206
|
+
(2, 2): 'upper right',
|
|
3207
|
+
(0, 1): 'center left',
|
|
3208
|
+
(1, 1): 'center',
|
|
3209
|
+
(2, 1): 'center right',
|
|
3210
|
+
(0, 0): 'lower left',
|
|
3211
|
+
(1, 0): 'lower center',
|
|
3212
|
+
(2, 0): 'lower right',
|
|
3213
|
+
}
|
|
3214
|
+
|
|
3215
|
+
# Sort squares by count (ascending)
|
|
3216
|
+
sorted_squares = sorted(counts.items(), key=lambda x: x[1])
|
|
3217
|
+
|
|
3218
|
+
# Get two best locations
|
|
3219
|
+
best_pos = position_map[sorted_squares[0][0]]
|
|
3220
|
+
second_best_pos = position_map[sorted_squares[1][0]]
|
|
3221
|
+
|
|
3222
|
+
return best_pos, second_best_pos
|
|
3223
|
+
|
|
3142
3224
|
def _format_regression_label(self, name: str, reg, include_equation: bool = None, include_r2: bool = None) -> str:
|
|
3143
3225
|
"""Format a modern, compact regression label.
|
|
3144
3226
|
|
|
@@ -3156,26 +3238,25 @@ class Crossplot:
|
|
|
3156
3238
|
if include_r2 is None:
|
|
3157
3239
|
include_r2 = self.show_regression_r2
|
|
3158
3240
|
|
|
3159
|
-
#
|
|
3160
|
-
|
|
3161
|
-
|
|
3162
|
-
# Add equation and R² on same line if both shown, more compact
|
|
3163
|
-
metrics = []
|
|
3241
|
+
# Format: "Name (equation)" with R² on second line
|
|
3242
|
+
# Equation and R² will be colored grey in the legend update method
|
|
3243
|
+
first_line = name
|
|
3164
3244
|
if include_equation:
|
|
3165
3245
|
eq = reg.equation()
|
|
3166
|
-
#
|
|
3167
|
-
|
|
3168
|
-
|
|
3246
|
+
eq = eq.replace(' ', '') # Remove spaces for compactness
|
|
3247
|
+
# Add equation in parentheses (will be styled grey later)
|
|
3248
|
+
first_line = f"{name} ({eq})"
|
|
3169
3249
|
|
|
3250
|
+
# Add R² on second line if requested (will be styled grey later)
|
|
3170
3251
|
if include_r2:
|
|
3171
|
-
#
|
|
3172
|
-
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3252
|
+
# Format R² with appropriate note
|
|
3253
|
+
if self.y_log:
|
|
3254
|
+
r2_label = f"R² = {reg.r_squared:.3f} (log)"
|
|
3255
|
+
else:
|
|
3256
|
+
r2_label = f"R² = {reg.r_squared:.3f}"
|
|
3257
|
+
return f"{first_line}\n{r2_label}"
|
|
3258
|
+
else:
|
|
3259
|
+
return first_line
|
|
3179
3260
|
|
|
3180
3261
|
def _update_regression_legend(self) -> None:
|
|
3181
3262
|
"""Create or update the separate regression legend with smart placement."""
|
|
@@ -3199,61 +3280,43 @@ class Crossplot:
|
|
|
3199
3280
|
regression_labels.append(line.get_label())
|
|
3200
3281
|
|
|
3201
3282
|
if regression_handles:
|
|
3202
|
-
#
|
|
3203
|
-
|
|
3204
|
-
|
|
3205
|
-
|
|
3206
|
-
|
|
3207
|
-
'lower
|
|
3208
|
-
|
|
3209
|
-
|
|
3210
|
-
|
|
3211
|
-
|
|
3212
|
-
#
|
|
3213
|
-
|
|
3214
|
-
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
|
|
3221
|
-
|
|
3222
|
-
|
|
3223
|
-
|
|
3224
|
-
|
|
3225
|
-
|
|
3226
|
-
|
|
3227
|
-
loc=locations[0], # Try first preference
|
|
3228
|
-
frameon=True,
|
|
3229
|
-
framealpha=0.95,
|
|
3230
|
-
edgecolor='#cccccc',
|
|
3231
|
-
fancybox=False,
|
|
3232
|
-
shadow=False,
|
|
3233
|
-
fontsize=9,
|
|
3234
|
-
title='Regressions',
|
|
3235
|
-
title_fontsize=10
|
|
3236
|
-
)
|
|
3237
|
-
except Exception:
|
|
3238
|
-
# Fallback to 'best' if specific location fails
|
|
3239
|
-
self.regression_legend = self.ax.legend(
|
|
3240
|
-
regression_handles,
|
|
3241
|
-
regression_labels,
|
|
3242
|
-
loc='best',
|
|
3243
|
-
frameon=True,
|
|
3244
|
-
framealpha=0.95,
|
|
3245
|
-
edgecolor='#cccccc',
|
|
3246
|
-
fancybox=False,
|
|
3247
|
-
shadow=False,
|
|
3248
|
-
fontsize=9,
|
|
3249
|
-
title='Regressions',
|
|
3250
|
-
title_fontsize=10
|
|
3251
|
-
)
|
|
3283
|
+
# Get smart placement based on data density
|
|
3284
|
+
if self._data is not None:
|
|
3285
|
+
_, secondary_loc = self._find_best_legend_locations(self._data)
|
|
3286
|
+
else:
|
|
3287
|
+
# Fallback if data not available
|
|
3288
|
+
secondary_loc = 'lower right'
|
|
3289
|
+
|
|
3290
|
+
# Import legend from matplotlib
|
|
3291
|
+
from matplotlib.legend import Legend
|
|
3292
|
+
|
|
3293
|
+
# Create regression legend at secondary location
|
|
3294
|
+
self.regression_legend = Legend(
|
|
3295
|
+
self.ax,
|
|
3296
|
+
regression_handles,
|
|
3297
|
+
regression_labels,
|
|
3298
|
+
loc=secondary_loc,
|
|
3299
|
+
frameon=True,
|
|
3300
|
+
framealpha=0.95,
|
|
3301
|
+
edgecolor='#cccccc',
|
|
3302
|
+
fancybox=False,
|
|
3303
|
+
shadow=False,
|
|
3304
|
+
fontsize=9,
|
|
3305
|
+
title='Regressions',
|
|
3306
|
+
title_fontsize=10
|
|
3307
|
+
)
|
|
3252
3308
|
|
|
3253
|
-
# Modern styling
|
|
3309
|
+
# Modern styling with grey text for equation and R²
|
|
3254
3310
|
self.regression_legend.get_frame().set_linewidth(0.8)
|
|
3255
3311
|
self.regression_legend.get_title().set_fontweight('600')
|
|
3256
3312
|
|
|
3313
|
+
# Set text color to grey for all labels
|
|
3314
|
+
for text in self.regression_legend.get_texts():
|
|
3315
|
+
text.set_color('#555555')
|
|
3316
|
+
|
|
3317
|
+
# Add as artist to avoid replacing the primary legend
|
|
3318
|
+
self.ax.add_artist(self.regression_legend)
|
|
3319
|
+
|
|
3257
3320
|
def _add_automatic_regressions(self, data: pd.DataFrame) -> None:
|
|
3258
3321
|
"""Add automatic regressions based on initialization parameters."""
|
|
3259
3322
|
if not any([self.regression, self.regression_by_color, self.regression_by_group]):
|
|
@@ -3334,15 +3397,19 @@ class Crossplot:
|
|
|
3334
3397
|
f"Reduce the number of groups or use a different regression strategy."
|
|
3335
3398
|
)
|
|
3336
3399
|
|
|
3400
|
+
# Get the actual colors used for each group in the plot
|
|
3401
|
+
group_colors_map = self._get_group_colors(data, group_column)
|
|
3402
|
+
|
|
3337
3403
|
for idx, (group_name, group_data) in enumerate(color_groups):
|
|
3338
3404
|
x_vals = group_data['x'].values
|
|
3339
3405
|
y_vals = group_data['y'].values
|
|
3340
3406
|
mask = np.isfinite(x_vals) & np.isfinite(y_vals)
|
|
3341
3407
|
if np.sum(mask) >= 2:
|
|
3342
|
-
# Copy config and
|
|
3408
|
+
# Copy config and use the same color as the data group
|
|
3343
3409
|
group_config = config.copy()
|
|
3344
3410
|
if 'line_color' not in group_config:
|
|
3345
|
-
|
|
3411
|
+
# Use the same color as the data points for this group
|
|
3412
|
+
group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
|
|
3346
3413
|
|
|
3347
3414
|
# Skip legend update for all but last regression
|
|
3348
3415
|
is_last = (idx == n_groups - 1)
|
|
@@ -3375,15 +3442,19 @@ class Crossplot:
|
|
|
3375
3442
|
f"Reduce the number of groups or use a different regression strategy."
|
|
3376
3443
|
)
|
|
3377
3444
|
|
|
3445
|
+
# Get the actual colors used for each group in the plot
|
|
3446
|
+
group_colors_map = self._get_group_colors(data, group_col)
|
|
3447
|
+
|
|
3378
3448
|
for idx, (group_name, group_data) in enumerate(groups):
|
|
3379
3449
|
x_vals = group_data['x'].values
|
|
3380
3450
|
y_vals = group_data['y'].values
|
|
3381
3451
|
mask = np.isfinite(x_vals) & np.isfinite(y_vals)
|
|
3382
3452
|
if np.sum(mask) >= 2:
|
|
3383
|
-
# Copy config and
|
|
3453
|
+
# Copy config and use the same color as the data group
|
|
3384
3454
|
group_config = config.copy()
|
|
3385
3455
|
if 'line_color' not in group_config:
|
|
3386
|
-
|
|
3456
|
+
# Use the same color as the data points for this group
|
|
3457
|
+
group_config['line_color'] = group_colors_map.get(group_name, regression_colors[color_idx % len(regression_colors)])
|
|
3387
3458
|
|
|
3388
3459
|
# Skip legend update for all but last regression
|
|
3389
3460
|
is_last = (idx == n_groups - 1)
|
|
@@ -3430,6 +3501,11 @@ class Crossplot:
|
|
|
3430
3501
|
warnings.warn(f"Failed to fit {regression_type} regression for {name}: {e}")
|
|
3431
3502
|
return
|
|
3432
3503
|
|
|
3504
|
+
# Recalculate R² in log space if y-axis is log scale
|
|
3505
|
+
if self.y_log:
|
|
3506
|
+
y_pred = reg.predict(x_vals)
|
|
3507
|
+
reg._calculate_metrics(x_vals, y_vals, y_pred, use_log_space=True)
|
|
3508
|
+
|
|
3433
3509
|
# Store regression in nested structure
|
|
3434
3510
|
self._store_regression(regression_type, name, reg)
|
|
3435
3511
|
|
|
@@ -3672,17 +3748,26 @@ class Crossplot:
|
|
|
3672
3748
|
if first_scatter is None and self.color:
|
|
3673
3749
|
first_scatter = scatter
|
|
3674
3750
|
|
|
3675
|
-
# Add legend
|
|
3751
|
+
# Add legend with smart placement
|
|
3676
3752
|
if self.show_legend:
|
|
3753
|
+
# Get best location based on data density
|
|
3754
|
+
if self._data is not None:
|
|
3755
|
+
primary_loc, _ = self._find_best_legend_locations(self._data)
|
|
3756
|
+
else:
|
|
3757
|
+
primary_loc = 'best'
|
|
3758
|
+
|
|
3677
3759
|
legend = self.ax.legend(
|
|
3678
3760
|
title=group_label,
|
|
3679
|
-
loc=
|
|
3761
|
+
loc=primary_loc,
|
|
3680
3762
|
frameon=True,
|
|
3681
3763
|
framealpha=0.9,
|
|
3682
3764
|
edgecolor='black'
|
|
3683
3765
|
)
|
|
3684
3766
|
legend.get_title().set_fontweight('bold')
|
|
3685
3767
|
|
|
3768
|
+
# Store the primary legend so it persists when regression legend is added
|
|
3769
|
+
self.ax.add_artist(legend)
|
|
3770
|
+
|
|
3686
3771
|
# Add colorbar if using color mapping
|
|
3687
3772
|
if self.color and self.show_colorbar and first_scatter:
|
|
3688
3773
|
self.colorbar = self.fig.colorbar(first_scatter, ax=self.ax)
|
|
@@ -3764,6 +3849,11 @@ class Crossplot:
|
|
|
3764
3849
|
except ValueError as e:
|
|
3765
3850
|
raise ValueError(f"Failed to fit {regression_type} regression: {e}")
|
|
3766
3851
|
|
|
3852
|
+
# Recalculate R² in log space if y-axis is log scale
|
|
3853
|
+
if self.y_log:
|
|
3854
|
+
y_pred = reg.predict(x_clean)
|
|
3855
|
+
reg._calculate_metrics(x_clean, y_clean, y_pred, use_log_space=True)
|
|
3856
|
+
|
|
3767
3857
|
# Store regression in nested structure
|
|
3768
3858
|
reg_name = name if name else regression_type
|
|
3769
3859
|
self._store_regression(regression_type, reg_name, reg)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/requires.txt
RENAMED
|
File without changes
|
{well_log_toolkit-0.1.117 → well_log_toolkit-0.1.119}/well_log_toolkit.egg-info/top_level.txt
RENAMED
|
File without changes
|