subsurface-terra 2025.1.0rc14__py3-none-any.whl → 2025.1.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
subsurface/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '2025.1.0rc14'
- __version_tuple__ = version_tuple = (2025, 1, 0, 'rc14')
+ __version__ = version = '2025.1.0rc15'
+ __version_tuple__ = version_tuple = (2025, 1, 0, 'rc15')

subsurface/core/geological_formats/boreholes/_map_attrs_to_survey.py ADDED
@@ -0,0 +1,234 @@
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+ from scipy.interpolate import interp1d
+ from typing import Tuple, Optional, Union, List, Any
+
+ from ...structs.base_structures import UnstructuredData
+ from ...structs.base_structures._unstructured_data_constructor import raw_attributes_to_dict_data_arrays
+ from ...structs.unstructured_elements import LineSet
+
+
+ def combine_survey_and_attrs(attrs: pd.DataFrame, survey_trajectory: LineSet, well_id_mapper: dict[str, int]) -> UnstructuredData:
+     # Map the raw attributes onto the trajectory's measured depths.
+
+     # Ensure all columns in attrs exist in new_attrs; missing ones are added as NaN
+     new_attrs = _map_attrs_to_measured_depths(attrs, survey_trajectory, well_id_mapper)
+
+     # Construct the final xarray dict without an intermediate variable
+     points_attributes_xarray_dict: dict[str, xr.DataArray] = raw_attributes_to_dict_data_arrays(
+         default_attributes_name="vertex_attrs",
+         n_items=survey_trajectory.data.data["vertex_attrs"].shape[0],  # TODO: Could this be read from new_attrs instead?
+         dims=["points", "vertex_attr"],
+         raw_attributes=new_attrs
+     )
+
+     # Inline construction of UnstructuredData
+     return UnstructuredData.from_data_arrays_dict(
+         xarray_dict={
+             "vertex"      : survey_trajectory.data.data["vertex"],
+             "cells"       : survey_trajectory.data.data["cells"],
+             "vertex_attrs": points_attributes_xarray_dict["vertex_attrs"],
+             "cell_attrs"  : survey_trajectory.data.data["cell_attrs"]
+         },
+         xarray_attributes=survey_trajectory.data.data.attrs,
+         default_cells_attributes_name=survey_trajectory.data.cells_attr_name,
+         default_points_attributes_name=survey_trajectory.data.vertex_attr_name
+     )
+
+ def _prepare_categorical_data(attrs: pd.DataFrame) -> pd.DataFrame:
+     """
+     Prepare categorical data for interpolation by converting categorical columns to numeric IDs.
+
+     Args:
+         attrs: DataFrame containing attribute data
+
+     Returns:
+         Modified DataFrame with categorical data prepared for interpolation
+     """
+     # Create a copy to avoid modifying the original
+     attrs_copy = attrs.copy()
+
+     # If 'component lith' exists but 'lith_ids' doesn't, create 'lith_ids'
+     if 'component lith' in attrs_copy.columns and 'lith_ids' not in attrs_copy.columns:
+         attrs_copy['lith_ids'], _ = pd.factorize(attrs_copy['component lith'], use_na_sentinel=True)
+
+     return attrs_copy
+
+
+ def _prepare_new_attributes(attrs: pd.DataFrame, survey_trajectory: LineSet) -> pd.DataFrame:
+     """
+     Prepare the new attributes DataFrame by adding missing columns from attrs.
+
+     Args:
+         attrs: DataFrame containing attribute data
+         survey_trajectory: LineSet containing trajectory data
+
+     Returns:
+         New attributes DataFrame with all necessary columns
+     """
+     # Start with a copy of the existing attributes DataFrame
+     new_attrs = survey_trajectory.data.points_attributes.copy()
+
+     # Add missing columns from attrs, preserving their dtypes
+     for col in attrs.columns.difference(new_attrs.columns):
+         new_attrs[col] = np.nan if pd.api.types.is_numeric_dtype(attrs[col]) else None
+
+     return new_attrs
+
+
+ def _get_interpolation_locations(attrs_well: pd.DataFrame, well_name: str) -> np.ndarray:
+     """
+     Determine the locations to use for interpolation based on top and base values.
+
+     Args:
+         attrs_well: DataFrame containing well attribute data
+         well_name: Name of the current well
+
+     Returns:
+         Array of location values to use for interpolation
+     """
+     if "base" not in attrs_well.columns:
+         raise ValueError(f"Base column must be present in the file for well '{well_name}'.")
+     elif "top" not in attrs_well.columns:
+         return attrs_well['base'].values
+     else:
+         return ((attrs_well['top'] + attrs_well['base']) / 2).values
+
+
+ def _nearest_neighbor_categorical_interpolation(
+         x_locations: np.ndarray,
+         y_values: np.ndarray,
+         target_depths: np.ndarray
+ ) -> np.ndarray:
+     """
+     Custom nearest-neighbor interpolation for categorical data.
+
+     This function finds the nearest source point for each target point
+     and assigns the corresponding categorical value.
+
+     Args:
+         x_locations: Array of source locations
+         y_values: Array of categorical values at source locations
+         target_depths: Array of target depths for interpolation
+
+     Returns:
+         Array of interpolated categorical values
+     """
+     # Initialize output array with NaN values (object dtype)
+     result = np.full(target_depths.shape, np.nan, dtype=object)
+
+     # For each target depth, find the nearest source location
+     for i, depth in enumerate(target_depths):
+         # Calculate distances to all source locations
+         distances = np.abs(x_locations - depth)
+
+         # Find the index of the minimum distance
+         if len(distances) > 0:
+             nearest_idx = np.argmin(distances)
+             result[i] = y_values[nearest_idx]
+
+     return result
+
+
+ def _interpolate_attribute(
+         attr_values: pd.Series,
+         x_locations: np.ndarray,
+         target_depths: np.ndarray,
+         column_name: str,
+         is_categorical: bool
+ ) -> np.ndarray:
+     """
+     Interpolate attribute values to target depths.
+
+     Args:
+         attr_values: Series containing attribute values
+         x_locations: Array of source locations for interpolation
+         target_depths: Array of target depths for interpolation
+         column_name: Name of the column being interpolated
+         is_categorical: Whether the attribute is categorical
+
+     Returns:
+         Array of interpolated values
+     """
+     # For categorical data or the lithology columns, use custom nearest-neighbor interpolation
+     if is_categorical or column_name in ['lith_ids', 'component lith']:
+         return _nearest_neighbor_categorical_interpolation(
+             x_locations=x_locations,
+             y_values=attr_values.values,
+             target_depths=target_depths
+         )
+     else:
+         # For numerical data, use scipy's interp1d with linear interpolation
+         interp_func = interp1d(
+             x=x_locations,
+             y=attr_values.values,
+             bounds_error=False,
+             fill_value=np.nan,
+             kind='linear'
+         )
+         return interp_func(target_depths)
+
+
+ def _map_attrs_to_measured_depths(attrs: pd.DataFrame, survey_trajectory: LineSet, well_id_mapper: dict[str, int]) -> pd.DataFrame:
+     """
+     Map attributes to measured depths for each well.
+
+     Args:
+         attrs: DataFrame containing attribute data
+         survey_trajectory: LineSet containing trajectory data
+         well_id_mapper: Dictionary mapping well names to IDs
+
+     Returns:
+         DataFrame with attributes mapped to measured depths
+     """
+     # Extract trajectory data
+     trajectory: xr.DataArray = survey_trajectory.data.data["vertex_attrs"]
+     trajectory_well_id: xr.DataArray = trajectory.sel({'vertex_attr': 'well_id'})
+     measured_depths: np.ndarray = trajectory.sel({'vertex_attr': 'measured_depths'}).values.astype(np.float64)
+
+     # Prepare data
+     attrs: pd.DataFrame = _prepare_categorical_data(attrs)
+     new_attrs: pd.DataFrame = _prepare_new_attributes(attrs, survey_trajectory)
+
+     # Process each well
+     for well_name in well_id_mapper:
+         # Skip wells not in the attributes DataFrame
+         if well_name not in attrs.index:
+             print(f"Well '{well_name}' does not exist in the attributes DataFrame.")
+             continue
+
+         # Get well data
+         attrs_well = attrs.loc[[well_name]]
+         well_id = well_id_mapper.get(well_name)
+         well_mask = (trajectory_well_id == well_id).values
+         well_depths = measured_depths[well_mask]
+
+         # Get interpolation locations
+         interp_locations = _get_interpolation_locations(attrs_well, well_name)
+
+         # Interpolate each attribute
+         for col in attrs_well.columns:
+             # Skip location and ID columns
+             if col in ['top', 'base', 'well_id']:
+                 continue
+
+             attr_values = attrs_well[col]
+             is_categorical = attr_values.dtype == 'O' or isinstance(attr_values.dtype, pd.CategoricalDtype)
+
+             # Skip categorical columns other than the lithology ones
+             if is_categorical and col not in ['lith_ids', 'component lith']:
+                 continue
+
+             # Interpolate and assign values
+             interpolated_values = _interpolate_attribute(
+                 attr_values,
+                 interp_locations,
+                 well_depths,
+                 col,
+                 is_categorical
+             )
+
+             new_attrs.loc[well_mask, col] = interpolated_values
+
+     return new_attrs
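
A note on the design: scipy's interp1d(kind='nearest') cannot interpolate string or categorical y-values, which is why _nearest_neighbor_categorical_interpolation reimplements the nearest-neighbor assignment by hand (the previous survey.py simply skipped such columns). A minimal standalone sketch of the same assignment rule, in plain NumPy with made-up depths and lith labels:

    import numpy as np

    # Hypothetical layer midpoints (m MD) and their lith labels
    x_locations = np.array([10.0, 35.0, 80.0])
    y_values = np.array(["sand", "shale", "limestone"], dtype=object)

    # Target depths along the resampled trajectory
    target_depths = np.array([0.0, 20.0, 50.0, 100.0])

    # Same rule as the per-point loop above, vectorized: one
    # |target - source| distance matrix, then argmin per target point.
    nearest_idx = np.abs(target_depths[:, None] - x_locations[None, :]).argmin(axis=1)
    print(y_values[nearest_idx])  # ['sand' 'sand' 'shale' 'limestone']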

subsurface/core/geological_formats/boreholes/_survey_to_unstruct.py ADDED
@@ -0,0 +1,163 @@
+ from typing import Hashable, Optional
+
+ import numpy as np
+ import pandas as pd
+
+ from subsurface import optional_requirements
+ from ...structs.base_structures import UnstructuredData
+
+
+ def data_frame_to_unstructured_data(survey_df: 'pd.DataFrame', number_nodes: int, attr_df: Optional['pd.DataFrame'] = None,
+                                     duplicate_attr_depths: bool = False) -> UnstructuredData:
+     wp = optional_requirements.require_wellpathpy()
+
+     cum_vertex: np.ndarray = np.empty((0, 3), dtype=np.float32)
+     cells: np.ndarray = np.empty((0, 2), dtype=np.int_)
+     cell_attr: pd.DataFrame = pd.DataFrame(columns=['well_id'], dtype=np.float32)
+     vertex_attr: pd.DataFrame = pd.DataFrame()
+
+     for e, (borehole_id, data) in enumerate(survey_df.groupby(level=0)):
+         dev = wp.deviation(
+             md=data['md'].values,
+             inc=data['inc'].values,
+             azi=data['azi'].values
+         )
+
+         md_min = dev.md.min()
+         md_max = dev.md.max()
+
+         attr_depths = _grab_depths_from_attr(
+             attr_df=attr_df,
+             borehole_id=borehole_id,
+             duplicate_attr_depths=duplicate_attr_depths,
+             md_max=md_max,
+             md_min=md_min
+         )
+
+         # Now combine attr_depths with the regularly spaced depths
+         md_min = dev.md.min()
+         md_max = dev.md.max()
+         depths = np.linspace(md_min, md_max, number_nodes)
+         depths = np.union1d(depths, attr_depths)
+         depths.sort()
+
+         # Resample positions at depths
+         pos = dev.minimum_curvature().resample(depths=depths)
+         vertex_count = cum_vertex.shape[0]
+
+         this_well_vertex = np.vstack([pos.easting, pos.northing, pos.depth]).T
+         cum_vertex = np.vstack([cum_vertex, this_well_vertex])
+         measured_depths = _calculate_distances(array_of_vertices=this_well_vertex)
+
+         n_vertex_shift_0 = np.arange(0, len(pos.depth) - 1, dtype=np.int_)
+         n_vertex_shift_1 = np.arange(1, len(pos.depth), dtype=np.int_)
+         cell_per_well = np.vstack([n_vertex_shift_0, n_vertex_shift_1]).T + vertex_count
+         cells = np.vstack([cells, cell_per_well])
+
+         attribute_values = np.isin(depths, attr_depths)
+
+         vertex_attr_per_well = pd.DataFrame({
+             'well_id'        : [e] * len(pos.depth),
+             'measured_depths': measured_depths,
+             'is_attr_point'  : attribute_values,
+         })
+
+         vertex_attr = pd.concat([vertex_attr, vertex_attr_per_well], ignore_index=True)
+
+         # Add the well id (e) to cell_attr
+         cell_attr = pd.concat([cell_attr, pd.DataFrame({'well_id': [e] * len(cell_per_well)})], ignore_index=True)
+
+     unstruct = UnstructuredData.from_array(
+         vertex=cum_vertex,
+         cells=cells.astype(int),
+         vertex_attr=vertex_attr.reset_index(drop=True),
+         cells_attr=cell_attr.reset_index(drop=True)
+     )
+
+     unstruct.data.attrs["well_id_mapper"] = {well_id: e for e, well_id in enumerate(survey_df.index.unique(level=0))}
+
+     return unstruct
+
+
+ def _grab_depths_from_attr(
+         attr_df: pd.DataFrame,
+         borehole_id: Hashable,
+         duplicate_attr_depths: bool,
+         md_max: float,
+         md_min: float
+ ) -> np.ndarray:
+     # Initialize attr_depths and attr_labels as empty arrays
+     attr_depths = np.array([], dtype=float)
+     attr_labels = np.array([], dtype='<U4')  # Labels distinguishing 'top' and 'base' depths
+
+     if attr_df is None or ("top" not in attr_df.columns and "base" not in attr_df.columns):
+         return attr_depths
+
+     try:
+         vals = attr_df.loc[borehole_id]
+
+         tops = np.array([], dtype=float)
+         bases = np.array([], dtype=float)
+
+         if 'top' in vals:
+             if isinstance(vals, pd.DataFrame):
+                 tops = vals['top'].values.flatten()
+             else:
+                 tops = np.array([vals['top']])
+             # Convert to float and remove NaNs
+             tops = tops.astype(float)
+             tops = tops[~np.isnan(tops)]
+             # Clip to within the md range
+             tops = tops[(tops >= md_min) & (tops <= md_max)]
+
+         if 'base' in vals:
+             if isinstance(vals, pd.DataFrame):
+                 bases = vals['base'].values.flatten()
+             else:
+                 bases = np.array([vals['base']])
+             # Convert to float and remove NaNs
+             bases = bases.astype(float)
+             bases = bases[~np.isnan(bases)]
+             # Clip to within the md range
+             bases = bases[(bases >= md_min) & (bases <= md_max)]
+
+         # Combine tops and bases into attr_depths with labels
+         attr_depths = np.concatenate((tops, bases))
+         attr_labels = np.array(['top'] * len(tops) + ['base'] * len(bases))
+
+         # Drop duplicate depths (note that np.unique also sorts the result)
+         _, unique_indices = np.unique(attr_depths, return_index=True)
+         attr_depths = attr_depths[unique_indices]
+         attr_labels = attr_labels[unique_indices]
+
+     except KeyError:
+         # No attributes for this borehole_id or missing columns
+         attr_depths = np.array([], dtype=float)
+         attr_labels = np.array([], dtype='<U4')
+
+     # If duplicate_attr_depths is True, duplicate attr_depths with a tiny offset
+     if duplicate_attr_depths and len(attr_depths) > 0:
+         tiny_offset = (md_max - md_min) * 1e-6  # A tiny fraction of the depth range
+         # Create offsets: +tiny_offset for 'top', -tiny_offset for 'base'
+         offsets = np.where(attr_labels == 'top', tiny_offset, -tiny_offset)
+         duplicated_attr_depths = attr_depths + offsets
+         # Keep only duplicated depths that stay within the md range
+         valid_indices = (duplicated_attr_depths >= md_min) & (duplicated_attr_depths <= md_max)
+         duplicated_attr_depths = duplicated_attr_depths[valid_indices]
+         # Original attribute depths
+         original_attr_depths = attr_depths
+         # Combine originals and duplicates
+         attr_depths = np.hstack([original_attr_depths, duplicated_attr_depths])
+
+     return attr_depths
+
+
+ def _calculate_distances(array_of_vertices: np.ndarray) -> np.ndarray:
+     # Calculate the differences between consecutive points
+     differences = np.diff(array_of_vertices, axis=0)
+
+     # Calculate the Euclidean distance for each pair of consecutive points
+     distances = np.linalg.norm(differences, axis=1)
+     # Insert a 0 at the beginning to represent the starting point at the surface
+     measured_depths = np.insert(np.cumsum(distances), 0, 0)
+     return measured_depths
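
For reference, a hedged usage sketch of the relocated builder. This is a private module, so the supported entry point remains the Survey class; the sketch assumes wellpathpy is installed (require_wellpathpy() raises otherwise), and the single-well station values are invented:

    import pandas as pd

    from subsurface.core.geological_formats.boreholes._survey_to_unstruct import (
        data_frame_to_unstructured_data,
    )

    # Survey stations for one hypothetical well; index level 0 must carry the
    # borehole id, matching the groupby(level=0) above.
    survey_df = pd.DataFrame(
        {
            "md":  [0.0, 100.0, 200.0],
            "inc": [0.0, 0.0, 5.0],
            "azi": [0.0, 0.0, 90.0],
        },
        index=pd.MultiIndex.from_product([["well_A"], range(3)]),
    )

    unstruct = data_frame_to_unstructured_data(survey_df, number_nodes=30)
    print(unstruct.data.attrs["well_id_mapper"])  # {'well_A': 0}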

subsurface/core/geological_formats/boreholes/boreholes.py CHANGED
@@ -1,7 +1,7 @@
  import numpy as np
  import pandas as pd
  from dataclasses import dataclass
- from typing import Hashable
+ from typing import Hashable, Literal
 
  from ._combine_trajectories import create_combined_trajectory, MergeOptions
  from .collars import Collars
@@ -69,10 +69,10 @@ class BoreholeSet:
          # I need to implement the survey to and then name the files accordingly
          bytearray_le_collars: bytes = self.collars.data.to_binary()
          bytearray_le_trajectory: bytes = self.combined_trajectory.data.to_binary()
- 
+ 
          new_file = open(f"{path}_collars.le", "wb")
          new_file.write(bytearray_le_collars)
- 
+ 
          new_file = open(f"{path}_trajectory.le", "wb")
          new_file.write(bytearray_le_trajectory)
          return True
@@ -88,13 +88,37 @@ class BoreholeSet:
 
          return component_lith_arrays
 
-     def get_bottom_coords_for_each_lith(self) -> dict[Hashable, np.ndarray]:
+     def get_bottom_coords_for_each_lith(self, group_by: Literal['component lith', 'lith_ids'] = 'lith_ids') -> dict[Hashable, np.ndarray]:
+         """
+         Retrieves the bottom coordinates for each lithological component or lith ID from
+         the merged vertex data arrays.
+ 
+         This function groups the merged data by either 'component lith' or 'lith_ids',
+         then extracts the coordinates of the bottommost vertices for each well. It
+         returns a dictionary where keys are either lithological component identifiers
+         or lith IDs, and values are arrays of 3D coordinates representing the bottom
+         vertices.
+ 
+         Args:
+             group_by (Literal['component lith', 'lith_ids']): Specifies the grouping
+                 column to use for lithological components. Acceptable values are either
+                 'component lith' or 'lith_ids'. Defaults to 'lith_ids'.
+ 
+         Returns:
+             dict[Hashable, np.ndarray]: A dictionary mapping the lithological component
+                 or lith ID to an array of 3D coordinates ([X, Y, Z]) corresponding to the
+                 bottom vertices for each well.
+ 
+         Raises:
+             ValueError: If no groups are found for the specified `group_by` column.
+         """
          merged_df = self._merge_vertex_data_arrays_to_dataframe()
          component_lith_arrays = {}
-         groupby = merged_df.groupby('lith_ids')
-         if groupby.ngroups == 0:
+         group = merged_df.groupby(group_by)
+ 
+         if group.ngroups == 0:
              raise ValueError("No components found")
-         for lith, group in groupby:
+         for lith, group in group:
              lith = int(lith)
              first_vertices = group.groupby('well_id').last().reset_index()
              array = first_vertices[['X', 'Y', 'Z']].values
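
Given the new group_by switch, a call might look like the following sketch (assuming borehole_set is an already-populated BoreholeSet whose merged vertex attributes carry 'lith_ids'):

    # Default grouping by numeric lith IDs; each value is an (n_wells, 3)
    # array of [X, Y, Z] bottom coordinates, one row per well.
    bottoms = borehole_set.get_bottom_coords_for_each_lith(group_by='lith_ids')
    for lith_id, xyz in bottoms.items():
        print(lith_id, xyz.shape)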

subsurface/core/geological_formats/boreholes/survey.py CHANGED
@@ -1,15 +1,12 @@
- import warnings
- 
+ from dataclasses import dataclass
  from typing import Union, Hashable, Optional
 
  import pandas as pd
- from dataclasses import dataclass
- import numpy as np
- import xarray as xr
 
- from subsurface import optional_requirements
- from ...structs.unstructured_elements import LineSet
+ from ._map_attrs_to_survey import combine_survey_and_attrs
+ from ._survey_to_unstruct import data_frame_to_unstructured_data
  from ...structs.base_structures import UnstructuredData
+ from ...structs.unstructured_elements import LineSet
 
  NUMBER_NODES = 30
  RADIUS = 10
@@ -39,7 +36,7 @@ class Survey:
          :return: A Survey object representing the input data.
 
          """
-         trajectories: UnstructuredData = _data_frame_to_unstructured_data(
+         trajectories: UnstructuredData = data_frame_to_unstructured_data(
              survey_df=_correct_angles(survey_df),
              attr_df=attr_df,
              number_nodes=number_nodes,
@@ -61,145 +58,11 @@ class Survey:
          return self.well_id_mapper.get(well_string_id, None)
 
      def update_survey_with_lith(self, lith: pd.DataFrame):
-         unstruct: UnstructuredData = _combine_survey_and_attrs(lith, self)
+         unstruct: UnstructuredData = combine_survey_and_attrs(lith, self.survey_trajectory, self.well_id_mapper)
          self.survey_trajectory.data = unstruct
 
      def update_survey_with_attr(self, attrs: pd.DataFrame):
-         self.survey_trajectory.data = _combine_survey_and_attrs(attrs, self)
- 
- 
- def _combine_survey_and_attr(lith: pd.DataFrame, survey: Survey) -> UnstructuredData:
-     pass
- 
- 
- def _combine_survey_and_attrs(attrs: pd.DataFrame, survey: Survey) -> UnstructuredData:
-     # Import moved to top for clarity and possibly avoiding repeated imports if called multiple times
-     from ...structs.base_structures._unstructured_data_constructor import raw_attributes_to_dict_data_arrays
- 
-     # Accessing trajectory data more succinctly
-     trajectory: xr.DataArray = survey.survey_trajectory.data.data["vertex_attrs"]
-     # Ensure all columns in lith exist in new_attrs, if not, add them as NaN
- 
-     new_attrs = _map_attrs_to_measured_depths(attrs, survey)
- 
-     # Construct the final xarray dict without intermediate variable
-     points_attributes_xarray_dict = raw_attributes_to_dict_data_arrays(
-         default_attributes_name="vertex_attrs",
-         n_items=trajectory.shape[0],  # TODO: Can I look this on new_attrs to remove line 11?
-         dims=["points", "vertex_attr"],
-         raw_attributes=new_attrs
-     )
- 
-     # Inline construction of UnstructuredData
-     return UnstructuredData.from_data_arrays_dict(
-         xarray_dict={
-             "vertex"      : survey.survey_trajectory.data.data["vertex"],
-             "cells"       : survey.survey_trajectory.data.data["cells"],
-             "vertex_attrs": points_attributes_xarray_dict["vertex_attrs"],
-             "cell_attrs"  : survey.survey_trajectory.data.data["cell_attrs"]
-         },
-         xarray_attributes=survey.survey_trajectory.data.data.attrs,
-         default_cells_attributes_name=survey.survey_trajectory.data.cells_attr_name,
-         default_points_attributes_name=survey.survey_trajectory.data.vertex_attr_name
-     )
- 
- 
- def _map_attrs_to_measured_depths(attrs: pd.DataFrame, survey: Survey) -> pd.DataFrame:
-     trajectory: xr.DataArray = survey.survey_trajectory.data.data["vertex_attrs"]
-     trajectory_well_id: xr.DataArray = trajectory.sel({'vertex_attr': 'well_id'})
-     measured_depths: np.ndarray = trajectory.sel({'vertex_attr': 'measured_depths'}).values.astype(np.float64)
- 
-     # Start with a copy of the existing attributes DataFrame
-     new_attrs = survey.survey_trajectory.data.points_attributes.copy()
-     if 'component lith' in attrs.columns and 'lith_ids' not in attrs.columns:
-         # Factorize lith components directly in-place
-         attrs['lith_ids'], _ = pd.factorize(attrs['component lith'], use_na_sentinel=True)
-     else:
-         pass
- 
-     # Add missing columns from attrs, preserving their dtypes
-     for col in attrs.columns.difference(new_attrs.columns):
-         new_attrs[col] = np.nan if pd.api.types.is_numeric_dtype(attrs[col]) else None
- 
-     # Align well IDs between attrs and trajectory, perform interpolation, and map the attributes
-     # Loop dict
-     for survey_well_name in survey.well_id_mapper:
-         # Select rows corresponding to the current well ID
- 
-         # use the well_id to get all the elements of attrs that have the well_id as index
-         if survey_well_name in attrs.index:
-             attrs_well = attrs.loc[[survey_well_name]]
-             # Proceed with processing attrs_well
-         else:
-             print(f"Well '{survey_well_name}' does not exist in the attributes DataFrame.")
-             continue
- 
-         survey_well_id = survey.get_well_num_id(survey_well_name)
-         trajectory_well_mask = (trajectory_well_id == survey_well_id).values
- 
-         # Apply mask to measured depths for the current well
-         well_measured_depths = measured_depths[trajectory_well_mask]
- 
-         if "base" not in attrs_well.columns:
-             raise ValueError(f"Base column must be present in the file for well '{survey_well_name}'.")
-         elif "top" not in attrs_well.columns:
-             location_values_to_interpolate = attrs_well['base']
-         else:
-             location_values_to_interpolate = (attrs_well['top'] + attrs_well['base']) / 2
- 
-         # Interpolation for each attribute column
-         for col in attrs_well.columns:
-             # Interpolate the attribute values based on the measured depths
-             if col in ['top', 'base', 'well_id']:
-                 continue
-             attr_to_interpolate = attrs_well[col]
-             # make sure the attr_to_interpolate is not a string
-             if attr_to_interpolate.dtype == 'O' or isinstance(attr_to_interpolate.dtype, pd.CategoricalDtype):
-                 continue
-             if col in ['lith_ids', 'component lith']:
-                 interp_kind = 'nearest'
-             else:
-                 interp_kind = 'linear'
- 
-             from scipy.interpolate import interp1d
-             interp_func = interp1d(
-                 x=location_values_to_interpolate,
-                 y=attr_to_interpolate,
-                 bounds_error=False,
-                 fill_value=np.nan,
-                 kind=interp_kind
-             )
- 
-             # Assign the interpolated values to the new_attrs DataFrame
-             vals = interp_func(well_measured_depths)
-             new_attrs.loc[trajectory_well_mask, col] = vals
- 
-     return new_attrs
- 
- 
- def _map_attrs_to_measured_depths_(attrs: pd.DataFrame, new_attrs: pd.DataFrame, survey: Survey):
-     warnings.warn("This function is obsolete. Use _map_attrs_to_measured_depths instead.", DeprecationWarning)
- 
-     trajectory: xr.DataArray = survey.survey_trajectory.data.data["vertex_attrs"]
-     well_ids: xr.DataArray = trajectory.sel({'vertex_attr': 'well_id'})
-     measured_depths: xr.DataArray = trajectory.sel({'vertex_attr': 'measured_depths'})
- 
-     new_columns = attrs.columns.difference(new_attrs.columns)
-     new_attrs = pd.concat([new_attrs, pd.DataFrame(columns=new_columns)], axis=1)
-     for index, row in attrs.iterrows():
-         well_id = survey.get_well_num_id(index)
-         if well_id is None:
-             print(f'Well ID {index} not found in survey trajectory. Skipping lithology assignment.')
- 
-         well_id_mask = well_ids == well_id
- 
-         # TODO: Here we are going to need to interpolate
- 
-         spatial_mask = ((measured_depths <= row['top']) & (measured_depths >= row['base']))
-         mask = well_id_mask & spatial_mask
- 
-         new_attrs.loc[mask.values, attrs.columns] = row.values
-     return new_attrs
+         self.survey_trajectory.data = combine_survey_and_attrs(attrs, self.survey_trajectory, self.well_id_mapper)
 
 
  def _correct_angles(df: pd.DataFrame) -> pd.DataFrame:
@@ -221,160 +84,3 @@ def _correct_angles(df: pd.DataFrame) -> pd.DataFrame:
      df['azi'] = df['azi'].apply(correct_azimuth)
 
      return df
- 
- 
- def _data_frame_to_unstructured_data(survey_df: 'pd.DataFrame', number_nodes: int, attr_df: Optional['pd.DataFrame'] = None,
-                                      duplicate_attr_depths: bool = False) -> UnstructuredData:
- 
-     wp = optional_requirements.require_wellpathpy()
- 
-     cum_vertex: np.ndarray = np.empty((0, 3), dtype=np.float32)
-     cells: np.ndarray = np.empty((0, 2), dtype=np.int_)
-     cell_attr: pd.DataFrame = pd.DataFrame(columns=['well_id'], dtype=np.float32)
-     vertex_attr: pd.DataFrame = pd.DataFrame()
- 
-     for e, (borehole_id, data) in enumerate(survey_df.groupby(level=0)):
-         dev = wp.deviation(
-             md=data['md'].values,
-             inc=data['inc'].values,
-             azi=data['azi'].values
-         )
- 
-         md_min = dev.md.min()
-         md_max = dev.md.max()
- 
-         attr_depths = _grab_depths_from_attr(
-             attr_df=attr_df,
-             borehole_id=borehole_id,
-             duplicate_attr_depths=duplicate_attr_depths,
-             md_max=md_max,
-             md_min=md_min
-         )
- 
-         # Now combine attr_depths with depths
-         md_min = dev.md.min()
-         md_max = dev.md.max()
-         depths = np.linspace(md_min, md_max, number_nodes)
-         depths = np.union1d(depths, attr_depths)
-         depths.sort()
- 
-         # Resample positions at depths
-         pos = dev.minimum_curvature().resample(depths=depths)
-         vertex_count = cum_vertex.shape[0]
- 
-         this_well_vertex = np.vstack([pos.easting, pos.northing, pos.depth]).T
-         cum_vertex = np.vstack([cum_vertex, this_well_vertex])
-         measured_depths = _calculate_distances(array_of_vertices=this_well_vertex)
- 
-         n_vertex_shift_0 = np.arange(0, len(pos.depth) - 1, dtype=np.int_)
-         n_vertex_shift_1 = np.arange(1, len(pos.depth), dtype=np.int_)
-         cell_per_well = np.vstack([n_vertex_shift_0, n_vertex_shift_1]).T + vertex_count
-         cells = np.vstack([cells, cell_per_well])
- 
-         attribute_values = np.isin(depths, attr_depths)
- 
-         vertex_attr_per_well = pd.DataFrame({
-             'well_id'        : [e] * len(pos.depth),
-             'measured_depths': measured_depths,
-             'is_attr_point'  : attribute_values,
-         })
- 
-         vertex_attr = pd.concat([vertex_attr, vertex_attr_per_well], ignore_index=True)
- 
-         # Add the id (e), to cell_attr
-         cell_attr = pd.concat([cell_attr, pd.DataFrame({'well_id': [e] * len(cell_per_well)})], ignore_index=True)
- 
-     unstruct = UnstructuredData.from_array(
-         vertex=cum_vertex,
-         cells=cells.astype(int),
-         vertex_attr=vertex_attr.reset_index(drop=True),
-         cells_attr=cell_attr.reset_index(drop=True)
-     )
- 
-     unstruct.data.attrs["well_id_mapper"] = {well_id: e for e, well_id in enumerate(survey_df.index.unique(level=0))}
- 
-     return unstruct
- 
- 
- def _grab_depths_from_attr(
-         attr_df: pd.DataFrame,
-         borehole_id: Hashable,
-         duplicate_attr_depths: bool,
-         md_max: float,
-         md_min: float
- ) -> np.ndarray:
-     # Initialize attr_depths and attr_labels as empty arrays
-     attr_depths = np.array([], dtype=float)
-     attr_labels = np.array([], dtype='<U4')  # Initialize labels for 'top' and 'base'
- 
-     if attr_df is None or ("top" not in attr_df.columns and "base" not in attr_df.columns):
-         return attr_depths
- 
-     try:
-         vals = attr_df.loc[borehole_id]
- 
-         tops = np.array([], dtype=float)
-         bases = np.array([], dtype=float)
- 
-         if 'top' in vals:
-             if isinstance(vals, pd.DataFrame):
-                 tops = vals['top'].values.flatten()
-             else:
-                 tops = np.array([vals['top']])
-             # Convert to float and remove NaNs
-             tops = tops.astype(float)
-             tops = tops[~np.isnan(tops)]
-             # Clip to within md range
-             tops = tops[(tops >= md_min) & (tops <= md_max)]
- 
-         if 'base' in vals:
-             if isinstance(vals, pd.DataFrame):
-                 bases = vals['base'].values.flatten()
-             else:
-                 bases = np.array([vals['base']])
-             # Convert to float and remove NaNs
-             bases = bases.astype(float)
-             bases = bases[~np.isnan(bases)]
-             # Clip to within md range
-             bases = bases[(bases >= md_min) & (bases <= md_max)]
- 
-         # Combine tops and bases into attr_depths with labels
-         attr_depths = np.concatenate((tops, bases))
-         attr_labels = np.array(['top'] * len(tops) + ['base'] * len(bases))
- 
-         # Drop duplicates while preserving order
-         _, unique_indices = np.unique(attr_depths, return_index=True)
-         attr_depths = attr_depths[unique_indices]
-         attr_labels = attr_labels[unique_indices]
- 
-     except KeyError:
-         # No attributes for this borehole_id or missing columns
-         attr_depths = np.array([], dtype=float)
-         attr_labels = np.array([], dtype='<U4')
- 
-     # If duplicate_attr_depths is True, duplicate attr_depths with a tiny offset
-     if duplicate_attr_depths and len(attr_depths) > 0:
-         tiny_offset = (md_max - md_min) * 1e-6  # A tiny fraction of the depth range
-         # Create offsets: +tiny_offset for 'top', -tiny_offset for 'base'
-         offsets = np.where(attr_labels == 'top', tiny_offset, -tiny_offset)
-         duplicated_attr_depths = attr_depths + offsets
-         # Ensure the duplicated depths are within the md range
-         valid_indices = (duplicated_attr_depths >= md_min) & (duplicated_attr_depths <= md_max)
-         duplicated_attr_depths = duplicated_attr_depths[valid_indices]
-         # Original attribute depths
-         original_attr_depths = attr_depths
-         # Combine originals and duplicates
-         attr_depths = np.hstack([original_attr_depths, duplicated_attr_depths])
- 
-     return attr_depths
- 
- 
- def _calculate_distances(array_of_vertices: np.ndarray) -> np.ndarray:
-     # Calculate the differences between consecutive points
-     differences = np.diff(array_of_vertices, axis=0)
- 
-     # Calculate the Euclidean distance for each pair of consecutive points
-     distances = np.linalg.norm(differences, axis=1)
-     # Insert a 0 at the beginning to represent the starting point at the surface
-     measured_depths = np.insert(np.cumsum(distances), 0, 0)
-     return measured_depths
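
After this refactor, survey.py keeps only the public Survey workflow; attribute mapping lives in _map_attrs_to_survey.py and trajectory construction in _survey_to_unstruct.py. A hedged sketch of the updated call path, assuming survey is an existing Survey instance (the attribute table is invented; it is indexed by well name and carries the required 'base' column plus the optional 'top', so interval midpoints are used as interpolation locations):

    import pandas as pd

    attrs = pd.DataFrame(
        {"top": [0.0, 50.0], "base": [50.0, 120.0], "porosity": [0.12, 0.07]},
        index=pd.Index(["well_A", "well_A"], name="well"),
    )

    # Delegates to combine_survey_and_attrs(attrs, survey.survey_trajectory,
    # survey.well_id_mapper) and stores the result on the trajectory.
    survey.update_survey_with_attr(attrs)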

subsurface_terra-2025.1.0rc15.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: subsurface_terra
- Version: 2025.1.0rc14
+ Version: 2025.1.0rc15
  Summary: Subsurface data types and utilities. This version is the one used by Terranigma Solutions. Please feel free to take anything in this repository for the original one.
  Home-page: https://softwareunderground.github.io/subsurface
  Author: Software Underground

subsurface_terra-2025.1.0rc15.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  subsurface/__init__.py,sha256=0D2rCUem3fiHsXFXXSmwheLiPS4cXxEdfWdFBj0b-cY,930
- subsurface/_version.py,sha256=6hCz-0xeTsPVqYirvA5x7rupXsfeGqd4ZcT7_v8NHgo,550
+ subsurface/_version.py,sha256=oyGHFt2uSORzD3yojwTGuhpm_zC63z5TJljBVjwFw70,550
  subsurface/optional_requirements.py,sha256=Wg36RqxzPiLtN-3qSg5K9QVEeXCB0-EjSzHERAoO8EE,2883
  subsurface/api/__init__.py,sha256=E3R1fNn_d5XQdlZyjtzBcH_bYRzAvOc3xV38qFLqbZY,369
  subsurface/api/interfaces/__init__.py,sha256=rqUtJyMLicobcyhmr74TepjmUQAEmlazKT3vjV_n3aA,6
@@ -11,9 +11,11 @@ subsurface/core/geological_formats/__init__.py,sha256=jOyPsC3ZEMFljo9SGk0ym7cmBZ
  subsurface/core/geological_formats/fault.py,sha256=Zldf9VT4Gzo0NWe_UvBf5kOgxZtg2T9eWaWPqxeffWw,1547
  subsurface/core/geological_formats/boreholes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  subsurface/core/geological_formats/boreholes/_combine_trajectories.py,sha256=U5VribebcMAag0DOKnna983g1BXAGLKCddGra2g3Nos,5246
- subsurface/core/geological_formats/boreholes/boreholes.py,sha256=bWGEPMFHPVyGO8Kv3uBo9U8_MLXlEEkdraXx86eURJY,5364
+ subsurface/core/geological_formats/boreholes/_map_attrs_to_survey.py,sha256=EH_gAY7d4v9Yyae4sEtWYQ7Ngb3_PN-TNszqjRjuEQ8,9013
+ subsurface/core/geological_formats/boreholes/_survey_to_unstruct.py,sha256=kO-bOl60zHl2Xj0ESzKlzqNno4s4LBiEOmeziSX2HVI,6593
+ subsurface/core/geological_formats/boreholes/boreholes.py,sha256=ZTZHjwihpw0a4nFpiO7O5lZI19I1JXMTKnGSohyURbE,6545
  subsurface/core/geological_formats/boreholes/collars.py,sha256=o1I8bS0XqWa2fS0n6XZVKXsuBHknXO2Z_5sdlFc_GAE,750
- subsurface/core/geological_formats/boreholes/survey.py,sha256=aaENR0ksT_lK0BYR7HL73KNZ44eo3Ag353xQdVcLOz0,16219
+ subsurface/core/geological_formats/boreholes/survey.py,sha256=0aVLn0byYXRnqqdWpJlXEQzOYM-aZmQ6scnU-CrY6qI,3586
  subsurface/core/reader_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  subsurface/core/reader_helpers/reader_unstruct.py,sha256=Lme1ano-dQrKhLCwrokcpKqa28DFxDaGAs3ub8MAHtY,397
  subsurface/core/reader_helpers/readers_data.py,sha256=Vewi8pqv-zooRIhffTM52eWZeP6l9MnHkD9LZj6c1LU,4995
@@ -89,8 +91,8 @@ subsurface/modules/writer/to_rex/material_encoder.py,sha256=zGlqF9X_Civ9VvtGwo-I
  subsurface/modules/writer/to_rex/mesh_encoder.py,sha256=6TBtJhYJEAMEBHxQkbweXrJO1jIUx1ClM8l5ajVCrLc,6443
  subsurface/modules/writer/to_rex/to_rex.py,sha256=njsm2d3e69pRVfF_TOC_hexvXPmgNTZdJvhbnXcvyIo,3800
  subsurface/modules/writer/to_rex/utils.py,sha256=HEpJ95LjHOK24ePpmLpPP5uFyv6i_kN3AWh031q-1Uc,379
- subsurface_terra-2025.1.0rc14.dist-info/licenses/LICENSE,sha256=GSXh9K5TZauM89BeGbYg07oST_HMhOTiZoEGaUeKBtA,11606
- subsurface_terra-2025.1.0rc14.dist-info/METADATA,sha256=RrT1uQXG_FQJNwVpkyTo1ozaLYC3bPVZr1mU_SulRHA,7094
- subsurface_terra-2025.1.0rc14.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
- subsurface_terra-2025.1.0rc14.dist-info/top_level.txt,sha256=f32R_tUSf83CfkpB4vjv5m2XcD8TmDX9h7F4rnEXt5A,11
- subsurface_terra-2025.1.0rc14.dist-info/RECORD,,
+ subsurface_terra-2025.1.0rc15.dist-info/licenses/LICENSE,sha256=GSXh9K5TZauM89BeGbYg07oST_HMhOTiZoEGaUeKBtA,11606
+ subsurface_terra-2025.1.0rc15.dist-info/METADATA,sha256=3mtnSQiHih7wOyT6c-cZzFtrmA4uVUBFSEMAbttMKtQ,7094
+ subsurface_terra-2025.1.0rc15.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+ subsurface_terra-2025.1.0rc15.dist-info/top_level.txt,sha256=f32R_tUSf83CfkpB4vjv5m2XcD8TmDX9h7F4rnEXt5A,11
+ subsurface_terra-2025.1.0rc15.dist-info/RECORD,,

subsurface_terra-2025.1.0rc15.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.1.0)
+ Generator: setuptools (80.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 