subsurface-terra 2025.1.0rc14-py3-none-any.whl → 2025.1.0rc16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. subsurface/__init__.py +31 -31
  2. subsurface/_version.py +34 -21
  3. subsurface/api/__init__.py +13 -13
  4. subsurface/api/interfaces/__init__.py +3 -3
  5. subsurface/api/interfaces/stream.py +136 -136
  6. subsurface/api/reader/read_wells.py +78 -78
  7. subsurface/core/geological_formats/boreholes/_combine_trajectories.py +117 -117
  8. subsurface/core/geological_formats/boreholes/_map_attrs_to_survey.py +236 -0
  9. subsurface/core/geological_formats/boreholes/_survey_to_unstruct.py +163 -0
  10. subsurface/core/geological_formats/boreholes/boreholes.py +140 -116
  11. subsurface/core/geological_formats/boreholes/collars.py +26 -26
  12. subsurface/core/geological_formats/boreholes/survey.py +86 -380
  13. subsurface/core/geological_formats/fault.py +47 -47
  14. subsurface/core/reader_helpers/reader_unstruct.py +11 -11
  15. subsurface/core/reader_helpers/readers_data.py +130 -130
  16. subsurface/core/reader_helpers/readers_wells.py +13 -13
  17. subsurface/core/structs/__init__.py +3 -3
  18. subsurface/core/structs/base_structures/__init__.py +2 -2
  19. subsurface/core/structs/base_structures/_liquid_earth_mesh.py +121 -121
  20. subsurface/core/structs/base_structures/_unstructured_data_constructor.py +70 -70
  21. subsurface/core/structs/base_structures/base_structures_enum.py +6 -6
  22. subsurface/core/structs/base_structures/structured_data.py +282 -282
  23. subsurface/core/structs/base_structures/unstructured_data.py +319 -319
  24. subsurface/core/structs/structured_elements/octree_mesh.py +10 -10
  25. subsurface/core/structs/structured_elements/structured_grid.py +59 -59
  26. subsurface/core/structs/structured_elements/structured_mesh.py +9 -9
  27. subsurface/core/structs/unstructured_elements/__init__.py +3 -3
  28. subsurface/core/structs/unstructured_elements/line_set.py +72 -72
  29. subsurface/core/structs/unstructured_elements/point_set.py +43 -43
  30. subsurface/core/structs/unstructured_elements/tetrahedron_mesh.py +35 -35
  31. subsurface/core/structs/unstructured_elements/triangular_surface.py +62 -62
  32. subsurface/core/utils/utils_core.py +38 -38
  33. subsurface/modules/reader/__init__.py +13 -13
  34. subsurface/modules/reader/faults/faults.py +80 -80
  35. subsurface/modules/reader/from_binary.py +46 -46
  36. subsurface/modules/reader/mesh/_GOCAD_mesh.py +82 -82
  37. subsurface/modules/reader/mesh/_trimesh_reader.py +447 -447
  38. subsurface/modules/reader/mesh/csv_mesh_reader.py +53 -53
  39. subsurface/modules/reader/mesh/dxf_reader.py +177 -177
  40. subsurface/modules/reader/mesh/glb_reader.py +30 -30
  41. subsurface/modules/reader/mesh/mx_reader.py +232 -232
  42. subsurface/modules/reader/mesh/obj_reader.py +53 -53
  43. subsurface/modules/reader/mesh/omf_mesh_reader.py +43 -43
  44. subsurface/modules/reader/mesh/surface_reader.py +56 -56
  45. subsurface/modules/reader/mesh/surfaces_api.py +41 -41
  46. subsurface/modules/reader/profiles/__init__.py +3 -3
  47. subsurface/modules/reader/profiles/profiles_core.py +197 -197
  48. subsurface/modules/reader/read_netcdf.py +38 -38
  49. subsurface/modules/reader/topography/__init__.py +7 -7
  50. subsurface/modules/reader/topography/topo_core.py +100 -100
  51. subsurface/modules/reader/volume/read_grav3d.py +478 -428
  52. subsurface/modules/reader/volume/read_volume.py +327 -230
  53. subsurface/modules/reader/volume/segy_reader.py +105 -105
  54. subsurface/modules/reader/volume/seismic.py +173 -173
  55. subsurface/modules/reader/volume/volume_utils.py +43 -43
  56. subsurface/modules/reader/wells/DEP/__init__.py +43 -43
  57. subsurface/modules/reader/wells/DEP/_well_files_reader.py +167 -167
  58. subsurface/modules/reader/wells/DEP/_wells_api.py +61 -61
  59. subsurface/modules/reader/wells/DEP/_welly_reader.py +180 -180
  60. subsurface/modules/reader/wells/DEP/pandas_to_welly.py +212 -212
  61. subsurface/modules/reader/wells/_read_to_df.py +57 -57
  62. subsurface/modules/reader/wells/read_borehole_interface.py +148 -148
  63. subsurface/modules/reader/wells/wells_utils.py +68 -68
  64. subsurface/modules/tools/mocking_aux.py +104 -104
  65. subsurface/modules/visualization/__init__.py +2 -2
  66. subsurface/modules/visualization/to_pyvista.py +320 -320
  67. subsurface/modules/writer/to_binary.py +12 -12
  68. subsurface/modules/writer/to_rex/common.py +78 -78
  69. subsurface/modules/writer/to_rex/data_struct.py +74 -74
  70. subsurface/modules/writer/to_rex/gempy_to_rexfile.py +791 -791
  71. subsurface/modules/writer/to_rex/material_encoder.py +44 -44
  72. subsurface/modules/writer/to_rex/mesh_encoder.py +152 -152
  73. subsurface/modules/writer/to_rex/to_rex.py +115 -115
  74. subsurface/modules/writer/to_rex/utils.py +15 -15
  75. subsurface/optional_requirements.py +116 -116
  76. {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/METADATA +194 -194
  77. subsurface_terra-2025.1.0rc16.dist-info/RECORD +98 -0
  78. {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/WHEEL +1 -1
  79. {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/licenses/LICENSE +203 -203
  80. subsurface_terra-2025.1.0rc14.dist-info/RECORD +0 -96
  81. {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/top_level.txt +0 -0
subsurface/core/geological_formats/boreholes/_combine_trajectories.py
@@ -1,117 +1,117 @@
- import enum
- import numpy as np
- import pandas as pd
-
- from .collars import Collars
- from .survey import Survey
- from ...structs import LineSet
- from ...structs.base_structures import UnstructuredData
-
-
- class MergeOptions(enum.Enum):
-     RAISE = enum.auto()
-     INTERSECT = enum.auto()
-
-
- def create_combined_trajectory(collars: Collars, survey: Survey, merge_option: MergeOptions, slice_: slice):
-     collar_df = _create_collar_df(collars, slice_, survey.well_id_mapper)
-     survey_df_vertex = _create_survey_df(survey)
-
-     if merge_option == MergeOptions.RAISE:
-         raise NotImplementedError("RAISE merge option not implemented")
-         _validate_matching_ids(collar_df, survey_df_vertex)
-     elif merge_option == MergeOptions.INTERSECT:
-         return _Intersect.process_intersection(collar_df, survey_df_vertex, survey)
-     else:
-         raise ValueError("Unsupported merge option")
-
-
- def _create_collar_df(collars, slice_, well_id_mapper: dict ):
-     collar_df = pd.DataFrame(collars.collar_loc.points[slice_], columns=['X', 'Y', 'Z'])
-     selected_collars:list = collars.ids[slice_]
-     collar_df['well_id'] = pd.Series(selected_collars).map(well_id_mapper)
-     return collar_df
-
-
- def _create_survey_df(survey):
-     survey_df_vertex = pd.DataFrame(survey.survey_trajectory.data.vertex, columns=['X', 'Y', 'Z'])
-     vertex_attrs = survey.survey_trajectory.data.points_attributes
-     id_int_vertex = vertex_attrs['well_id']
-     survey_df_vertex['well_id'] = id_int_vertex.map(pd.Series(survey.ids))
-     return survey_df_vertex
-
-
- def _validate_matching_ids(collar_df, survey_df_vertex):
-     missing_from_survey = set(collar_df['well_id']) - set(survey_df_vertex['well_id'])
-     missing_from_collar = set(survey_df_vertex['well_id']) - set(collar_df['well_id'])
-     if missing_from_survey or missing_from_collar:
-         raise ValueError(f"Collars and survey ids do not match. Missing in survey: {missing_from_survey}, Missing in collars: {missing_from_collar}")
-
-
- class _Intersect:
-     """This class is just to create a namespace for the intersection method"""
-     @staticmethod
-     def process_intersection(collar_df: pd.DataFrame, survey_df_vertex: pd.DataFrame, survey: Survey) -> LineSet:
-
-         combined_df_vertex = pd.merge(
-             left=survey_df_vertex,
-             right=collar_df,
-             how='outer',
-             indicator=True,
-             on='well_id',
-             suffixes=('_collar', '_survey')
-         )
-         combined_df_vertex = combined_df_vertex[combined_df_vertex['_merge'].isin(['left_only', 'both']) ]
-
-         vertex_attrs = survey.survey_trajectory.data.points_attributes
-         if len(combined_df_vertex) != len(vertex_attrs):
-             raise ValueError("Vertex and vertex attributes have different lengths")
-         _Intersect._add_collar_coordinates(combined_df_vertex)
-
-         combined_df_cells = _Intersect._generate_cells(combined_df_vertex, survey)
-         cell_attributes = survey.survey_trajectory.data.cell_attributes
-         if len(combined_df_cells) != len(cell_attributes):
-             raise ValueError("Cells and cell attributes have different lengths")
-
-         line_set: LineSet = _Intersect._create_line_set(combined_df_vertex, combined_df_cells, survey)
-
-         line_set.data.data.attrs.update(survey.survey_trajectory.data.data.attrs)
-         line_set.data.data.attrs.update(collar_df.attrs)
-
-         return line_set
-
-     @staticmethod
-     def _add_collar_coordinates(combined_df_vertex: pd.DataFrame):
-         combined_df_vertex['X_survey'] += combined_df_vertex['X_collar']
-         combined_df_vertex['Y_survey'] += combined_df_vertex['Y_collar']
-         combined_df_vertex['Z_survey'] += combined_df_vertex['Z_collar']
-
-     @staticmethod
-     def _generate_cells(combined_df_vertex: pd.DataFrame, survey: Survey) -> pd.DataFrame:
-         combined_df_cells = []
-         previous_index = 0
-         for e, well_id in enumerate(survey.ids):
-             df_vertex_well = combined_df_vertex[combined_df_vertex['well_id'] == well_id]
-             indices = np.arange(len(df_vertex_well)) + previous_index
-             previous_index += len(df_vertex_well)
-             cells = np.array([indices[:-1], indices[1:]]).T
-             df_cells_well = pd.DataFrame(cells, columns=['cell1', 'cell2'])
-             df_cells_well['well_id'] = well_id
-             df_cells_well['well_id_int'] = e
-             combined_df_cells.append(df_cells_well)
-
-         return pd.concat(combined_df_cells, ignore_index=True)
-
-     @staticmethod
-     def _create_line_set(combined_df_vertex: pd.DataFrame, combined_df_cells: pd.DataFrame, survey: Survey) -> LineSet:
-         vertex_attrs = survey.survey_trajectory.data.points_attributes
-         cell_attributes = survey.survey_trajectory.data.cell_attributes
-
-         combined_trajectory_unstruct = UnstructuredData.from_array(
-             vertex=combined_df_vertex[['X_survey', 'Y_survey', 'Z_survey']].values,
-             cells=combined_df_cells[['cell1', 'cell2']].values,
-             vertex_attr=vertex_attrs,
-             cells_attr=cell_attributes
-         )
-
-         return LineSet(data=combined_trajectory_unstruct, radius=500)
+ import enum
+ import numpy as np
+ import pandas as pd
+
+ from .collars import Collars
+ from .survey import Survey
+ from ...structs import LineSet
+ from ...structs.base_structures import UnstructuredData
+
+
+ class MergeOptions(enum.Enum):
+     RAISE = enum.auto()
+     INTERSECT = enum.auto()
+
+
+ def create_combined_trajectory(collars: Collars, survey: Survey, merge_option: MergeOptions, slice_: slice):
+     collar_df = _create_collar_df(collars, slice_, survey.well_id_mapper)
+     survey_df_vertex = _create_survey_df(survey)
+
+     if merge_option == MergeOptions.RAISE:
+         raise NotImplementedError("RAISE merge option not implemented")
+         _validate_matching_ids(collar_df, survey_df_vertex)
+     elif merge_option == MergeOptions.INTERSECT:
+         return _Intersect.process_intersection(collar_df, survey_df_vertex, survey)
+     else:
+         raise ValueError("Unsupported merge option")
+
+
+ def _create_collar_df(collars, slice_, well_id_mapper: dict ):
+     collar_df = pd.DataFrame(collars.collar_loc.points[slice_], columns=['X', 'Y', 'Z'])
+     selected_collars:list = collars.ids[slice_]
+     collar_df['well_id'] = pd.Series(selected_collars).map(well_id_mapper)
+     return collar_df
+
+
+ def _create_survey_df(survey):
+     survey_df_vertex = pd.DataFrame(survey.survey_trajectory.data.vertex, columns=['X', 'Y', 'Z'])
+     vertex_attrs = survey.survey_trajectory.data.points_attributes
+     id_int_vertex = vertex_attrs['well_id']
+     survey_df_vertex['well_id'] = id_int_vertex.map(pd.Series(survey.ids))
+     return survey_df_vertex
+
+
+ def _validate_matching_ids(collar_df, survey_df_vertex):
+     missing_from_survey = set(collar_df['well_id']) - set(survey_df_vertex['well_id'])
+     missing_from_collar = set(survey_df_vertex['well_id']) - set(collar_df['well_id'])
+     if missing_from_survey or missing_from_collar:
+         raise ValueError(f"Collars and survey ids do not match. Missing in survey: {missing_from_survey}, Missing in collars: {missing_from_collar}")
+
+
+ class _Intersect:
+     """This class is just to create a namespace for the intersection method"""
+     @staticmethod
+     def process_intersection(collar_df: pd.DataFrame, survey_df_vertex: pd.DataFrame, survey: Survey) -> LineSet:
+
+         combined_df_vertex = pd.merge(
+             left=survey_df_vertex,
+             right=collar_df,
+             how='outer',
+             indicator=True,
+             on='well_id',
+             suffixes=('_collar', '_survey')
+         )
+         combined_df_vertex = combined_df_vertex[combined_df_vertex['_merge'].isin(['left_only', 'both']) ]
+
+         vertex_attrs = survey.survey_trajectory.data.points_attributes
+         if len(combined_df_vertex) != len(vertex_attrs):
+             raise ValueError("Vertex and vertex attributes have different lengths")
+         _Intersect._add_collar_coordinates(combined_df_vertex)
+
+         combined_df_cells = _Intersect._generate_cells(combined_df_vertex, survey)
+         cell_attributes = survey.survey_trajectory.data.cell_attributes
+         if len(combined_df_cells) != len(cell_attributes):
+             raise ValueError("Cells and cell attributes have different lengths")
+
+         line_set: LineSet = _Intersect._create_line_set(combined_df_vertex, combined_df_cells, survey)
+
+         line_set.data.data.attrs.update(survey.survey_trajectory.data.data.attrs)
+         line_set.data.data.attrs.update(collar_df.attrs)
+
+         return line_set
+
+     @staticmethod
+     def _add_collar_coordinates(combined_df_vertex: pd.DataFrame):
+         combined_df_vertex['X_survey'] += combined_df_vertex['X_collar']
+         combined_df_vertex['Y_survey'] += combined_df_vertex['Y_collar']
+         combined_df_vertex['Z_survey'] += combined_df_vertex['Z_collar']
+
+     @staticmethod
+     def _generate_cells(combined_df_vertex: pd.DataFrame, survey: Survey) -> pd.DataFrame:
+         combined_df_cells = []
+         previous_index = 0
+         for e, well_id in enumerate(survey.ids):
+             df_vertex_well = combined_df_vertex[combined_df_vertex['well_id'] == well_id]
+             indices = np.arange(len(df_vertex_well)) + previous_index
+             previous_index += len(df_vertex_well)
+             cells = np.array([indices[:-1], indices[1:]]).T
+             df_cells_well = pd.DataFrame(cells, columns=['cell1', 'cell2'])
+             df_cells_well['well_id'] = well_id
+             df_cells_well['well_id_int'] = e
+             combined_df_cells.append(df_cells_well)
+
+         return pd.concat(combined_df_cells, ignore_index=True)
+
+     @staticmethod
+     def _create_line_set(combined_df_vertex: pd.DataFrame, combined_df_cells: pd.DataFrame, survey: Survey) -> LineSet:
+         vertex_attrs = survey.survey_trajectory.data.points_attributes
+         cell_attributes = survey.survey_trajectory.data.cell_attributes
+
+         combined_trajectory_unstruct = UnstructuredData.from_array(
+             vertex=combined_df_vertex[['X_survey', 'Y_survey', 'Z_survey']].values,
+             cells=combined_df_cells[['cell1', 'cell2']].values,
+             vertex_attr=vertex_attrs,
+             cells_attr=cell_attributes
+         )
+
+         return LineSet(data=combined_trajectory_unstruct, radius=500)
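For orientation, the INTERSECT path above boils down to one pandas idiom. A toy restatement follows (the frames here are invented for illustration, not taken from the wheel):

import pandas as pd

# Outer merge on well_id with indicator=True, as in _Intersect.process_intersection;
# keeping 'left_only' and 'both' preserves survey vertices even without a collar.
# Note the suffix order in the diff: the left (survey) frame receives '_collar'
# and the right (collar) frame '_survey', so after the in-place additions the
# *_survey columns end up holding absolute coordinates.
survey_df_vertex = pd.DataFrame({
    'X': [0.0, 0.1, 0.0], 'Y': [0.0, 0.2, 0.0], 'Z': [0.0, -5.0, -3.0],
    'well_id': ['A', 'A', 'B'],
})
collar_df = pd.DataFrame({'X': [100.0], 'Y': [200.0], 'Z': [50.0], 'well_id': ['A']})

combined = pd.merge(
    left=survey_df_vertex, right=collar_df,
    how='outer', indicator=True, on='well_id',
    suffixes=('_collar', '_survey'),
)
combined = combined[combined['_merge'].isin(['left_only', 'both'])]
for axis in ('X', 'Y', 'Z'):
    combined[f'{axis}_survey'] += combined[f'{axis}_collar']
print(combined[['well_id', 'X_survey', 'Y_survey', 'Z_survey']])  # well B stays, with NaN offsets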
subsurface/core/geological_formats/boreholes/_map_attrs_to_survey.py
@@ -0,0 +1,236 @@
+ import numpy as np
+ import pandas as pd
+ import xarray as xr
+ from typing import Tuple, Optional, Union, List, Any
+
+ from subsurface import optional_requirements
+ from ...structs.base_structures import UnstructuredData
+ from ...structs.base_structures._unstructured_data_constructor import raw_attributes_to_dict_data_arrays
+ from ...structs.unstructured_elements import LineSet
+
+
+ def combine_survey_and_attrs(attrs: pd.DataFrame, survey_trajectory: LineSet,well_id_mapper: dict[str, int]) -> UnstructuredData:
+     # Import moved to top for clarity and possibly avoiding repeated imports if called multiple times
+
+     # Ensure all columns in lith exist in new_attrs, if not, add them as NaN
+     new_attrs = _map_attrs_to_measured_depths(attrs, survey_trajectory, well_id_mapper)
+
+     # Construct the final xarray dict without intermediate variable
+     points_attributes_xarray_dict: dict[str, xr.DataArray] = raw_attributes_to_dict_data_arrays(
+         default_attributes_name="vertex_attrs",
+         n_items=survey_trajectory.data.data["vertex_attrs"].shape[0],  # TODO: Can I look this on new_attrs to remove line 11?
+         dims=["points", "vertex_attr"],
+         raw_attributes=new_attrs
+     )
+
+     # Inline construction of UnstructuredData
+     return UnstructuredData.from_data_arrays_dict(
+         xarray_dict={
+                 "vertex"      : survey_trajectory.data.data["vertex"],
+                 "cells"       : survey_trajectory.data.data["cells"],
+                 "vertex_attrs": points_attributes_xarray_dict["vertex_attrs"],
+                 "cell_attrs"  : survey_trajectory.data.data["cell_attrs"]
+         },
+         xarray_attributes=survey_trajectory.data.data.attrs,
+         default_cells_attributes_name=survey_trajectory.data.cells_attr_name,
+         default_points_attributes_name=survey_trajectory.data.vertex_attr_name
+     )
+
+
+ def _prepare_categorical_data(attrs: pd.DataFrame) -> pd.DataFrame:
+     """
+     Prepare categorical data for interpolation by converting categorical columns to numeric IDs.
+
+     Args:
+         attrs: DataFrame containing attribute data
+
+     Returns:
+         Modified DataFrame with categorical data prepared for interpolation
+     """
+     # Create a copy to avoid modifying the original
+     attrs_copy = attrs.copy()
+
+     # If component lith exists but lith_ids doesn't, create lith_ids
+     if 'component lith' in attrs_copy.columns and 'lith_ids' not in attrs_copy.columns:
+         attrs_copy['lith_ids'], _ = pd.factorize(attrs_copy['component lith'], use_na_sentinel=True)
+
+     return attrs_copy
+
+
+ def _prepare_new_attributes(attrs: pd.DataFrame, survey_trajectory: LineSet) -> pd.DataFrame:
+     """
+     Prepare the new attributes DataFrame by adding missing columns from attrs.
+
+     Args:
+         attrs: DataFrame containing attribute data
+         survey_trajectory: LineSet containing trajectory data
+
+     Returns:
+         New attributes DataFrame with all necessary columns
+     """
+     # Start with a copy of the existing attributes DataFrame
+     new_attrs = survey_trajectory.data.points_attributes.copy()
+
+     # Add missing columns from attrs, preserving their dtypes
+     for col in attrs.columns.difference(new_attrs.columns):
+         new_attrs[col] = np.nan if pd.api.types.is_numeric_dtype(attrs[col]) else None
+
+     return new_attrs
+
+
+ def _get_interpolation_locations(attrs_well: pd.DataFrame, well_name: str) -> np.ndarray:
+     """
+     Determine the locations to use for interpolation based on top and base values.
+
+     Args:
+         attrs_well: DataFrame containing well attribute data
+         well_name: Name of the current well
+
+     Returns:
+         Array of location values to use for interpolation
+     """
+     if "base" not in attrs_well.columns:
+         raise ValueError(f"Base column must be present in the file for well '{well_name}'.")
+     elif "top" not in attrs_well.columns:
+         return attrs_well['base'].values
+     else:
+         return ((attrs_well['top'] + attrs_well['base']) / 2).values
+
+
+ def _nearest_neighbor_categorical_interpolation(
+         x_locations: np.ndarray,
+         y_values: np.ndarray,
+         target_depths: np.ndarray
+ ) -> np.ndarray:
+     """
+     Custom nearest neighbor interpolation for categorical data.
+
+     This function finds the nearest source point for each target point
+     and assigns the corresponding categorical value.
+
+     Args:
+         x_locations: Array of source locations
+         y_values: Array of categorical values at source locations
+         target_depths: Array of target depths for interpolation
+
+     Returns:
+         Array of interpolated categorical values
+     """
+     # Initialize output array with NaN or None values
+     result = np.full(target_depths.shape, np.nan, dtype=object)
+
+     # For each target depth, find the nearest source location
+     for i, depth in enumerate(target_depths):
+         # Calculate distances to all source locations
+         distances = np.abs(x_locations - depth)
+
+         # Find the index of the minimum distance
+         if len(distances) > 0:
+             nearest_idx = np.argmin(distances)
+             result[i] = y_values[nearest_idx]
+
+     return result
+
+
+ def _interpolate_attribute(
+         attr_values: pd.Series,
+         x_locations: np.ndarray,
+         target_depths: np.ndarray,
+         column_name: str,
+         is_categorical: bool
+ ) -> np.ndarray:
+     """
+     Interpolate attribute values to target depths.
+
+     Args:
+         attr_values: Series containing attribute values
+         x_locations: Array of source locations for interpolation
+         target_depths: Array of target depths for interpolation
+         column_name: Name of the column being interpolated
+         is_categorical: Whether the attribute is categorical
+
+     Returns:
+         Array of interpolated values
+     """
+     # For categorical data or specific columns, use custom nearest neighbor interpolation
+     if is_categorical or column_name in ['lith_ids', 'component lith']:
+         return _nearest_neighbor_categorical_interpolation(
+             x_locations=x_locations,
+             y_values=attr_values.values,
+             target_depths=target_depths
+         )
+     else:
+         # For numerical data, use scipy's interp1d with linear interpolation
+         scipy = optional_requirements.require_scipy()
+         interp_func = scipy.interpolate.interp1d(
+             x=x_locations,
+             y=attr_values.values,
+             bounds_error=False,
+             fill_value=np.nan,
+             kind='linear'
+         )
+         return interp_func(target_depths)
+
+
+ def _map_attrs_to_measured_depths(attrs: pd.DataFrame, survey_trajectory: LineSet, well_id_mapper: dict[str, int]) -> pd.DataFrame:
+     """
+     Map attributes to measured depths for each well.
+
+     Args:
+         attrs: DataFrame containing attribute data
+         survey_trajectory: LineSet containing trajectory data
+         well_id_mapper: Dictionary mapping well names to IDs
+
+     Returns:
+         DataFrame with attributes mapped to measured depths
+     """
+     # Extract trajectory data
+     trajectory: xr.DataArray = survey_trajectory.data.data["vertex_attrs"]
+     trajectory_well_id: xr.DataArray = trajectory.sel({'vertex_attr': 'well_id'})
+     measured_depths: np.ndarray = trajectory.sel({'vertex_attr': 'measured_depths'}).values.astype(np.float64)
+
+     # Prepare data
+     attrs: pd.DataFrame = _prepare_categorical_data(attrs)
+     new_attrs: pd.DataFrame = _prepare_new_attributes(attrs, survey_trajectory)
+
+     # Process each well
+     for well_name in well_id_mapper:
+         # Skip wells not in the attributes DataFrame
+         if well_name not in attrs.index:
+             print(f"Well '{well_name}' does not exist in the attributes DataFrame.")
+             continue
+
+         # Get well data
+         attrs_well = attrs.loc[[well_name]]
+         well_id = well_id_mapper.get(well_name)
+         well_mask = (trajectory_well_id == well_id).values
+         well_depths = measured_depths[well_mask]
+
+         # Get interpolation locations
+         interp_locations = _get_interpolation_locations(attrs_well, well_name)
+
+         # Interpolate each attribute
+         for col in attrs_well.columns:
+             # Skip location and ID columns
+             if col in ['top', 'base', 'well_id']:
+                 continue
+
+             attr_values = attrs_well[col]
+             is_categorical = attr_values.dtype == 'O' or isinstance(attr_values.dtype, pd.CategoricalDtype)
+
+             # Skip columns that can't be interpolated and aren't categorical
+             if is_categorical and col not in ['lith_ids', 'component lith']:
+                 continue
+
+             # Interpolate and assign values
+             interpolated_values = _interpolate_attribute(
+                 attr_values,
+                 interp_locations,
+                 well_depths,
+                 col,
+                 is_categorical
+             )
+
+             new_attrs.loc[well_mask, col] = interpolated_values
+
+     return new_attrs
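The categorical branch of _interpolate_attribute is a plain 1-D nearest-neighbour lookup. A self-contained restatement of that rule (the function name and sample data below are illustrative, not from the wheel):

import numpy as np

# Each target depth takes the value at the closest source location, mirroring
# _nearest_neighbor_categorical_interpolation above for columns such as
# 'component lith' and 'lith_ids'.
def nearest_categorical(x_locations, y_values, target_depths):
    result = np.full(target_depths.shape, np.nan, dtype=object)
    for i, depth in enumerate(target_depths):
        distances = np.abs(x_locations - depth)
        if len(distances) > 0:
            result[i] = y_values[np.argmin(distances)]
    return result

mid_depths = np.array([5.0, 15.0, 30.0])          # e.g. (top + base) / 2 per layer
lith = np.array(['sand', 'shale', 'limestone'])
trajectory_md = np.array([0.0, 7.0, 20.0, 29.0])  # trajectory measured depths
print(nearest_categorical(mid_depths, lith, trajectory_md))
# -> ['sand' 'sand' 'shale' 'limestone']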
subsurface/core/geological_formats/boreholes/_survey_to_unstruct.py
@@ -0,0 +1,163 @@
+ from typing import Hashable, Optional
+
+ import numpy as np
+ import pandas as pd
+
+ from subsurface import optional_requirements
+ from ...structs.base_structures import UnstructuredData
+
+
+ def data_frame_to_unstructured_data(survey_df: 'pd.DataFrame', number_nodes: int, attr_df: Optional['pd.DataFrame'] = None,
+                                     duplicate_attr_depths: bool = False) -> UnstructuredData:
+     wp = optional_requirements.require_wellpathpy()
+
+     cum_vertex: np.ndarray = np.empty((0, 3), dtype=np.float32)
+     cells: np.ndarray = np.empty((0, 2), dtype=np.int_)
+     cell_attr: pd.DataFrame = pd.DataFrame(columns=['well_id'], dtype=np.float32)
+     vertex_attr: pd.DataFrame = pd.DataFrame()
+
+     for e, (borehole_id, data) in enumerate(survey_df.groupby(level=0)):
+         dev = wp.deviation(
+             md=data['md'].values,
+             inc=data['inc'].values,
+             azi=data['azi'].values
+         )
+
+         md_min = dev.md.min()
+         md_max = dev.md.max()
+
+         attr_depths = _grab_depths_from_attr(
+             attr_df=attr_df,
+             borehole_id=borehole_id,
+             duplicate_attr_depths=duplicate_attr_depths,
+             md_max=md_max,
+             md_min=md_min
+         )
+
+         # Now combine attr_depths with depths
+         md_min = dev.md.min()
+         md_max = dev.md.max()
+         depths = np.linspace(md_min, md_max, number_nodes)
+         depths = np.union1d(depths, attr_depths)
+         depths.sort()
+
+         # Resample positions at depths
+         pos = dev.minimum_curvature().resample(depths=depths)
+         vertex_count = cum_vertex.shape[0]
+
+         this_well_vertex = np.vstack([pos.easting, pos.northing, pos.depth]).T
+         cum_vertex = np.vstack([cum_vertex, this_well_vertex])
+         measured_depths = _calculate_distances(array_of_vertices=this_well_vertex)
+
+         n_vertex_shift_0 = np.arange(0, len(pos.depth) - 1, dtype=np.int_)
+         n_vertex_shift_1 = np.arange(1, len(pos.depth), dtype=np.int_)
+         cell_per_well = np.vstack([n_vertex_shift_0, n_vertex_shift_1]).T + vertex_count
+         cells = np.vstack([cells, cell_per_well])
+
+         attribute_values = np.isin(depths, attr_depths)
+
+         vertex_attr_per_well = pd.DataFrame({
+                 'well_id'        : [e] * len(pos.depth),
+                 'measured_depths': measured_depths,
+                 'is_attr_point'  : attribute_values,
+         })
+
+         vertex_attr = pd.concat([vertex_attr, vertex_attr_per_well], ignore_index=True)
+
+         # Add the id (e), to cell_attr
+         cell_attr = pd.concat([cell_attr, pd.DataFrame({'well_id': [e] * len(cell_per_well)})], ignore_index=True)
+
+     unstruct = UnstructuredData.from_array(
+         vertex=cum_vertex,
+         cells=cells.astype(int),
+         vertex_attr=vertex_attr.reset_index(drop=True),
+         cells_attr=cell_attr.reset_index(drop=True)
+     )
+
+     unstruct.data.attrs["well_id_mapper"] = {well_id: e for e, well_id in enumerate(survey_df.index.unique(level=0))}
+
+     return unstruct
+
+
+ def _grab_depths_from_attr(
+         attr_df: pd.DataFrame,
+         borehole_id: Hashable,
+         duplicate_attr_depths: bool,
+         md_max: float,
+         md_min: float
+ ) -> np.ndarray:
+     # Initialize attr_depths and attr_labels as empty arrays
+     attr_depths = np.array([], dtype=float)
+     attr_labels = np.array([], dtype='<U4')  # Initialize labels for 'top' and 'base'
+
+     if attr_df is None or ("top" not in attr_df.columns and "base" not in attr_df.columns):
+         return attr_depths
+
+     try:
+         vals = attr_df.loc[borehole_id]
+
+         tops = np.array([], dtype=float)
+         bases = np.array([], dtype=float)
+
+         if 'top' in vals:
+             if isinstance(vals, pd.DataFrame):
+                 tops = vals['top'].values.flatten()
+             else:
+                 tops = np.array([vals['top']])
+             # Convert to float and remove NaNs
+             tops = tops.astype(float)
+             tops = tops[~np.isnan(tops)]
+             # Clip to within md range
+             tops = tops[(tops >= md_min) & (tops <= md_max)]
+
+         if 'base' in vals:
+             if isinstance(vals, pd.DataFrame):
+                 bases = vals['base'].values.flatten()
+             else:
+                 bases = np.array([vals['base']])
+             # Convert to float and remove NaNs
+             bases = bases.astype(float)
+             bases = bases[~np.isnan(bases)]
+             # Clip to within md range
+             bases = bases[(bases >= md_min) & (bases <= md_max)]
+
+         # Combine tops and bases into attr_depths with labels
+         attr_depths = np.concatenate((tops, bases))
+         attr_labels = np.array(['top'] * len(tops) + ['base'] * len(bases))
+
+         # Drop duplicates while preserving order
+         _, unique_indices = np.unique(attr_depths, return_index=True)
+         attr_depths = attr_depths[unique_indices]
+         attr_labels = attr_labels[unique_indices]
+
+     except KeyError:
+         # No attributes for this borehole_id or missing columns
+         attr_depths = np.array([], dtype=float)
+         attr_labels = np.array([], dtype='<U4')
+
+     # If duplicate_attr_depths is True, duplicate attr_depths with a tiny offset
+     if duplicate_attr_depths and len(attr_depths) > 0:
+         tiny_offset = (md_max - md_min) * 1e-6  # A tiny fraction of the depth range
+         # Create offsets: +tiny_offset for 'top', -tiny_offset for 'base'
+         offsets = np.where(attr_labels == 'top', tiny_offset, -tiny_offset)
+         duplicated_attr_depths = attr_depths + offsets
+         # Ensure the duplicated depths are within the md range
+         valid_indices = (duplicated_attr_depths >= md_min) & (duplicated_attr_depths <= md_max)
+         duplicated_attr_depths = duplicated_attr_depths[valid_indices]
+         # Original attribute depths
+         original_attr_depths = attr_depths
+         # Combine originals and duplicates
+         attr_depths = np.hstack([original_attr_depths, duplicated_attr_depths])
+
+     return attr_depths
+
+
+ def _calculate_distances(array_of_vertices: np.ndarray) -> np.ndarray:
+     # Calculate the differences between consecutive points
+     differences = np.diff(array_of_vertices, axis=0)
+
+     # Calculate the Euclidean distance for each pair of consecutive points
+     distances = np.linalg.norm(differences, axis=1)
+     # Insert a 0 at the beginning to represent the starting point at the surface
+     measured_depths = np.insert(np.cumsum(distances), 0, 0)
+     return measured_depths
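Two pieces of bookkeeping above are easy to verify in isolation: consecutive resampled vertices are paired into 2-node line cells, and measured depth is the running sum of Euclidean segment lengths, as in _calculate_distances. A minimal sketch with invented vertices:

import numpy as np

# Three vertices of one well: straight down 10 m, then a deviated segment.
vertices = np.array([[0.0, 0.0, 0.0],
                     [0.0, 0.0, 10.0],
                     [1.0, 0.0, 20.0]])

# Pair consecutive vertex indices into line cells, as done per well in
# data_frame_to_unstructured_data (before the cumulative vertex offset).
idx = np.arange(len(vertices))
cells = np.vstack([idx[:-1], idx[1:]]).T       # [[0 1], [1 2]]

# Accumulate segment lengths into measured depth, with 0 at the collar.
segment_lengths = np.linalg.norm(np.diff(vertices, axis=0), axis=1)
measured_depths = np.insert(np.cumsum(segment_lengths), 0, 0)
print(cells)
print(measured_depths)                         # [ 0.  10.  20.0499...]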