subsurface-terra 2025.1.0rc14__py3-none-any.whl → 2025.1.0rc16__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- subsurface/__init__.py +31 -31
- subsurface/_version.py +34 -21
- subsurface/api/__init__.py +13 -13
- subsurface/api/interfaces/__init__.py +3 -3
- subsurface/api/interfaces/stream.py +136 -136
- subsurface/api/reader/read_wells.py +78 -78
- subsurface/core/geological_formats/boreholes/_combine_trajectories.py +117 -117
- subsurface/core/geological_formats/boreholes/_map_attrs_to_survey.py +236 -0
- subsurface/core/geological_formats/boreholes/_survey_to_unstruct.py +163 -0
- subsurface/core/geological_formats/boreholes/boreholes.py +140 -116
- subsurface/core/geological_formats/boreholes/collars.py +26 -26
- subsurface/core/geological_formats/boreholes/survey.py +86 -380
- subsurface/core/geological_formats/fault.py +47 -47
- subsurface/core/reader_helpers/reader_unstruct.py +11 -11
- subsurface/core/reader_helpers/readers_data.py +130 -130
- subsurface/core/reader_helpers/readers_wells.py +13 -13
- subsurface/core/structs/__init__.py +3 -3
- subsurface/core/structs/base_structures/__init__.py +2 -2
- subsurface/core/structs/base_structures/_liquid_earth_mesh.py +121 -121
- subsurface/core/structs/base_structures/_unstructured_data_constructor.py +70 -70
- subsurface/core/structs/base_structures/base_structures_enum.py +6 -6
- subsurface/core/structs/base_structures/structured_data.py +282 -282
- subsurface/core/structs/base_structures/unstructured_data.py +319 -319
- subsurface/core/structs/structured_elements/octree_mesh.py +10 -10
- subsurface/core/structs/structured_elements/structured_grid.py +59 -59
- subsurface/core/structs/structured_elements/structured_mesh.py +9 -9
- subsurface/core/structs/unstructured_elements/__init__.py +3 -3
- subsurface/core/structs/unstructured_elements/line_set.py +72 -72
- subsurface/core/structs/unstructured_elements/point_set.py +43 -43
- subsurface/core/structs/unstructured_elements/tetrahedron_mesh.py +35 -35
- subsurface/core/structs/unstructured_elements/triangular_surface.py +62 -62
- subsurface/core/utils/utils_core.py +38 -38
- subsurface/modules/reader/__init__.py +13 -13
- subsurface/modules/reader/faults/faults.py +80 -80
- subsurface/modules/reader/from_binary.py +46 -46
- subsurface/modules/reader/mesh/_GOCAD_mesh.py +82 -82
- subsurface/modules/reader/mesh/_trimesh_reader.py +447 -447
- subsurface/modules/reader/mesh/csv_mesh_reader.py +53 -53
- subsurface/modules/reader/mesh/dxf_reader.py +177 -177
- subsurface/modules/reader/mesh/glb_reader.py +30 -30
- subsurface/modules/reader/mesh/mx_reader.py +232 -232
- subsurface/modules/reader/mesh/obj_reader.py +53 -53
- subsurface/modules/reader/mesh/omf_mesh_reader.py +43 -43
- subsurface/modules/reader/mesh/surface_reader.py +56 -56
- subsurface/modules/reader/mesh/surfaces_api.py +41 -41
- subsurface/modules/reader/profiles/__init__.py +3 -3
- subsurface/modules/reader/profiles/profiles_core.py +197 -197
- subsurface/modules/reader/read_netcdf.py +38 -38
- subsurface/modules/reader/topography/__init__.py +7 -7
- subsurface/modules/reader/topography/topo_core.py +100 -100
- subsurface/modules/reader/volume/read_grav3d.py +478 -428
- subsurface/modules/reader/volume/read_volume.py +327 -230
- subsurface/modules/reader/volume/segy_reader.py +105 -105
- subsurface/modules/reader/volume/seismic.py +173 -173
- subsurface/modules/reader/volume/volume_utils.py +43 -43
- subsurface/modules/reader/wells/DEP/__init__.py +43 -43
- subsurface/modules/reader/wells/DEP/_well_files_reader.py +167 -167
- subsurface/modules/reader/wells/DEP/_wells_api.py +61 -61
- subsurface/modules/reader/wells/DEP/_welly_reader.py +180 -180
- subsurface/modules/reader/wells/DEP/pandas_to_welly.py +212 -212
- subsurface/modules/reader/wells/_read_to_df.py +57 -57
- subsurface/modules/reader/wells/read_borehole_interface.py +148 -148
- subsurface/modules/reader/wells/wells_utils.py +68 -68
- subsurface/modules/tools/mocking_aux.py +104 -104
- subsurface/modules/visualization/__init__.py +2 -2
- subsurface/modules/visualization/to_pyvista.py +320 -320
- subsurface/modules/writer/to_binary.py +12 -12
- subsurface/modules/writer/to_rex/common.py +78 -78
- subsurface/modules/writer/to_rex/data_struct.py +74 -74
- subsurface/modules/writer/to_rex/gempy_to_rexfile.py +791 -791
- subsurface/modules/writer/to_rex/material_encoder.py +44 -44
- subsurface/modules/writer/to_rex/mesh_encoder.py +152 -152
- subsurface/modules/writer/to_rex/to_rex.py +115 -115
- subsurface/modules/writer/to_rex/utils.py +15 -15
- subsurface/optional_requirements.py +116 -116
- {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/METADATA +194 -194
- subsurface_terra-2025.1.0rc16.dist-info/RECORD +98 -0
- {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/WHEEL +1 -1
- {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/licenses/LICENSE +203 -203
- subsurface_terra-2025.1.0rc14.dist-info/RECORD +0 -96
- {subsurface_terra-2025.1.0rc14.dist-info → subsurface_terra-2025.1.0rc16.dist-info}/top_level.txt +0 -0

subsurface/core/geological_formats/boreholes/_combine_trajectories.py
@@ -1,117 +1,117 @@
-import enum
-import numpy as np
-import pandas as pd
-
-from .collars import Collars
-from .survey import Survey
-from ...structs import LineSet
-from ...structs.base_structures import UnstructuredData
-
-
-class MergeOptions(enum.Enum):
-    RAISE = enum.auto()
-    INTERSECT = enum.auto()
-
-
-def create_combined_trajectory(collars: Collars, survey: Survey, merge_option: MergeOptions, slice_: slice):
-    collar_df = _create_collar_df(collars, slice_, survey.well_id_mapper)
-    survey_df_vertex = _create_survey_df(survey)
-
-    if merge_option == MergeOptions.RAISE:
-        raise NotImplementedError("RAISE merge option not implemented")
-        _validate_matching_ids(collar_df, survey_df_vertex)
-    elif merge_option == MergeOptions.INTERSECT:
-        return _Intersect.process_intersection(collar_df, survey_df_vertex, survey)
-    else:
-        raise ValueError("Unsupported merge option")
-
-
-def _create_collar_df(collars, slice_, well_id_mapper: dict):
-    collar_df = pd.DataFrame(collars.collar_loc.points[slice_], columns=['X', 'Y', 'Z'])
-    selected_collars: list = collars.ids[slice_]
-    collar_df['well_id'] = pd.Series(selected_collars).map(well_id_mapper)
-    return collar_df
-
-
-def _create_survey_df(survey):
-    survey_df_vertex = pd.DataFrame(survey.survey_trajectory.data.vertex, columns=['X', 'Y', 'Z'])
-    vertex_attrs = survey.survey_trajectory.data.points_attributes
-    id_int_vertex = vertex_attrs['well_id']
-    survey_df_vertex['well_id'] = id_int_vertex.map(pd.Series(survey.ids))
-    return survey_df_vertex
-
-
-def _validate_matching_ids(collar_df, survey_df_vertex):
-    missing_from_survey = set(collar_df['well_id']) - set(survey_df_vertex['well_id'])
-    missing_from_collar = set(survey_df_vertex['well_id']) - set(collar_df['well_id'])
-    if missing_from_survey or missing_from_collar:
-        raise ValueError(f"Collars and survey ids do not match. Missing in survey: {missing_from_survey}, Missing in collars: {missing_from_collar}")
-
-
-class _Intersect:
-    """This class is just to create a namespace for the intersection method"""
-    @staticmethod
-    def process_intersection(collar_df: pd.DataFrame, survey_df_vertex: pd.DataFrame, survey: Survey) -> LineSet:
-
-        combined_df_vertex = pd.merge(
-            left=survey_df_vertex,
-            right=collar_df,
-            how='outer',
-            indicator=True,
-            on='well_id',
-            suffixes=('_collar', '_survey')
-        )
-        combined_df_vertex = combined_df_vertex[combined_df_vertex['_merge'].isin(['left_only', 'both'])]
-
-        vertex_attrs = survey.survey_trajectory.data.points_attributes
-        if len(combined_df_vertex) != len(vertex_attrs):
-            raise ValueError("Vertex and vertex attributes have different lengths")
-        _Intersect._add_collar_coordinates(combined_df_vertex)
-
-        combined_df_cells = _Intersect._generate_cells(combined_df_vertex, survey)
-        cell_attributes = survey.survey_trajectory.data.cell_attributes
-        if len(combined_df_cells) != len(cell_attributes):
-            raise ValueError("Cells and cell attributes have different lengths")
-
-        line_set: LineSet = _Intersect._create_line_set(combined_df_vertex, combined_df_cells, survey)
-
-        line_set.data.data.attrs.update(survey.survey_trajectory.data.data.attrs)
-        line_set.data.data.attrs.update(collar_df.attrs)
-
-        return line_set
-
-    @staticmethod
-    def _add_collar_coordinates(combined_df_vertex: pd.DataFrame):
-        combined_df_vertex['X_survey'] += combined_df_vertex['X_collar']
-        combined_df_vertex['Y_survey'] += combined_df_vertex['Y_collar']
-        combined_df_vertex['Z_survey'] += combined_df_vertex['Z_collar']
-
-    @staticmethod
-    def _generate_cells(combined_df_vertex: pd.DataFrame, survey: Survey) -> pd.DataFrame:
-        combined_df_cells = []
-        previous_index = 0
-        for e, well_id in enumerate(survey.ids):
-            df_vertex_well = combined_df_vertex[combined_df_vertex['well_id'] == well_id]
-            indices = np.arange(len(df_vertex_well)) + previous_index
-            previous_index += len(df_vertex_well)
-            cells = np.array([indices[:-1], indices[1:]]).T
-            df_cells_well = pd.DataFrame(cells, columns=['cell1', 'cell2'])
-            df_cells_well['well_id'] = well_id
-            df_cells_well['well_id_int'] = e
-            combined_df_cells.append(df_cells_well)
-
-        return pd.concat(combined_df_cells, ignore_index=True)
-
-    @staticmethod
-    def _create_line_set(combined_df_vertex: pd.DataFrame, combined_df_cells: pd.DataFrame, survey: Survey) -> LineSet:
-        vertex_attrs = survey.survey_trajectory.data.points_attributes
-        cell_attributes = survey.survey_trajectory.data.cell_attributes
-
-        combined_trajectory_unstruct = UnstructuredData.from_array(
-            vertex=combined_df_vertex[['X_survey', 'Y_survey', 'Z_survey']].values,
-            cells=combined_df_cells[['cell1', 'cell2']].values,
-            vertex_attr=vertex_attrs,
-            cells_attr=cell_attributes
-        )
-
-        return LineSet(data=combined_trajectory_unstruct, radius=500)
+import enum
+import numpy as np
+import pandas as pd
+
+from .collars import Collars
+from .survey import Survey
+from ...structs import LineSet
+from ...structs.base_structures import UnstructuredData
+
+
+class MergeOptions(enum.Enum):
+    RAISE = enum.auto()
+    INTERSECT = enum.auto()
+
+
+def create_combined_trajectory(collars: Collars, survey: Survey, merge_option: MergeOptions, slice_: slice):
+    collar_df = _create_collar_df(collars, slice_, survey.well_id_mapper)
+    survey_df_vertex = _create_survey_df(survey)
+
+    if merge_option == MergeOptions.RAISE:
+        raise NotImplementedError("RAISE merge option not implemented")
+        _validate_matching_ids(collar_df, survey_df_vertex)
+    elif merge_option == MergeOptions.INTERSECT:
+        return _Intersect.process_intersection(collar_df, survey_df_vertex, survey)
+    else:
+        raise ValueError("Unsupported merge option")
+
+
+def _create_collar_df(collars, slice_, well_id_mapper: dict):
+    collar_df = pd.DataFrame(collars.collar_loc.points[slice_], columns=['X', 'Y', 'Z'])
+    selected_collars: list = collars.ids[slice_]
+    collar_df['well_id'] = pd.Series(selected_collars).map(well_id_mapper)
+    return collar_df
+
+
+def _create_survey_df(survey):
+    survey_df_vertex = pd.DataFrame(survey.survey_trajectory.data.vertex, columns=['X', 'Y', 'Z'])
+    vertex_attrs = survey.survey_trajectory.data.points_attributes
+    id_int_vertex = vertex_attrs['well_id']
+    survey_df_vertex['well_id'] = id_int_vertex.map(pd.Series(survey.ids))
+    return survey_df_vertex
+
+
+def _validate_matching_ids(collar_df, survey_df_vertex):
+    missing_from_survey = set(collar_df['well_id']) - set(survey_df_vertex['well_id'])
+    missing_from_collar = set(survey_df_vertex['well_id']) - set(collar_df['well_id'])
+    if missing_from_survey or missing_from_collar:
+        raise ValueError(f"Collars and survey ids do not match. Missing in survey: {missing_from_survey}, Missing in collars: {missing_from_collar}")
+
+
+class _Intersect:
+    """This class is just to create a namespace for the intersection method"""
+    @staticmethod
+    def process_intersection(collar_df: pd.DataFrame, survey_df_vertex: pd.DataFrame, survey: Survey) -> LineSet:
+
+        combined_df_vertex = pd.merge(
+            left=survey_df_vertex,
+            right=collar_df,
+            how='outer',
+            indicator=True,
+            on='well_id',
+            suffixes=('_collar', '_survey')
+        )
+        combined_df_vertex = combined_df_vertex[combined_df_vertex['_merge'].isin(['left_only', 'both'])]
+
+        vertex_attrs = survey.survey_trajectory.data.points_attributes
+        if len(combined_df_vertex) != len(vertex_attrs):
+            raise ValueError("Vertex and vertex attributes have different lengths")
+        _Intersect._add_collar_coordinates(combined_df_vertex)
+
+        combined_df_cells = _Intersect._generate_cells(combined_df_vertex, survey)
+        cell_attributes = survey.survey_trajectory.data.cell_attributes
+        if len(combined_df_cells) != len(cell_attributes):
+            raise ValueError("Cells and cell attributes have different lengths")
+
+        line_set: LineSet = _Intersect._create_line_set(combined_df_vertex, combined_df_cells, survey)
+
+        line_set.data.data.attrs.update(survey.survey_trajectory.data.data.attrs)
+        line_set.data.data.attrs.update(collar_df.attrs)
+
+        return line_set
+
+    @staticmethod
+    def _add_collar_coordinates(combined_df_vertex: pd.DataFrame):
+        combined_df_vertex['X_survey'] += combined_df_vertex['X_collar']
+        combined_df_vertex['Y_survey'] += combined_df_vertex['Y_collar']
+        combined_df_vertex['Z_survey'] += combined_df_vertex['Z_collar']
+
+    @staticmethod
+    def _generate_cells(combined_df_vertex: pd.DataFrame, survey: Survey) -> pd.DataFrame:
+        combined_df_cells = []
+        previous_index = 0
+        for e, well_id in enumerate(survey.ids):
+            df_vertex_well = combined_df_vertex[combined_df_vertex['well_id'] == well_id]
+            indices = np.arange(len(df_vertex_well)) + previous_index
+            previous_index += len(df_vertex_well)
+            cells = np.array([indices[:-1], indices[1:]]).T
+            df_cells_well = pd.DataFrame(cells, columns=['cell1', 'cell2'])
+            df_cells_well['well_id'] = well_id
+            df_cells_well['well_id_int'] = e
+            combined_df_cells.append(df_cells_well)
+
+        return pd.concat(combined_df_cells, ignore_index=True)
+
+    @staticmethod
+    def _create_line_set(combined_df_vertex: pd.DataFrame, combined_df_cells: pd.DataFrame, survey: Survey) -> LineSet:
+        vertex_attrs = survey.survey_trajectory.data.points_attributes
+        cell_attributes = survey.survey_trajectory.data.cell_attributes
+
+        combined_trajectory_unstruct = UnstructuredData.from_array(
+            vertex=combined_df_vertex[['X_survey', 'Y_survey', 'Z_survey']].values,
+            cells=combined_df_cells[['cell1', 'cell2']].values,
+            vertex_attr=vertex_attrs,
+            cells_attr=cell_attributes
+        )
+
+        return LineSet(data=combined_trajectory_unstruct, radius=500)
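
The core of `process_intersection` is an outer merge on `well_id` followed by shifting the collar-relative survey vertices to absolute coordinates. Below is a minimal, self-contained sketch of that pattern with made-up DataFrames; only the column names and the merge call mirror the code above. Note that `suffixes[0]` tags the left (survey) columns, so `_collar` ends up on survey values and `_survey` on collar values, which the in-place addition then relies on.

import numpy as np
import pandas as pd

# Survey vertices are collar-relative; collars carry absolute coordinates.
# The tiny DataFrames here are purely illustrative.
survey_df = pd.DataFrame({
    'X': [0.0, 1.0, 0.0, 2.0], 'Y': [0.0, 0.5, 0.0, 1.0],
    'Z': [0.0, -10.0, 0.0, -20.0],
    'well_id': ['w1', 'w1', 'w2', 'w2'],
})
collar_df = pd.DataFrame({
    'X': [100.0, 200.0], 'Y': [50.0, 60.0], 'Z': [5.0, 8.0],
    'well_id': ['w1', 'w2'],
})

# Outer merge with indicator, as in process_intersection. suffixes[0]
# applies to the left frame, so survey columns become X_collar etc.
combined = pd.merge(
    left=survey_df, right=collar_df,
    how='outer', indicator=True, on='well_id',
    suffixes=('_collar', '_survey'),
)
combined = combined[combined['_merge'].isin(['left_only', 'both'])]

# Shift survey-relative vertices by the collar location; the summed
# absolute coordinates land in the *_survey columns used for the vertices.
for axis in 'XYZ':
    combined[f'{axis}_survey'] += combined[f'{axis}_collar']

print(combined[['well_id', 'X_survey', 'Y_survey', 'Z_survey']])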

subsurface/core/geological_formats/boreholes/_map_attrs_to_survey.py
@@ -0,0 +1,236 @@
+import numpy as np
+import pandas as pd
+import xarray as xr
+from typing import Tuple, Optional, Union, List, Any
+
+from subsurface import optional_requirements
+from ...structs.base_structures import UnstructuredData
+from ...structs.base_structures._unstructured_data_constructor import raw_attributes_to_dict_data_arrays
+from ...structs.unstructured_elements import LineSet
+
+
+def combine_survey_and_attrs(attrs: pd.DataFrame, survey_trajectory: LineSet, well_id_mapper: dict[str, int]) -> UnstructuredData:
+    # Import moved to top for clarity and possibly avoiding repeated imports if called multiple times
+
+    # Ensure all columns in lith exist in new_attrs, if not, add them as NaN
+    new_attrs = _map_attrs_to_measured_depths(attrs, survey_trajectory, well_id_mapper)
+
+    # Construct the final xarray dict without intermediate variable
+    points_attributes_xarray_dict: dict[str, xr.DataArray] = raw_attributes_to_dict_data_arrays(
+        default_attributes_name="vertex_attrs",
+        n_items=survey_trajectory.data.data["vertex_attrs"].shape[0],  # TODO: Can I look this on new_attrs to remove line 11?
+        dims=["points", "vertex_attr"],
+        raw_attributes=new_attrs
+    )
+
+    # Inline construction of UnstructuredData
+    return UnstructuredData.from_data_arrays_dict(
+        xarray_dict={
+            "vertex"      : survey_trajectory.data.data["vertex"],
+            "cells"       : survey_trajectory.data.data["cells"],
+            "vertex_attrs": points_attributes_xarray_dict["vertex_attrs"],
+            "cell_attrs"  : survey_trajectory.data.data["cell_attrs"]
+        },
+        xarray_attributes=survey_trajectory.data.data.attrs,
+        default_cells_attributes_name=survey_trajectory.data.cells_attr_name,
+        default_points_attributes_name=survey_trajectory.data.vertex_attr_name
+    )
+
+
+def _prepare_categorical_data(attrs: pd.DataFrame) -> pd.DataFrame:
+    """
+    Prepare categorical data for interpolation by converting categorical columns to numeric IDs.
+
+    Args:
+        attrs: DataFrame containing attribute data
+
+    Returns:
+        Modified DataFrame with categorical data prepared for interpolation
+    """
+    # Create a copy to avoid modifying the original
+    attrs_copy = attrs.copy()
+
+    # If component lith exists but lith_ids doesn't, create lith_ids
+    if 'component lith' in attrs_copy.columns and 'lith_ids' not in attrs_copy.columns:
+        attrs_copy['lith_ids'], _ = pd.factorize(attrs_copy['component lith'], use_na_sentinel=True)
+
+    return attrs_copy
+
+
+def _prepare_new_attributes(attrs: pd.DataFrame, survey_trajectory: LineSet) -> pd.DataFrame:
+    """
+    Prepare the new attributes DataFrame by adding missing columns from attrs.
+
+    Args:
+        attrs: DataFrame containing attribute data
+        survey_trajectory: LineSet containing trajectory data
+
+    Returns:
+        New attributes DataFrame with all necessary columns
+    """
+    # Start with a copy of the existing attributes DataFrame
+    new_attrs = survey_trajectory.data.points_attributes.copy()
+
+    # Add missing columns from attrs, preserving their dtypes
+    for col in attrs.columns.difference(new_attrs.columns):
+        new_attrs[col] = np.nan if pd.api.types.is_numeric_dtype(attrs[col]) else None
+
+    return new_attrs
+
+
+def _get_interpolation_locations(attrs_well: pd.DataFrame, well_name: str) -> np.ndarray:
+    """
+    Determine the locations to use for interpolation based on top and base values.
+
+    Args:
+        attrs_well: DataFrame containing well attribute data
+        well_name: Name of the current well
+
+    Returns:
+        Array of location values to use for interpolation
+    """
+    if "base" not in attrs_well.columns:
+        raise ValueError(f"Base column must be present in the file for well '{well_name}'.")
+    elif "top" not in attrs_well.columns:
+        return attrs_well['base'].values
+    else:
+        return ((attrs_well['top'] + attrs_well['base']) / 2).values
+
+
+def _nearest_neighbor_categorical_interpolation(
+        x_locations: np.ndarray,
+        y_values: np.ndarray,
+        target_depths: np.ndarray
+) -> np.ndarray:
+    """
+    Custom nearest neighbor interpolation for categorical data.
+
+    This function finds the nearest source point for each target point
+    and assigns the corresponding categorical value.
+
+    Args:
+        x_locations: Array of source locations
+        y_values: Array of categorical values at source locations
+        target_depths: Array of target depths for interpolation
+
+    Returns:
+        Array of interpolated categorical values
+    """
+    # Initialize output array with NaN or None values
+    result = np.full(target_depths.shape, np.nan, dtype=object)
+
+    # For each target depth, find the nearest source location
+    for i, depth in enumerate(target_depths):
+        # Calculate distances to all source locations
+        distances = np.abs(x_locations - depth)
+
+        # Find the index of the minimum distance
+        if len(distances) > 0:
+            nearest_idx = np.argmin(distances)
+            result[i] = y_values[nearest_idx]
+
+    return result
+
+
+def _interpolate_attribute(
+        attr_values: pd.Series,
+        x_locations: np.ndarray,
+        target_depths: np.ndarray,
+        column_name: str,
+        is_categorical: bool
+) -> np.ndarray:
+    """
+    Interpolate attribute values to target depths.
+
+    Args:
+        attr_values: Series containing attribute values
+        x_locations: Array of source locations for interpolation
+        target_depths: Array of target depths for interpolation
+        column_name: Name of the column being interpolated
+        is_categorical: Whether the attribute is categorical
+
+    Returns:
+        Array of interpolated values
+    """
+    # For categorical data or specific columns, use custom nearest neighbor interpolation
+    if is_categorical or column_name in ['lith_ids', 'component lith']:
+        return _nearest_neighbor_categorical_interpolation(
+            x_locations=x_locations,
+            y_values=attr_values.values,
+            target_depths=target_depths
+        )
+    else:
+        # For numerical data, use scipy's interp1d with linear interpolation
+        scipy = optional_requirements.require_scipy()
+        interp_func = scipy.interpolate.interp1d(
+            x=x_locations,
+            y=attr_values.values,
+            bounds_error=False,
+            fill_value=np.nan,
+            kind='linear'
+        )
+        return interp_func(target_depths)
+
+
+def _map_attrs_to_measured_depths(attrs: pd.DataFrame, survey_trajectory: LineSet, well_id_mapper: dict[str, int]) -> pd.DataFrame:
+    """
+    Map attributes to measured depths for each well.
+
+    Args:
+        attrs: DataFrame containing attribute data
+        survey_trajectory: LineSet containing trajectory data
+        well_id_mapper: Dictionary mapping well names to IDs
+
+    Returns:
+        DataFrame with attributes mapped to measured depths
+    """
+    # Extract trajectory data
+    trajectory: xr.DataArray = survey_trajectory.data.data["vertex_attrs"]
+    trajectory_well_id: xr.DataArray = trajectory.sel({'vertex_attr': 'well_id'})
+    measured_depths: np.ndarray = trajectory.sel({'vertex_attr': 'measured_depths'}).values.astype(np.float64)
+
+    # Prepare data
+    attrs: pd.DataFrame = _prepare_categorical_data(attrs)
+    new_attrs: pd.DataFrame = _prepare_new_attributes(attrs, survey_trajectory)
+
+    # Process each well
+    for well_name in well_id_mapper:
+        # Skip wells not in the attributes DataFrame
+        if well_name not in attrs.index:
+            print(f"Well '{well_name}' does not exist in the attributes DataFrame.")
+            continue
+
+        # Get well data
+        attrs_well = attrs.loc[[well_name]]
+        well_id = well_id_mapper.get(well_name)
+        well_mask = (trajectory_well_id == well_id).values
+        well_depths = measured_depths[well_mask]
+
+        # Get interpolation locations
+        interp_locations = _get_interpolation_locations(attrs_well, well_name)
+
+        # Interpolate each attribute
+        for col in attrs_well.columns:
+            # Skip location and ID columns
+            if col in ['top', 'base', 'well_id']:
+                continue
+
+            attr_values = attrs_well[col]
+            is_categorical = attr_values.dtype == 'O' or isinstance(attr_values.dtype, pd.CategoricalDtype)
+
+            # Skip columns that can't be interpolated and aren't categorical
+            if is_categorical and col not in ['lith_ids', 'component lith']:
+                continue
+
+            # Interpolate and assign values
+            interpolated_values = _interpolate_attribute(
+                attr_values,
+                interp_locations,
+                well_depths,
+                col,
+                is_categorical
+            )
+
+            new_attrs.loc[well_mask, col] = interpolated_values
+
+    return new_attrs
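
`_interpolate_attribute` switches between two modes: nearest-neighbour assignment for categorical columns (`lith_ids`, `component lith`) and linear interpolation for numeric ones. Here is a dependency-free sketch of both modes with invented depths and values; `np.interp` stands in for SciPy's `interp1d`, which, unlike `np.interp`, is configured above to return NaN outside the source range.

import numpy as np

# Source locations (e.g. midpoints of top/base intervals) and their values.
# All numbers are made up for illustration.
interval_mids = np.array([5.0, 15.0, 30.0])
lith = np.array(['sand', 'clay', 'shale'])   # categorical column
porosity = np.array([0.30, 0.18, 0.05])      # numeric column

# Target measured depths along the resampled trajectory.
target_md = np.array([0.0, 7.0, 14.0, 22.0, 35.0])

# Nearest-neighbour assignment for the categorical column: for each target
# depth, pick the value at the closest source location.
nearest_idx = np.abs(interval_mids[None, :] - target_md[:, None]).argmin(axis=1)
lith_at_md = lith[nearest_idx]

# Linear interpolation for the numeric column. np.interp clamps at the
# boundaries rather than filling with NaN as interp1d does in the diff.
porosity_at_md = np.interp(target_md, interval_mids, porosity)

print(list(zip(target_md, lith_at_md, np.round(porosity_at_md, 3))))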

subsurface/core/geological_formats/boreholes/_survey_to_unstruct.py
@@ -0,0 +1,163 @@
+from typing import Hashable, Optional
+
+import numpy as np
+import pandas as pd
+
+from subsurface import optional_requirements
+from ...structs.base_structures import UnstructuredData
+
+
+def data_frame_to_unstructured_data(survey_df: 'pd.DataFrame', number_nodes: int, attr_df: Optional['pd.DataFrame'] = None,
+                                    duplicate_attr_depths: bool = False) -> UnstructuredData:
+    wp = optional_requirements.require_wellpathpy()
+
+    cum_vertex: np.ndarray = np.empty((0, 3), dtype=np.float32)
+    cells: np.ndarray = np.empty((0, 2), dtype=np.int_)
+    cell_attr: pd.DataFrame = pd.DataFrame(columns=['well_id'], dtype=np.float32)
+    vertex_attr: pd.DataFrame = pd.DataFrame()
+
+    for e, (borehole_id, data) in enumerate(survey_df.groupby(level=0)):
+        dev = wp.deviation(
+            md=data['md'].values,
+            inc=data['inc'].values,
+            azi=data['azi'].values
+        )
+
+        md_min = dev.md.min()
+        md_max = dev.md.max()
+
+        attr_depths = _grab_depths_from_attr(
+            attr_df=attr_df,
+            borehole_id=borehole_id,
+            duplicate_attr_depths=duplicate_attr_depths,
+            md_max=md_max,
+            md_min=md_min
+        )
+
+        # Now combine attr_depths with depths
+        md_min = dev.md.min()
+        md_max = dev.md.max()
+        depths = np.linspace(md_min, md_max, number_nodes)
+        depths = np.union1d(depths, attr_depths)
+        depths.sort()
+
+        # Resample positions at depths
+        pos = dev.minimum_curvature().resample(depths=depths)
+        vertex_count = cum_vertex.shape[0]
+
+        this_well_vertex = np.vstack([pos.easting, pos.northing, pos.depth]).T
+        cum_vertex = np.vstack([cum_vertex, this_well_vertex])
+        measured_depths = _calculate_distances(array_of_vertices=this_well_vertex)
+
+        n_vertex_shift_0 = np.arange(0, len(pos.depth) - 1, dtype=np.int_)
+        n_vertex_shift_1 = np.arange(1, len(pos.depth), dtype=np.int_)
+        cell_per_well = np.vstack([n_vertex_shift_0, n_vertex_shift_1]).T + vertex_count
+        cells = np.vstack([cells, cell_per_well])
+
+        attribute_values = np.isin(depths, attr_depths)
+
+        vertex_attr_per_well = pd.DataFrame({
+            'well_id'        : [e] * len(pos.depth),
+            'measured_depths': measured_depths,
+            'is_attr_point'  : attribute_values,
+        })
+
+        vertex_attr = pd.concat([vertex_attr, vertex_attr_per_well], ignore_index=True)
+
+        # Add the id (e), to cell_attr
+        cell_attr = pd.concat([cell_attr, pd.DataFrame({'well_id': [e] * len(cell_per_well)})], ignore_index=True)
+
+    unstruct = UnstructuredData.from_array(
+        vertex=cum_vertex,
+        cells=cells.astype(int),
+        vertex_attr=vertex_attr.reset_index(drop=True),
+        cells_attr=cell_attr.reset_index(drop=True)
+    )
+
+    unstruct.data.attrs["well_id_mapper"] = {well_id: e for e, well_id in enumerate(survey_df.index.unique(level=0))}
+
+    return unstruct
+
+
+def _grab_depths_from_attr(
+        attr_df: pd.DataFrame,
+        borehole_id: Hashable,
+        duplicate_attr_depths: bool,
+        md_max: float,
+        md_min: float
+) -> np.ndarray:
+    # Initialize attr_depths and attr_labels as empty arrays
+    attr_depths = np.array([], dtype=float)
+    attr_labels = np.array([], dtype='<U4')  # Initialize labels for 'top' and 'base'
+
+    if attr_df is None or ("top" not in attr_df.columns and "base" not in attr_df.columns):
+        return attr_depths
+
+    try:
+        vals = attr_df.loc[borehole_id]
+
+        tops = np.array([], dtype=float)
+        bases = np.array([], dtype=float)
+
+        if 'top' in vals:
+            if isinstance(vals, pd.DataFrame):
+                tops = vals['top'].values.flatten()
+            else:
+                tops = np.array([vals['top']])
+            # Convert to float and remove NaNs
+            tops = tops.astype(float)
+            tops = tops[~np.isnan(tops)]
+            # Clip to within md range
+            tops = tops[(tops >= md_min) & (tops <= md_max)]
+
+        if 'base' in vals:
+            if isinstance(vals, pd.DataFrame):
+                bases = vals['base'].values.flatten()
+            else:
+                bases = np.array([vals['base']])
+            # Convert to float and remove NaNs
+            bases = bases.astype(float)
+            bases = bases[~np.isnan(bases)]
+            # Clip to within md range
+            bases = bases[(bases >= md_min) & (bases <= md_max)]
+
+        # Combine tops and bases into attr_depths with labels
+        attr_depths = np.concatenate((tops, bases))
+        attr_labels = np.array(['top'] * len(tops) + ['base'] * len(bases))
+
+        # Drop duplicates while preserving order
+        _, unique_indices = np.unique(attr_depths, return_index=True)
+        attr_depths = attr_depths[unique_indices]
+        attr_labels = attr_labels[unique_indices]
+
+    except KeyError:
+        # No attributes for this borehole_id or missing columns
+        attr_depths = np.array([], dtype=float)
+        attr_labels = np.array([], dtype='<U4')
+
+    # If duplicate_attr_depths is True, duplicate attr_depths with a tiny offset
+    if duplicate_attr_depths and len(attr_depths) > 0:
+        tiny_offset = (md_max - md_min) * 1e-6  # A tiny fraction of the depth range
+        # Create offsets: +tiny_offset for 'top', -tiny_offset for 'base'
+        offsets = np.where(attr_labels == 'top', tiny_offset, -tiny_offset)
+        duplicated_attr_depths = attr_depths + offsets
+        # Ensure the duplicated depths are within the md range
+        valid_indices = (duplicated_attr_depths >= md_min) & (duplicated_attr_depths <= md_max)
+        duplicated_attr_depths = duplicated_attr_depths[valid_indices]
+        # Original attribute depths
+        original_attr_depths = attr_depths
+        # Combine originals and duplicates
+        attr_depths = np.hstack([original_attr_depths, duplicated_attr_depths])
+
+    return attr_depths
+
+
+def _calculate_distances(array_of_vertices: np.ndarray) -> np.ndarray:
+    # Calculate the differences between consecutive points
+    differences = np.diff(array_of_vertices, axis=0)
+
+    # Calculate the Euclidean distance for each pair of consecutive points
+    distances = np.linalg.norm(differences, axis=1)
+    # Insert a 0 at the beginning to represent the starting point at the surface
+    measured_depths = np.insert(np.cumsum(distances), 0, 0)
+    return measured_depths
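
`_calculate_distances` reduces to a cumulative Euclidean norm over consecutive resampled vertices, with a leading zero for the collar. A minimal worked example with an invented three-point trajectory:

import numpy as np

# Made-up trajectory: collar, then a 5 m segment, then a 6 m segment.
vertices = np.array([
    [0.0, 0.0,  0.0],   # collar
    [3.0, 0.0,  4.0],   # 3-4-5 triangle: 5 m along hole
    [3.0, 0.0, 10.0],   # straight down: 6 m further
])

# Same computation as _calculate_distances above.
segment_lengths = np.linalg.norm(np.diff(vertices, axis=0), axis=1)
measured_depths = np.insert(np.cumsum(segment_lengths), 0, 0.0)

print(measured_depths)  # [ 0.  5. 11.]

Because the depths are recomputed from the resampled vertices rather than taken from the input md column, they are chord lengths between nodes; for a curved hole this slightly underestimates true measured depth, which the resampling density (`number_nodes`) controls.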