subsurface-terra 2025.1.0rc15__py3-none-any.whl → 2025.1.0rc17__py3-none-any.whl

Files changed (82)
  1. subsurface/__init__.py +31 -31
  2. subsurface/_version.py +34 -21
  3. subsurface/api/__init__.py +13 -13
  4. subsurface/api/interfaces/__init__.py +3 -3
  5. subsurface/api/interfaces/stream.py +136 -136
  6. subsurface/api/reader/read_wells.py +78 -78
  7. subsurface/core/geological_formats/boreholes/_combine_trajectories.py +117 -117
  8. subsurface/core/geological_formats/boreholes/_map_attrs_to_survey.py +236 -234
  9. subsurface/core/geological_formats/boreholes/_survey_to_unstruct.py +163 -163
  10. subsurface/core/geological_formats/boreholes/boreholes.py +140 -140
  11. subsurface/core/geological_formats/boreholes/collars.py +26 -26
  12. subsurface/core/geological_formats/boreholes/survey.py +86 -86
  13. subsurface/core/geological_formats/fault.py +47 -47
  14. subsurface/core/reader_helpers/reader_unstruct.py +11 -11
  15. subsurface/core/reader_helpers/readers_data.py +130 -130
  16. subsurface/core/reader_helpers/readers_wells.py +13 -13
  17. subsurface/core/structs/__init__.py +3 -3
  18. subsurface/core/structs/base_structures/__init__.py +2 -2
  19. subsurface/core/structs/base_structures/_aux.py +69 -0
  20. subsurface/core/structs/base_structures/_liquid_earth_mesh.py +121 -121
  21. subsurface/core/structs/base_structures/_unstructured_data_constructor.py +70 -70
  22. subsurface/core/structs/base_structures/base_structures_enum.py +6 -6
  23. subsurface/core/structs/base_structures/structured_data.py +282 -282
  24. subsurface/core/structs/base_structures/unstructured_data.py +338 -319
  25. subsurface/core/structs/structured_elements/octree_mesh.py +10 -10
  26. subsurface/core/structs/structured_elements/structured_grid.py +59 -59
  27. subsurface/core/structs/structured_elements/structured_mesh.py +9 -9
  28. subsurface/core/structs/unstructured_elements/__init__.py +3 -3
  29. subsurface/core/structs/unstructured_elements/line_set.py +72 -72
  30. subsurface/core/structs/unstructured_elements/point_set.py +43 -43
  31. subsurface/core/structs/unstructured_elements/tetrahedron_mesh.py +35 -35
  32. subsurface/core/structs/unstructured_elements/triangular_surface.py +62 -62
  33. subsurface/core/utils/utils_core.py +38 -38
  34. subsurface/modules/reader/__init__.py +13 -13
  35. subsurface/modules/reader/faults/faults.py +80 -80
  36. subsurface/modules/reader/from_binary.py +46 -46
  37. subsurface/modules/reader/mesh/_GOCAD_mesh.py +82 -82
  38. subsurface/modules/reader/mesh/_trimesh_reader.py +447 -447
  39. subsurface/modules/reader/mesh/csv_mesh_reader.py +53 -53
  40. subsurface/modules/reader/mesh/dxf_reader.py +177 -177
  41. subsurface/modules/reader/mesh/glb_reader.py +30 -30
  42. subsurface/modules/reader/mesh/mx_reader.py +232 -232
  43. subsurface/modules/reader/mesh/obj_reader.py +53 -53
  44. subsurface/modules/reader/mesh/omf_mesh_reader.py +43 -43
  45. subsurface/modules/reader/mesh/surface_reader.py +56 -56
  46. subsurface/modules/reader/mesh/surfaces_api.py +41 -41
  47. subsurface/modules/reader/profiles/__init__.py +3 -3
  48. subsurface/modules/reader/profiles/profiles_core.py +197 -197
  49. subsurface/modules/reader/read_netcdf.py +38 -38
  50. subsurface/modules/reader/topography/__init__.py +7 -7
  51. subsurface/modules/reader/topography/topo_core.py +100 -100
  52. subsurface/modules/reader/volume/read_grav3d.py +447 -428
  53. subsurface/modules/reader/volume/read_volume.py +327 -230
  54. subsurface/modules/reader/volume/segy_reader.py +105 -105
  55. subsurface/modules/reader/volume/seismic.py +173 -173
  56. subsurface/modules/reader/volume/volume_utils.py +43 -43
  57. subsurface/modules/reader/wells/DEP/__init__.py +43 -43
  58. subsurface/modules/reader/wells/DEP/_well_files_reader.py +167 -167
  59. subsurface/modules/reader/wells/DEP/_wells_api.py +61 -61
  60. subsurface/modules/reader/wells/DEP/_welly_reader.py +180 -180
  61. subsurface/modules/reader/wells/DEP/pandas_to_welly.py +212 -212
  62. subsurface/modules/reader/wells/_read_to_df.py +57 -57
  63. subsurface/modules/reader/wells/read_borehole_interface.py +148 -148
  64. subsurface/modules/reader/wells/wells_utils.py +68 -68
  65. subsurface/modules/tools/mocking_aux.py +104 -104
  66. subsurface/modules/visualization/__init__.py +2 -2
  67. subsurface/modules/visualization/to_pyvista.py +320 -320
  68. subsurface/modules/writer/to_binary.py +12 -12
  69. subsurface/modules/writer/to_rex/common.py +78 -78
  70. subsurface/modules/writer/to_rex/data_struct.py +74 -74
  71. subsurface/modules/writer/to_rex/gempy_to_rexfile.py +791 -791
  72. subsurface/modules/writer/to_rex/material_encoder.py +44 -44
  73. subsurface/modules/writer/to_rex/mesh_encoder.py +152 -152
  74. subsurface/modules/writer/to_rex/to_rex.py +115 -115
  75. subsurface/modules/writer/to_rex/utils.py +15 -15
  76. subsurface/optional_requirements.py +116 -116
  77. {subsurface_terra-2025.1.0rc15.dist-info → subsurface_terra-2025.1.0rc17.dist-info}/METADATA +194 -194
  78. subsurface_terra-2025.1.0rc17.dist-info/RECORD +99 -0
  79. {subsurface_terra-2025.1.0rc15.dist-info → subsurface_terra-2025.1.0rc17.dist-info}/WHEEL +1 -1
  80. {subsurface_terra-2025.1.0rc15.dist-info → subsurface_terra-2025.1.0rc17.dist-info}/licenses/LICENSE +203 -203
  81. subsurface_terra-2025.1.0rc15.dist-info/RECORD +0 -98
  82. {subsurface_terra-2025.1.0rc15.dist-info → subsurface_terra-2025.1.0rc17.dist-info}/top_level.txt +0 -0
--- a/subsurface/core/geological_formats/boreholes/collars.py
+++ b/subsurface/core/geological_formats/boreholes/collars.py
@@ -1,27 +1,27 @@
-import pandas as pd
-from dataclasses import dataclass
-
-from ...structs.base_structures import UnstructuredData
-from ...structs.base_structures.base_structures_enum import SpecialCellCase
-from ...structs.unstructured_elements import PointSet
-
-
-@dataclass
-class Collars:
-    ids: list[str]
-    collar_loc: PointSet
-
-    @classmethod
-    def from_df(cls, df: pd.DataFrame):
-        unstruc: UnstructuredData = UnstructuredData.from_array(
-            vertex=df[["x", "y", "z"]].values,
-            cells=SpecialCellCase.POINTS
-        )
-        return cls(
-            ids=df.index.to_list(),
-            collar_loc=PointSet(data=unstruc)
-        )
-
-    @property
-    def data(self):
+import pandas as pd
+from dataclasses import dataclass
+
+from ...structs.base_structures import UnstructuredData
+from ...structs.base_structures.base_structures_enum import SpecialCellCase
+from ...structs.unstructured_elements import PointSet
+
+
+@dataclass
+class Collars:
+    ids: list[str]
+    collar_loc: PointSet
+
+    @classmethod
+    def from_df(cls, df: pd.DataFrame):
+        unstruc: UnstructuredData = UnstructuredData.from_array(
+            vertex=df[["x", "y", "z"]].values,
+            cells=SpecialCellCase.POINTS
+        )
+        return cls(
+            ids=df.index.to_list(),
+            collar_loc=PointSet(data=unstruc)
+        )
+
+    @property
+    def data(self):
         return self.collar_loc.data
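
Note that in this hunk, as in most hunks below, the removed and re-added lines are textually identical; the rc15 to rc17 delta is not visible in the rendered text (consistent with a whitespace or line-ending normalization). For orientation, a minimal usage sketch of the Collars API shown above; the sample DataFrame, its well names, and the coordinate values are hypothetical, while the x/y/z column names and the use of the index follow the from_df body:

import pandas as pd
from subsurface.core.geological_formats.boreholes.collars import Collars

# Hypothetical collar table: one row per well, indexed by well name.
collar_df = pd.DataFrame(
    data={"x": [1000.0, 1500.0], "y": [2000.0, 2500.0], "z": [0.0, -5.0]},
    index=["well_a", "well_b"],
)

collars = Collars.from_df(collar_df)
print(collars.ids)   # ['well_a', 'well_b'], taken from the DataFrame index
print(collars.data)  # the UnstructuredData behind the collar PointSet
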
--- a/subsurface/core/geological_formats/boreholes/survey.py
+++ b/subsurface/core/geological_formats/boreholes/survey.py
@@ -1,86 +1,86 @@
-from dataclasses import dataclass
-from typing import Union, Hashable, Optional
-
-import pandas as pd
-
-from ._map_attrs_to_survey import combine_survey_and_attrs
-from ._survey_to_unstruct import data_frame_to_unstructured_data
-from ...structs.base_structures import UnstructuredData
-from ...structs.unstructured_elements import LineSet
-
-NUMBER_NODES = 30
-RADIUS = 10
-
-
-@dataclass
-class Survey:
-    ids: list[str]
-    survey_trajectory: LineSet
-    well_id_mapper: dict[str, int] = None  #: This is following the order of the survey csv that can be different that the collars
-
-    @property
-    def id_to_well_id(self):
-        # Reverse the well_id_mapper dictionary to map IDs to well names
-        id_to_well_name_mapper = {v: k for k, v in self.well_id_mapper.items()}
-        return id_to_well_name_mapper
-
-    @classmethod
-    def from_df(cls, survey_df: 'pd.DataFrame', attr_df: Optional['pd.DataFrame'] = None, number_nodes: Optional[int] = NUMBER_NODES,
-                duplicate_attr_depths: bool = False) -> 'Survey':
-        """
-        Create a Survey object from two DataFrames containing survey and attribute data.
-
-        :param survey_df: DataFrame containing survey data.
-        :param attr_df: DataFrame containing attribute data. This is used to make sure the raw data is perfectly aligned.
-        :param number_nodes: Optional parameter specifying the number of nodes.
-        :return: A Survey object representing the input data.
-
-        """
-        trajectories: UnstructuredData = data_frame_to_unstructured_data(
-            survey_df=_correct_angles(survey_df),
-            attr_df=attr_df,
-            number_nodes=number_nodes,
-            duplicate_attr_depths=duplicate_attr_depths
-        )
-        # Grab the unique ids
-        unique_ids = trajectories.points_attributes["well_id"].unique()
-
-        return cls(
-            ids=unique_ids,
-            survey_trajectory=LineSet(data=trajectories, radius=RADIUS),
-            well_id_mapper=trajectories.data.attrs["well_id_mapper"]
-        )
-
-    def get_well_string_id(self, well_id: int) -> str:
-        return self.ids[well_id]
-
-    def get_well_num_id(self, well_string_id: Union[str, Hashable]) -> int:
-        return self.well_id_mapper.get(well_string_id, None)
-
-    def update_survey_with_lith(self, lith: pd.DataFrame):
-        unstruct: UnstructuredData = combine_survey_and_attrs(lith, self.survey_trajectory, self.well_id_mapper)
-        self.survey_trajectory.data = unstruct
-
-    def update_survey_with_attr(self, attrs: pd.DataFrame):
-        self.survey_trajectory.data = combine_survey_and_attrs(attrs, self.survey_trajectory, self.well_id_mapper)
-
-
-def _correct_angles(df: pd.DataFrame) -> pd.DataFrame:
-    def correct_inclination(inc: float) -> float:
-        if inc < 0:
-            inc = inc % 360  # Normalize to 0-360 range first if negative
-        if 0 <= inc <= 180:
-            # add or subtract a very small number to make sure that 0 or 180 are never possible
-            return inc + 1e-10 if inc == 0 else inc - 1e-10
-        elif 180 < inc < 360:
-            return 360 - inc  # Reflect angles greater than 180 back into the 0-180 range
-        else:
-            raise ValueError(f'Inclination value {inc} is out of the expected range of 0 to 360 degrees')
-
-    def correct_azimuth(azi: float) -> float:
-        return azi % 360  # Normalize azimuth to 0-360 range
-
-    df['inc'] = df['inc'].apply(correct_inclination)
-    df['azi'] = df['azi'].apply(correct_azimuth)
-
-    return df
+from dataclasses import dataclass
+from typing import Union, Hashable, Optional
+
+import pandas as pd
+
+from ._map_attrs_to_survey import combine_survey_and_attrs
+from ._survey_to_unstruct import data_frame_to_unstructured_data
+from ...structs.base_structures import UnstructuredData
+from ...structs.unstructured_elements import LineSet
+
+NUMBER_NODES = 30
+RADIUS = 10
+
+
+@dataclass
+class Survey:
+    ids: list[str]
+    survey_trajectory: LineSet
+    well_id_mapper: dict[str, int] = None  #: This is following the order of the survey csv that can be different that the collars
+
+    @property
+    def id_to_well_id(self):
+        # Reverse the well_id_mapper dictionary to map IDs to well names
+        id_to_well_name_mapper = {v: k for k, v in self.well_id_mapper.items()}
+        return id_to_well_name_mapper
+
+    @classmethod
+    def from_df(cls, survey_df: 'pd.DataFrame', attr_df: Optional['pd.DataFrame'] = None, number_nodes: Optional[int] = NUMBER_NODES,
+                duplicate_attr_depths: bool = False) -> 'Survey':
+        """
+        Create a Survey object from two DataFrames containing survey and attribute data.
+
+        :param survey_df: DataFrame containing survey data.
+        :param attr_df: DataFrame containing attribute data. This is used to make sure the raw data is perfectly aligned.
+        :param number_nodes: Optional parameter specifying the number of nodes.
+        :return: A Survey object representing the input data.
+
+        """
+        trajectories: UnstructuredData = data_frame_to_unstructured_data(
+            survey_df=_correct_angles(survey_df),
+            attr_df=attr_df,
+            number_nodes=number_nodes,
+            duplicate_attr_depths=duplicate_attr_depths
+        )
+        # Grab the unique ids
+        unique_ids = trajectories.points_attributes["well_id"].unique()
+
+        return cls(
+            ids=unique_ids,
+            survey_trajectory=LineSet(data=trajectories, radius=RADIUS),
+            well_id_mapper=trajectories.data.attrs["well_id_mapper"]
+        )
+
+    def get_well_string_id(self, well_id: int) -> str:
+        return self.ids[well_id]
+
+    def get_well_num_id(self, well_string_id: Union[str, Hashable]) -> int:
+        return self.well_id_mapper.get(well_string_id, None)
+
+    def update_survey_with_lith(self, lith: pd.DataFrame):
+        unstruct: UnstructuredData = combine_survey_and_attrs(lith, self.survey_trajectory, self.well_id_mapper)
+        self.survey_trajectory.data = unstruct
+
+    def update_survey_with_attr(self, attrs: pd.DataFrame):
+        self.survey_trajectory.data = combine_survey_and_attrs(attrs, self.survey_trajectory, self.well_id_mapper)
+
+
+def _correct_angles(df: pd.DataFrame) -> pd.DataFrame:
+    def correct_inclination(inc: float) -> float:
+        if inc < 0:
+            inc = inc % 360  # Normalize to 0-360 range first if negative
+        if 0 <= inc <= 180:
+            # add or subtract a very small number to make sure that 0 or 180 are never possible
+            return inc + 1e-10 if inc == 0 else inc - 1e-10
+        elif 180 < inc < 360:
+            return 360 - inc  # Reflect angles greater than 180 back into the 0-180 range
+        else:
+            raise ValueError(f'Inclination value {inc} is out of the expected range of 0 to 360 degrees')
+
+    def correct_azimuth(azi: float) -> float:
+        return azi % 360  # Normalize azimuth to 0-360 range
+
+    df['inc'] = df['inc'].apply(correct_inclination)
+    df['azi'] = df['azi'].apply(correct_azimuth)
+
+    return df
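
The angle handling in _correct_angles is the behavioral core of this file. A small sketch of its effect on out-of-range values; the sample values are hypothetical, and the expected results follow directly from the code above:

import pandas as pd
from subsurface.core.geological_formats.boreholes.survey import _correct_angles

# Only the 'inc' and 'azi' columns matter to _correct_angles.
df = pd.DataFrame({"inc": [0.0, 190.0, -10.0], "azi": [370.0, -90.0, 45.0]})
out = _correct_angles(df)
# inc: 0 -> 1e-10 (nudged off the pole), 190 -> 170 (reflected),
#      -10 -> 350 (mod 360) -> 10 (reflected back into 0-180)
# azi: 370 -> 10, -90 -> 270, 45 -> 45 (all wrapped into [0, 360))
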
--- a/subsurface/core/geological_formats/fault.py
+++ b/subsurface/core/geological_formats/fault.py
@@ -1,48 +1,48 @@
-import pandas as pd
-
-from subsurface import optional_requirements
-
-
-class FaultSticks:
-    def __init__(self, df: pd.DataFrame):
-        self.df = df
-
-        self.pointcloud = None
-        self.sticks = None
-
-    def __getattr__(self, attr):
-        if attr in self.__dict__:
-            return getattr(self, attr)
-        return getattr(self.df, attr)
-
-    def __getitem__(self, item):
-        return self.df[item]
-
-    def plot(self, notebook=False, color="black"):
-        if not self.pointcloud:
-            self._make_pointcloud()
-            self._make_sticks()
-
-        pv = optional_requirements.require_pyvista()
-        p = pv.Plotter(notebook=notebook)
-        p.add_mesh(self.pointcloud, color=color)
-        for stick in self.sticks:
-            p.add_mesh(stick, color=color)
-        p.show()
-
-    def _make_pointcloud(self):
-        pv = optional_requirements.require_pyvista()
-        self.pointcloud = pv.PolyData(self.df[["X", "Y", "Z"]].values)
-
-    def _make_sticks(self):
-        pv = optional_requirements.require_pyvista()
-        lines = []
-        for stick, indices in self.df.groupby("stick id").groups.items():
-            stickdf = self.df.loc[indices]
-            for (r1, row1), (r2, row2) in zip(stickdf[:-1].iterrows(), stickdf[1:].iterrows()):
-                line = pv.Line(
-                    pointa=(row1.X, row1.Y, row1.Z),
-                    pointb=(row2.X, row2.Y, row2.Z),
-                )
-                lines.append(line)
+import pandas as pd
+
+from subsurface import optional_requirements
+
+
+class FaultSticks:
+    def __init__(self, df: pd.DataFrame):
+        self.df = df
+
+        self.pointcloud = None
+        self.sticks = None
+
+    def __getattr__(self, attr):
+        if attr in self.__dict__:
+            return getattr(self, attr)
+        return getattr(self.df, attr)
+
+    def __getitem__(self, item):
+        return self.df[item]
+
+    def plot(self, notebook=False, color="black"):
+        if not self.pointcloud:
+            self._make_pointcloud()
+            self._make_sticks()
+
+        pv = optional_requirements.require_pyvista()
+        p = pv.Plotter(notebook=notebook)
+        p.add_mesh(self.pointcloud, color=color)
+        for stick in self.sticks:
+            p.add_mesh(stick, color=color)
+        p.show()
+
+    def _make_pointcloud(self):
+        pv = optional_requirements.require_pyvista()
+        self.pointcloud = pv.PolyData(self.df[["X", "Y", "Z"]].values)
+
+    def _make_sticks(self):
+        pv = optional_requirements.require_pyvista()
+        lines = []
+        for stick, indices in self.df.groupby("stick id").groups.items():
+            stickdf = self.df.loc[indices]
+            for (r1, row1), (r2, row2) in zip(stickdf[:-1].iterrows(), stickdf[1:].iterrows()):
+                line = pv.Line(
+                    pointa=(row1.X, row1.Y, row1.Z),
+                    pointb=(row2.X, row2.Y, row2.Z),
+                )
+                lines.append(line)
         self.sticks = lines
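
A usage sketch for FaultSticks, assuming pyvista is installed (it is an optional dependency resolved via optional_requirements); the sample data is hypothetical, while the X/Y/Z and "stick id" column names follow the class above:

import pandas as pd
from subsurface.core.geological_formats.fault import FaultSticks

# Two hypothetical fault sticks with two points each, grouped by "stick id".
df = pd.DataFrame({
    "X": [0.0, 0.0, 10.0, 10.0],
    "Y": [0.0, 5.0, 0.0, 5.0],
    "Z": [0.0, -50.0, 0.0, -50.0],
    "stick id": [1, 1, 2, 2],
})

fault = FaultSticks(df)
fault.plot(notebook=False, color="black")  # lazily builds the point cloud and stick segments
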
--- a/subsurface/core/reader_helpers/reader_unstruct.py
+++ b/subsurface/core/reader_helpers/reader_unstruct.py
@@ -1,11 +1,11 @@
-from dataclasses import dataclass
-
-from subsurface.core.reader_helpers.readers_data import GenericReaderFilesHelper
-
-
-@dataclass
-class ReaderUnstructuredHelper:
-    reader_vertex_args: GenericReaderFilesHelper
-    reader_cells_args: GenericReaderFilesHelper = None
-    reader_vertex_attr_args: GenericReaderFilesHelper = None
-    reader_cells_attr_args: GenericReaderFilesHelper = None
+from dataclasses import dataclass
+
+from subsurface.core.reader_helpers.readers_data import GenericReaderFilesHelper
+
+
+@dataclass
+class ReaderUnstructuredHelper:
+    reader_vertex_args: GenericReaderFilesHelper
+    reader_cells_args: GenericReaderFilesHelper = None
+    reader_vertex_attr_args: GenericReaderFilesHelper = None
+    reader_cells_attr_args: GenericReaderFilesHelper = None
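
A sketch of how this helper is assembled; the CSV paths are placeholders, and only the vertex reader is required:

from subsurface.core.reader_helpers.reader_unstruct import ReaderUnstructuredHelper
from subsurface.core.reader_helpers.readers_data import GenericReaderFilesHelper

# Cell and attribute readers default to None.
reader = ReaderUnstructuredHelper(
    reader_vertex_args=GenericReaderFilesHelper(file_or_buffer="vertices.csv"),
    reader_cells_args=GenericReaderFilesHelper(file_or_buffer="cells.csv"),
)
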
--- a/subsurface/core/reader_helpers/readers_data.py
+++ b/subsurface/core/reader_helpers/readers_data.py
@@ -1,131 +1,131 @@
-import enum
-import pathlib
-import pandas as pd
-
-from subsurface.core.utils.utils_core import get_extension
-from pydantic import BaseModel, Field, model_validator
-from typing import Union, List, Optional
-
-if pd.__version__ < '1.4.0':
-    pass
-elif pd.__version__ >= '1.4.0':
-    from pandas._typing import FilePath, ReadCsvBuffer
-
-    fb = Union[FilePath, ReadCsvBuffer[bytes], ReadCsvBuffer[str]]
-
-
-class SupportedFormats(str, enum.Enum):
-    DXF = "dxf"
-    DXFStream = "dxfstream"
-    CSV = "csv"
-    JSON = "json"
-    XLXS = "xlsx"
-
-
-class GenericReaderFilesHelper(BaseModel):
-    file_or_buffer: Union[str, bytes, pathlib.Path, dict]
-    usecols: Optional[Union[List[str], List[int]]] = None
-    col_names: Optional[List[Union[str, int]]] = None
-    drop_cols: Optional[List[str]] = None
-    format: Optional[SupportedFormats] = None
-    separator: Optional[str] = None
-    index_map: Optional[dict] = None  # Adjusted for serialization
-    columns_map: Optional[dict] = None  # Adjusted for serialization
-    additional_reader_kwargs: dict = Field(default_factory=dict)
-    encoding: str = "ISO-8859-1"
-    index_col: Optional[Union[int, str, bool]] = False
-    header: Union[None, int, List[int]] = 0
-
-    # Computed fields
-    file_or_buffer_type: str = Field(init=False)
-
-    @model_validator(mode="before")
-    def set_format_and_file_type(cls, values):
-        file_or_buffer = values.get('file_or_buffer')
-        format = values.get('format')
-
-        # Determine format if not provided
-        if format is None and file_or_buffer is not None:
-            extension = get_extension(file_or_buffer)
-            format_map = {
-                    ".dxf" : SupportedFormats.DXF,
-                    ".csv" : SupportedFormats.CSV,
-                    ".json": SupportedFormats.JSON,
-                    ".xlsx": SupportedFormats.XLXS,
-            }
-            format = format_map.get(extension.lower())
-            values['format'] = format
-
-        # Set file_or_buffer_type as a string representation
-        if file_or_buffer is not None:
-            values['file_or_buffer_type'] = type(file_or_buffer).__name__
-        else:
-            values['file_or_buffer_type'] = None
-
-        return values
-
-    # Custom validation for index_col to explicitly handle None
-
-    @model_validator(mode="before")
-    def validate_additional_reader_kwargs(cls, values):
-        additional_reader_kwargs = values.get('additional_reader_kwargs')
-        # Make sure that if any of the values is a regex expression that it is properly parsed like "delimiter":"\\\\s{2,}" to delimiter="\s{2,}"
-        if additional_reader_kwargs is not None:
-            for key, value in additional_reader_kwargs.items():
-                if isinstance(value, str):
-                    additional_reader_kwargs[key] = value.replace("\\\\", "\\")
-
-        return values
-
-
-    @model_validator(mode="before")
-    def validate_index_col(cls, values):
-        index_col = values.get('index_col')
-        # Allow None explicitly
-        if index_col is None:
-            values['index_col'] = False
-        else:
-            # Ensure index_col is either int, str, or bool
-            if not isinstance(index_col, (int, str, bool)):
-                raise ValueError(f"Invalid value for index_col: {index_col}. Must be int, str, bool, or None.")
-
-        return values
-
-    # Validator to handle negative header values. If -1 is the same as null, other raise an error
-    @model_validator(mode="before")
-    def validate_header(cls, values):
-        header = values.get('header')
-        if header == -1:
-            values['header'] = None
-            header = None
-        if header is not None and header < 0:
-            raise ValueError(f"Invalid value for header: {header}. Must be None, 0, or positive integer.")
-        return values
-
-    # If col names is null or empy list it should be None
-    @model_validator(mode="before")
-    def validate_col_names(cls, values):
-        col_names = values.get('col_names')
-        if col_names is None or col_names == []:
-            values['col_names'] = None
-        return values
-
-    @property
-    def pandas_reader_kwargs(self):
-        attr_dict = {
-                "names"    : self.col_names,
-                "header"   : self.header,
-                "index_col": self.index_col,
-                "usecols"  : self.usecols,
-                "encoding" : self.encoding
-        }
-        # Check if delimiter or separator is in additional_reader_kwargs if not add it here
-        if self.additional_reader_kwargs:
-            delimiter = self.additional_reader_kwargs.get("delimiter", None)
-        else:
-            delimiter = None
-        if self.separator is not None and delimiter is None:
-            attr_dict["sep"] = self.separator
-
-        return {**attr_dict, **self.additional_reader_kwargs}
+import enum
+import pathlib
+import pandas as pd
+
+from subsurface.core.utils.utils_core import get_extension
+from pydantic import BaseModel, Field, model_validator
+from typing import Union, List, Optional
+
+if pd.__version__ < '1.4.0':
+    pass
+elif pd.__version__ >= '1.4.0':
+    from pandas._typing import FilePath, ReadCsvBuffer
+
+    fb = Union[FilePath, ReadCsvBuffer[bytes], ReadCsvBuffer[str]]
+
+
+class SupportedFormats(str, enum.Enum):
+    DXF = "dxf"
+    DXFStream = "dxfstream"
+    CSV = "csv"
+    JSON = "json"
+    XLXS = "xlsx"
+
+
+class GenericReaderFilesHelper(BaseModel):
+    file_or_buffer: Union[str, bytes, pathlib.Path, dict]
+    usecols: Optional[Union[List[str], List[int]]] = None
+    col_names: Optional[List[Union[str, int]]] = None
+    drop_cols: Optional[List[str]] = None
+    format: Optional[SupportedFormats] = None
+    separator: Optional[str] = None
+    index_map: Optional[dict] = None  # Adjusted for serialization
+    columns_map: Optional[dict] = None  # Adjusted for serialization
+    additional_reader_kwargs: dict = Field(default_factory=dict)
+    encoding: str = "ISO-8859-1"
+    index_col: Optional[Union[int, str, bool]] = False
+    header: Union[None, int, List[int]] = 0
+
+    # Computed fields
+    file_or_buffer_type: str = Field(init=False)
+
+    @model_validator(mode="before")
+    def set_format_and_file_type(cls, values):
+        file_or_buffer = values.get('file_or_buffer')
+        format = values.get('format')
+
+        # Determine format if not provided
+        if format is None and file_or_buffer is not None:
+            extension = get_extension(file_or_buffer)
+            format_map = {
+                    ".dxf" : SupportedFormats.DXF,
+                    ".csv" : SupportedFormats.CSV,
+                    ".json": SupportedFormats.JSON,
+                    ".xlsx": SupportedFormats.XLXS,
+            }
+            format = format_map.get(extension.lower())
+            values['format'] = format
+
+        # Set file_or_buffer_type as a string representation
+        if file_or_buffer is not None:
+            values['file_or_buffer_type'] = type(file_or_buffer).__name__
+        else:
+            values['file_or_buffer_type'] = None
+
+        return values
+
+    # Custom validation for index_col to explicitly handle None
+
+    @model_validator(mode="before")
+    def validate_additional_reader_kwargs(cls, values):
+        additional_reader_kwargs = values.get('additional_reader_kwargs')
+        # Make sure that if any of the values is a regex expression that it is properly parsed like "delimiter":"\\\\s{2,}" to delimiter="\s{2,}"
+        if additional_reader_kwargs is not None:
+            for key, value in additional_reader_kwargs.items():
+                if isinstance(value, str):
+                    additional_reader_kwargs[key] = value.replace("\\\\", "\\")
+
+        return values
+
+
+    @model_validator(mode="before")
+    def validate_index_col(cls, values):
+        index_col = values.get('index_col')
+        # Allow None explicitly
+        if index_col is None:
+            values['index_col'] = False
+        else:
+            # Ensure index_col is either int, str, or bool
+            if not isinstance(index_col, (int, str, bool)):
+                raise ValueError(f"Invalid value for index_col: {index_col}. Must be int, str, bool, or None.")
+
+        return values
+
+    # Validator to handle negative header values. If -1 is the same as null, other raise an error
+    @model_validator(mode="before")
+    def validate_header(cls, values):
+        header = values.get('header')
+        if header == -1:
+            values['header'] = None
+            header = None
+        if header is not None and header < 0:
+            raise ValueError(f"Invalid value for header: {header}. Must be None, 0, or positive integer.")
+        return values
+
+    # If col names is null or empy list it should be None
+    @model_validator(mode="before")
+    def validate_col_names(cls, values):
+        col_names = values.get('col_names')
+        if col_names is None or col_names == []:
+            values['col_names'] = None
+        return values
+
+    @property
+    def pandas_reader_kwargs(self):
+        attr_dict = {
+                "names"    : self.col_names,
+                "header"   : self.header,
+                "index_col": self.index_col,
+                "usecols"  : self.usecols,
+                "encoding" : self.encoding
+        }
+        # Check if delimiter or separator is in additional_reader_kwargs if not add it here
+        if self.additional_reader_kwargs:
+            delimiter = self.additional_reader_kwargs.get("delimiter", None)
+        else:
+            delimiter = None
+        if self.separator is not None and delimiter is None:
+            attr_dict["sep"] = self.separator
+
+        return {**attr_dict, **self.additional_reader_kwargs}
 
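A sketch of what the model validators above do at construction time, assuming they run as written; the path and column names are placeholders:

from subsurface.core.reader_helpers.readers_data import (
    GenericReaderFilesHelper,
    SupportedFormats,
)

helper = GenericReaderFilesHelper(
    file_or_buffer="collars.csv",            # placeholder path
    col_names=["well_name", "x", "y", "z"],
    separator=";",
    header=-1,                               # validate_header maps -1 to None
)

print(helper.format)               # SupportedFormats.CSV, inferred from the extension
print(helper.file_or_buffer_type)  # "str", recorded by set_format_and_file_type
print(helper.pandas_reader_kwargs)
# {'names': ['well_name', 'x', 'y', 'z'], 'header': None, 'index_col': False,
#  'usecols': None, 'encoding': 'ISO-8859-1', 'sep': ';'}
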
--- a/subsurface/core/reader_helpers/readers_wells.py
+++ b/subsurface/core/reader_helpers/readers_wells.py
@@ -1,13 +1,13 @@
-from typing import List
-
-from dataclasses import dataclass
-
-from subsurface.core.reader_helpers.readers_data import GenericReaderFilesHelper
-
-
-@dataclass
-class ReaderWellsHelper:
-    reader_collars_args: GenericReaderFilesHelper
-    reader_survey_args: GenericReaderFilesHelper
-    reader_lith_args: GenericReaderFilesHelper = None
-    reader_attr_args: List[GenericReaderFilesHelper] = None
+from typing import List
+
+from dataclasses import dataclass
+
+from subsurface.core.reader_helpers.readers_data import GenericReaderFilesHelper
+
+
+@dataclass
+class ReaderWellsHelper:
+    reader_collars_args: GenericReaderFilesHelper
+    reader_survey_args: GenericReaderFilesHelper
+    reader_lith_args: GenericReaderFilesHelper = None
+    reader_attr_args: List[GenericReaderFilesHelper] = None
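
ReaderWellsHelper bundles one GenericReaderFilesHelper per well input file; a sketch with placeholder paths (the lithology and attribute readers are optional):

from subsurface.core.reader_helpers.readers_data import GenericReaderFilesHelper
from subsurface.core.reader_helpers.readers_wells import ReaderWellsHelper

wells_helper = ReaderWellsHelper(
    reader_collars_args=GenericReaderFilesHelper(file_or_buffer="collars.csv"),
    reader_survey_args=GenericReaderFilesHelper(file_or_buffer="survey.csv"),
    reader_lith_args=GenericReaderFilesHelper(file_or_buffer="lith.csv"),
)
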
--- a/subsurface/core/structs/__init__.py
+++ b/subsurface/core/structs/__init__.py
@@ -1,3 +1,3 @@
-from .base_structures import StructuredData, UnstructuredData
-from .unstructured_elements import PointSet, TriSurf, LineSet
-from .structured_elements import StructuredGrid
+from .base_structures import StructuredData, UnstructuredData
+from .unstructured_elements import PointSet, TriSurf, LineSet
+from .structured_elements import StructuredGrid
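
This re-export hunk (and the base_structures one below) only touches the package's public import surface, which stays the same:

# Downstream code can import the core containers in one line:
from subsurface.core.structs import (
    StructuredData, UnstructuredData,  # base containers
    PointSet, TriSurf, LineSet,        # unstructured elements
    StructuredGrid,                    # structured element
)
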
--- a/subsurface/core/structs/base_structures/__init__.py
+++ b/subsurface/core/structs/base_structures/__init__.py
@@ -1,2 +1,2 @@
-from .unstructured_data import UnstructuredData
-from .structured_data import StructuredData
+from .unstructured_data import UnstructuredData
+from .structured_data import StructuredData