webviz-subsurface 0.2.39__py3-none-any.whl → 0.2.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. webviz_subsurface/_figures/timeseries_figure.py +1 -1
  2. webviz_subsurface/_providers/ensemble_summary_provider/_provider_impl_arrow_lazy.py +3 -1
  3. webviz_subsurface/_providers/ensemble_summary_provider/_provider_impl_arrow_presampled.py +3 -1
  4. webviz_subsurface/_providers/ensemble_table_provider/ensemble_table_provider_impl_arrow.py +3 -1
  5. webviz_subsurface/_utils/dataframe_utils.py +1 -1
  6. webviz_subsurface/_version.py +34 -0
  7. webviz_subsurface/plugins/_bhp_qc/views/_view_functions.py +5 -5
  8. webviz_subsurface/plugins/_co2_migration/__init__.py +1 -0
  9. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_plugin.py +86 -46
  10. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/callbacks.py +53 -30
  11. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/co2volume.py +283 -40
  12. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/color_tables.py +1 -1
  13. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/containment_data_provider.py +6 -4
  14. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/containment_info.py +6 -0
  15. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/ensemble_well_picks.py +1 -1
  16. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/generic.py +59 -6
  17. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/initialization.py +73 -10
  18. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/polygon_handler.py +1 -1
  19. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/summary_graphs.py +20 -18
  20. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/surface_publishing.py +18 -20
  21. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/unsmry_data_provider.py +8 -8
  22. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/views/mainview/mainview.py +98 -44
  23. webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/views/mainview/settings.py +7 -5
  24. webviz_subsurface/plugins/_disk_usage.py +19 -8
  25. webviz_subsurface/plugins/_line_plotter_fmu/controllers/build_figure.py +4 -4
  26. webviz_subsurface/plugins/_map_viewer_fmu/layout.py +2 -1
  27. webviz_subsurface/plugins/_map_viewer_fmu/map_viewer_fmu.py +1 -1
  28. webviz_subsurface/plugins/_parameter_analysis/_utils/_parameters_model.py +5 -5
  29. webviz_subsurface/plugins/_property_statistics/property_statistics.py +1 -1
  30. webviz_subsurface/plugins/_relative_permeability.py +6 -6
  31. webviz_subsurface/plugins/_reservoir_simulation_timeseries_regional.py +12 -12
  32. webviz_subsurface/plugins/_running_time_analysis_fmu.py +6 -1
  33. webviz_subsurface/plugins/_seismic_misfit.py +2 -3
  34. webviz_subsurface/plugins/_simulation_time_series/_views/_subplot_view/_utils/vector_statistics.py +4 -4
  35. webviz_subsurface/plugins/_structural_uncertainty/views/intersection_and_map.py +1 -1
  36. webviz_subsurface/plugins/_swatinit_qc/_business_logic.py +1 -1
  37. webviz_subsurface-0.2.41.dist-info/METADATA +822 -0
  38. {webviz_subsurface-0.2.39.dist-info → webviz_subsurface-0.2.41.dist-info}/RECORD +51 -102
  39. {webviz_subsurface-0.2.39.dist-info → webviz_subsurface-0.2.41.dist-info}/WHEEL +1 -1
  40. {webviz_subsurface-0.2.39.dist-info → webviz_subsurface-0.2.41.dist-info}/entry_points.txt +1 -1
  41. {webviz_subsurface-0.2.39.dist-info → webviz_subsurface-0.2.41.dist-info}/top_level.txt +0 -1
  42. tests/integration_tests/test_parameter_filter.py +0 -28
  43. tests/integration_tests/test_surface_selector.py +0 -53
  44. tests/unit_tests/abbreviations_tests/test_reservoir_simulation.py +0 -94
  45. tests/unit_tests/data_input/__init__.py +0 -0
  46. tests/unit_tests/data_input/test_calc_from_cumulatives.py +0 -178
  47. tests/unit_tests/data_input/test_image_processing.py +0 -11
  48. tests/unit_tests/mocks/__init__.py +0 -0
  49. tests/unit_tests/mocks/ensemble_summary_provider_dummy.py +0 -67
  50. tests/unit_tests/model_tests/__init__.py +0 -0
  51. tests/unit_tests/model_tests/test_ensemble_model.py +0 -176
  52. tests/unit_tests/model_tests/test_ensemble_set_model.py +0 -105
  53. tests/unit_tests/model_tests/test_gruptree_model.py +0 -89
  54. tests/unit_tests/model_tests/test_property_statistics_model.py +0 -42
  55. tests/unit_tests/model_tests/test_surface_set_model.py +0 -48
  56. tests/unit_tests/model_tests/test_well_attributes_model.py +0 -110
  57. tests/unit_tests/model_tests/test_well_set_model.py +0 -70
  58. tests/unit_tests/plugin_tests/__init__.py +0 -0
  59. tests/unit_tests/plugin_tests/test_grouptree.py +0 -175
  60. tests/unit_tests/plugin_tests/test_simulation_time_series/__init__.py +0 -0
  61. tests/unit_tests/plugin_tests/test_simulation_time_series/mocks/__init__.py +0 -0
  62. tests/unit_tests/plugin_tests/test_simulation_time_series/mocks/derived_vectors_accessor_ensemble_summary_provider_mock.py +0 -60
  63. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/__init__.py +0 -0
  64. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_create_vector_traces_utils.py +0 -530
  65. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_dataframe_utils.py +0 -119
  66. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_datetime_utils.py +0 -51
  67. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_delta_ensemble_utils.py +0 -222
  68. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_derived_delta_ensemble_vectors_accessor_impl.py +0 -319
  69. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_derived_ensemble_vectors_accessor_impl.py +0 -271
  70. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_derived_ensemble_vectors_accessor_utils.py +0 -78
  71. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_derived_vector_accessor.py +0 -57
  72. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_ensemble_summary_provider_set_utils.py +0 -213
  73. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_from_timeseries_cumulatives.py +0 -322
  74. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_history_vectors.py +0 -201
  75. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_trace_line_shape.py +0 -56
  76. tests/unit_tests/plugin_tests/test_simulation_time_series/test_utils/test_vector_statistics.py +0 -171
  77. tests/unit_tests/plugin_tests/test_tornado_data.py +0 -130
  78. tests/unit_tests/plugin_tests/test_well_completions.py +0 -158
  79. tests/unit_tests/provider_tests/__init__.py +0 -0
  80. tests/unit_tests/provider_tests/test_ensemble_summary_provider.py +0 -255
  81. tests/unit_tests/provider_tests/test_ensemble_summary_provider_impl_arrow_lazy.py +0 -388
  82. tests/unit_tests/provider_tests/test_ensemble_summary_provider_impl_arrow_presampled.py +0 -160
  83. tests/unit_tests/provider_tests/test_ensemble_summary_provider_resampling.py +0 -320
  84. tests/unit_tests/provider_tests/test_ensemble_table_provider.py +0 -190
  85. tests/unit_tests/utils_tests/__init__.py +0 -0
  86. tests/unit_tests/utils_tests/test_dataframe_utils.py +0 -281
  87. tests/unit_tests/utils_tests/test_ensemble_summary_provider_set/__init__.py +0 -0
  88. tests/unit_tests/utils_tests/test_ensemble_summary_provider_set/test_ensemble_summary_provider_set.py +0 -306
  89. tests/unit_tests/utils_tests/test_formatting.py +0 -10
  90. tests/unit_tests/utils_tests/test_simulation_timeseries.py +0 -51
  91. webviz_subsurface/plugins/_co2_leakage/__init__.py +0 -1
  92. webviz_subsurface/plugins/_co2_leakage/_utilities/__init__.py +0 -0
  93. webviz_subsurface/plugins/_co2_leakage/views/__init__.py +0 -0
  94. webviz_subsurface/plugins/_co2_leakage/views/mainview/__init__.py +0 -0
  95. webviz_subsurface-0.2.39.dist-info/METADATA +0 -147
  96. /webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_error.py +0 -0
  97. /webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_types.py +0 -0
  98. {tests/integration_tests → webviz_subsurface/plugins/_co2_migration/_utilities}/__init__.py +0 -0
  99. /webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/_misc.py +0 -0
  100. /webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/fault_polygons_handler.py +0 -0
  101. /webviz_subsurface/plugins/{_co2_leakage → _co2_migration}/_utilities/plume_extent.py +0 -0
  102. {tests/unit_tests → webviz_subsurface/plugins/_co2_migration/views}/__init__.py +0 -0
  103. {tests/unit_tests/abbreviations_tests → webviz_subsurface/plugins/_co2_migration/views/mainview}/__init__.py +0 -0
  104. {webviz_subsurface-0.2.39.dist-info → webviz_subsurface-0.2.41.dist-info/licenses}/LICENSE +0 -0
  105. {webviz_subsurface-0.2.39.dist-info → webviz_subsurface-0.2.41.dist-info/licenses}/LICENSE.chromedriver +0 -0
@@ -1,255 +0,0 @@
1
- import datetime
2
- import os
3
- from pathlib import Path
4
- from typing import Optional
5
-
6
- import pandas as pd
7
-
8
- # The fmu.ensemble dependency resdata is only available for Linux,
9
- # hence, ignore any import exception here to make
10
- # it still possible to use the PvtPlugin on
11
- # machines with other OSes.
12
- #
13
- # NOTE: Functions in this file cannot be used
14
- # on non-Linux OSes.
15
- try:
16
- from fmu.ensemble import ScratchEnsemble
17
- except ImportError:
18
- pass
19
-
20
- from webviz_subsurface._providers import (
21
- EnsembleSummaryProviderFactory,
22
- Frequency,
23
- VectorMetadata,
24
- )
25
-
26
-
27
- # Helper function for generating per-realization CSV files based on aggregated CSV file
28
- def _split_aggr_csv_into_per_real(aggr_csvfile: str, output_folder: str) -> None:
29
- df = pd.read_csv(aggr_csvfile)
30
- df = df[df["ENSEMBLE"] == "iter-0"]
31
- df = df.drop(columns="ENSEMBLE")
32
-
33
- for real in range(0, 10):
34
- real_df = df[df["REAL"] == real]
35
- real_df = real_df.drop(columns="REAL")
36
- os.makedirs(f"{output_folder}/realization-{real}/iter-0/", exist_ok=True)
37
- real_df.to_csv(
38
- f"{output_folder}/realization-{real}/iter-0/smry.csv", index=False
39
- )
40
-
41
-
42
- # Helper function dumping values obtained via fmu to CSV file.
43
- # Used to find the expected values in "arrow" tests
44
- def _dump_smry_to_csv_using_fmu(
45
- ens_path: str, time_index: str, output_csv_file: str
46
- ) -> None:
47
- scratch_ensemble = ScratchEnsemble("tempEnsName", paths=ens_path)
48
- df = scratch_ensemble.load_smry(time_index=time_index)
49
- df.sort_values(["DATE", "REAL"], inplace=True)
50
-
51
- print("Dataframe shape::", df.shape)
52
-
53
- unique_dates = df["DATE"].unique()
54
- print("Num unique dates:", len(unique_dates))
55
- print(unique_dates)
56
-
57
- unique_reals = df["REAL"].unique()
58
- print("Num unique reals:", len(unique_reals))
59
- print(unique_reals)
60
-
61
- df.to_csv(output_csv_file, index=False)
62
-
63
-
64
- def test_create_from_arrow_unsmry_lazy(testdata_folder: Path, tmp_path: Path) -> None:
65
- ensemble_path = str(testdata_folder / "01_drogon_ahm/realization-*/iter-0")
66
-
67
- # Used to generate test results
68
- # _dump_smry_to_csv_using_fmu(ensemble_path, "monthly", "expected_smry.csv")
69
-
70
- factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
71
- provider = factory.create_from_arrow_unsmry_lazy(
72
- ens_path=ensemble_path, rel_file_pattern="share/results/unsmry/*.arrow"
73
- )
74
-
75
- assert provider.supports_resampling()
76
-
77
- assert provider.vector_metadata("FOPT") is not None
78
-
79
- vecnames = provider.vector_names()
80
- assert len(vecnames) == 931
81
-
82
- dates = provider.dates(Frequency.MONTHLY)
83
- assert len(dates) == 31
84
- assert isinstance(dates[0], datetime.datetime)
85
- assert dates[0] == datetime.datetime(2018, 1, 1)
86
- assert dates[-1] == datetime.datetime(2020, 7, 1)
87
-
88
- realizations = provider.realizations()
89
- assert len(realizations) == 100
90
- assert realizations[0] == 0
91
- assert realizations[-1] == 99
92
-
93
- vecdf = provider.get_vectors_df(["FOPR"], Frequency.MONTHLY)
94
- assert vecdf.shape == (3100, 3)
95
- assert vecdf.columns.tolist() == ["DATE", "REAL", "FOPR"]
96
- assert vecdf["DATE"].nunique() == 31
97
- assert vecdf["REAL"].nunique() == 100
98
- sampleddate = vecdf["DATE"][0]
99
- assert isinstance(sampleddate, datetime.datetime)
100
-
101
- vecdf = provider.get_vectors_df(["FOPR"], Frequency.MONTHLY, [5])
102
- assert vecdf.shape == (31, 3)
103
- assert vecdf.columns.tolist() == ["DATE", "REAL", "FOPR"]
104
- assert vecdf["DATE"].nunique() == 31
105
- assert vecdf["REAL"].nunique() == 1
106
- assert vecdf["REAL"][0] == 5
107
-
108
-
109
- def test_arrow_unsmry_lazy_vector_metadata(
110
- testdata_folder: Path, tmp_path: Path
111
- ) -> None:
112
- ensemble_path = str(testdata_folder / "01_drogon_ahm/realization-*/iter-0")
113
- factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
114
- provider = factory.create_from_arrow_unsmry_lazy(
115
- ens_path=ensemble_path, rel_file_pattern="share/results/unsmry/*.arrow"
116
- )
117
-
118
- meta: Optional[VectorMetadata] = provider.vector_metadata("FOPR")
119
- assert meta is not None
120
- assert meta.unit == "SM3/DAY"
121
- assert meta.is_total is False
122
- assert meta.is_rate is True
123
- assert meta.is_historical is False
124
- assert meta.keyword == "FOPR"
125
- assert meta.wgname is None
126
- assert meta.get_num == 0
127
-
128
- meta = provider.vector_metadata("WOPTH:A6")
129
- assert meta is not None
130
- assert meta.unit == "SM3"
131
- assert meta.is_total is True
132
- assert meta.is_rate is False
133
- assert meta.is_historical is True
134
- assert meta.keyword == "WOPTH"
135
- assert meta.wgname == "A6"
136
- assert meta.get_num == 11
137
-
138
- meta = provider.vector_metadata("FWCT")
139
- assert meta is not None
140
- assert meta.unit == ""
141
- assert meta.is_total is False
142
- assert meta.is_rate is True
143
- assert meta.is_historical is False
144
- assert meta.keyword == "FWCT"
145
- assert meta.wgname is None
146
- assert meta.get_num == 0
147
-
148
-
149
- def test_create_from_arrow_unsmry_presampled_monthly(
150
- testdata_folder: Path, tmp_path: Path
151
- ) -> None:
152
- ensemble_path = testdata_folder / "01_drogon_ahm/realization-*/iter-0"
153
-
154
- factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
155
- provider = factory.create_from_arrow_unsmry_presampled(
156
- ens_path=str(ensemble_path),
157
- rel_file_pattern="share/results/unsmry/*.arrow",
158
- sampling_frequency=Frequency.MONTHLY,
159
- )
160
-
161
- assert not provider.supports_resampling()
162
-
163
- assert provider.vector_metadata("FOPT") is not None
164
-
165
- vecnames = provider.vector_names()
166
- assert len(vecnames) == 931
167
-
168
- dates = provider.dates(None)
169
- assert len(dates) == 31
170
- assert isinstance(dates[0], datetime.datetime)
171
- assert dates[0] == datetime.datetime(2018, 1, 1)
172
- assert dates[-1] == datetime.datetime(2020, 7, 1)
173
-
174
- realizations = provider.realizations()
175
- assert len(realizations) == 100
176
- assert realizations[0] == 0
177
- assert realizations[-1] == 99
178
-
179
- vecdf = provider.get_vectors_df(["FOPR"], None)
180
- assert vecdf.shape == (3100, 3)
181
- assert vecdf.columns.tolist() == ["DATE", "REAL", "FOPR"]
182
- assert vecdf["DATE"].nunique() == 31
183
- assert vecdf["REAL"].nunique() == 100
184
- sampleddate = vecdf["DATE"][0]
185
- assert isinstance(sampleddate, datetime.datetime)
186
-
187
-
188
- def test_create_from_per_realization_csv_file(
189
- testdata_folder: Path, tmp_path: Path
190
- ) -> None:
191
- _split_aggr_csv_into_per_real(
192
- str(testdata_folder / "reek_test_data/aggregated_data/smry.csv"),
193
- str(tmp_path / "fake_data"),
194
- )
195
-
196
- factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
197
-
198
- ens_path = tmp_path / "fake_data/realization-*/iter-0"
199
- csvfile = "smry.csv"
200
- provider = factory.create_from_per_realization_csv_file(str(ens_path), csvfile)
201
-
202
- vecnames = provider.vector_names()
203
- assert len(vecnames) == 16
204
- assert vecnames[0] == "FGIP"
205
- assert vecnames[15] == "YEARS"
206
-
207
- realizations = provider.realizations()
208
- assert len(realizations) == 10
209
-
210
- vecdf = provider.get_vectors_df(["FOPR"], None)
211
- assert vecdf.shape == (380, 3)
212
- assert vecdf.columns.tolist() == ["DATE", "REAL", "FOPR"]
213
- assert vecdf["REAL"].nunique() == 10
214
- sampleddate = vecdf["DATE"][0]
215
- assert isinstance(sampleddate, datetime.datetime)
216
-
217
- vecdf = provider.get_vectors_df(["FOPR"], None, [1])
218
- assert vecdf.shape == (38, 3)
219
- assert vecdf.columns.tolist() == ["DATE", "REAL", "FOPR"]
220
- assert vecdf["REAL"].nunique() == 1
221
-
222
-
223
- def test_create_from_ensemble_csv(testdata_folder: Path, tmp_path: Path) -> None:
224
- factory = EnsembleSummaryProviderFactory(tmp_path, allow_storage_writes=True)
225
-
226
- csv_filename = (
227
- testdata_folder / "reek_test_data" / "aggregated_data" / "smry_hm.csv"
228
- )
229
- provider = factory.create_from_ensemble_csv_file(csv_filename, "iter-0")
230
-
231
- vecnames = provider.vector_names()
232
- assert len(vecnames) == 473
233
- assert vecnames[0] == "BPR:15,28,1"
234
- assert vecnames[472] == "YEARS"
235
-
236
- realizations = provider.realizations()
237
- assert len(realizations) == 10
238
-
239
- dates = provider.dates(None)
240
- assert len(dates) == 38
241
- assert isinstance(dates[0], datetime.datetime)
242
- assert dates[0] == datetime.datetime(2000, 1, 1)
243
- assert dates[-1] == datetime.datetime(2003, 2, 1)
244
-
245
- vecdf = provider.get_vectors_df(["FOPR"], None)
246
- assert vecdf.shape == (380, 3)
247
- assert vecdf.columns.tolist() == ["DATE", "REAL", "FOPR"]
248
- assert vecdf["REAL"].nunique() == 10
249
- sampleddate = vecdf["DATE"][0]
250
- assert isinstance(sampleddate, datetime.datetime)
251
-
252
- vecdf = provider.get_vectors_df(["FOPR"], None, [1])
253
- assert vecdf.shape == (38, 3)
254
- assert vecdf.columns.tolist() == ["DATE", "REAL", "FOPR"]
255
- assert vecdf["REAL"].nunique() == 1
@@ -1,388 +0,0 @@
1
- from datetime import datetime
2
- from pathlib import Path
3
- from typing import Dict
4
-
5
- import numpy as np
6
- import pyarrow as pa
7
- import pyarrow.compute as pc
8
- import pytest
9
-
10
- from webviz_subsurface._providers.ensemble_summary_provider._provider_impl_arrow_lazy import (
11
- Frequency,
12
- ProviderImplArrowLazy,
13
- _find_first_non_increasing_date_pair,
14
- _is_date_column_monotonically_increasing,
15
- )
16
- from webviz_subsurface._providers.ensemble_summary_provider.ensemble_summary_provider import (
17
- EnsembleSummaryProvider,
18
- )
19
-
20
-
21
- def _add_mock_smry_meta_to_table(table: pa.Table) -> pa.Table:
22
- schema = table.schema
23
- for colname in schema.names:
24
- is_rate = bool("_r" in colname)
25
- is_total = bool("_t" in colname)
26
-
27
- metadata = None
28
- if is_rate or is_total:
29
- metadata = {
30
- b"unit": b"N/A",
31
- b"is_rate": b"True" if is_rate else b"False",
32
- b"is_total": b"True" if is_total else b"False",
33
- b"is_historical": b"False",
34
- b"keyword": b"UNKNOWN",
35
- }
36
-
37
- if metadata:
38
- idx = schema.get_field_index(colname)
39
- field = schema.field(idx)
40
- field = field.with_metadata(metadata)
41
- schema = schema.set(idx, field)
42
-
43
- table = table.cast(schema)
44
- return table
45
-
46
-
47
- def _split_into_per_realization_tables(table: pa.Table) -> Dict[int, pa.Table]:
48
- per_real_tables: Dict[int, pa.Table] = {}
49
- unique_reals = table.column("REAL").unique().to_pylist()
50
- for real in unique_reals:
51
- # pylint: disable=no-member
52
- mask = pc.is_in(table["REAL"], value_set=pa.array([real]))
53
- real_table = table.filter(mask).drop(["REAL"])
54
- per_real_tables[real] = real_table
55
-
56
- return per_real_tables
57
-
58
-
59
- def _create_provider_obj_with_data(
60
- input_data: list,
61
- storage_dir: Path,
62
- ) -> EnsembleSummaryProvider:
63
- # Turn rows into columns
64
- columns_with_header = list(zip(*input_data))
65
-
66
- input_dict = {}
67
- for col in columns_with_header:
68
- colname = col[0]
69
- coldata = col[1:]
70
- input_dict[colname] = coldata
71
- input_table = pa.Table.from_pydict(input_dict)
72
-
73
- input_table = _add_mock_smry_meta_to_table(input_table)
74
-
75
- # Split into per realization tables
76
- per_real_tables = _split_into_per_realization_tables(input_table)
77
-
78
- ProviderImplArrowLazy.write_backing_store_from_per_realization_tables(
79
- storage_dir, "dummy_key", per_real_tables
80
- )
81
- new_provider = ProviderImplArrowLazy.from_backing_store(storage_dir, "dummy_key")
82
-
83
- if not new_provider:
84
- raise ValueError("Failed to create EnsembleSummaryProvider")
85
-
86
- return new_provider
87
-
88
-
89
- def test_create_with_dates_after_2262(tmp_path: Path) -> None:
90
- # fmt:off
91
- input_data = [
92
- ["DATE", "REAL", "A"],
93
- [np.datetime64("2000-01-02T00:00", "ms"), 1, 10.0],
94
- [np.datetime64("2500-12-20T23:59", "ms"), 1, 12.0],
95
- [np.datetime64("2500-12-21T22:58", "ms"), 1, 13.0],
96
- ]
97
- # fmt:on
98
- provider = _create_provider_obj_with_data(input_data, tmp_path)
99
-
100
- dates = provider.dates(resampling_frequency=None)
101
- assert len(dates) == 3
102
- assert dates[0] == datetime(2000, 1, 2, 00, 00)
103
- assert dates[1] == datetime(2500, 12, 20, 23, 59)
104
- assert dates[2] == datetime(2500, 12, 21, 22, 58)
105
-
106
-
107
- def test_get_vector_names(tmp_path: Path) -> None:
108
- # fmt:off
109
- input_data = [
110
- ["DATE", "REAL", "A", "C", "Z"],
111
- [np.datetime64("2023-12-20", "ms"), 0, 10.0, 1.0, 0.0 ],
112
- [np.datetime64("2023-12-20", "ms"), 1, 12.0, 1.0, 0.0 ],
113
- [np.datetime64("2023-12-21", "ms"), 1, 13.0, 1.0, 0.0 ],
114
- ]
115
- # fmt:on
116
- provider = _create_provider_obj_with_data(input_data, tmp_path)
117
-
118
- all_vecnames = provider.vector_names()
119
- assert len(all_vecnames) == 3
120
- assert all_vecnames == ["A", "C", "Z"]
121
-
122
- non_const_vec_names = provider.vector_names_filtered_by_value(
123
- exclude_constant_values=True
124
- )
125
- assert len(non_const_vec_names) == 1
126
- assert non_const_vec_names == ["A"]
127
-
128
- non_zero_vec_names = provider.vector_names_filtered_by_value(
129
- exclude_all_values_zero=True
130
- )
131
- assert len(non_zero_vec_names) == 2
132
- assert non_zero_vec_names == ["A", "C"]
133
-
134
- all_realizations = provider.realizations()
135
- assert len(all_realizations) == 2
136
-
137
-
138
- def test_get_dates_without_resampling(tmp_path: Path) -> None:
139
- # fmt:off
140
- input_data = [
141
- ["DATE", "REAL", "A"],
142
- [np.datetime64("2023-12-20", "ms"), 0, 10.0],
143
- [np.datetime64("2023-12-20", "ms"), 1, 12.0],
144
- [np.datetime64("2023-12-21", "ms"), 1, 13.0],
145
- ]
146
- # fmt:on
147
- provider = _create_provider_obj_with_data(input_data, tmp_path)
148
-
149
- all_realizations = provider.realizations()
150
- assert len(all_realizations) == 2
151
-
152
- all_dates = provider.dates(resampling_frequency=None)
153
- assert len(all_dates) == 1
154
- assert isinstance(all_dates[0], datetime)
155
-
156
- r0_dates = provider.dates(resampling_frequency=None, realizations=[0])
157
- r1_dates = provider.dates(resampling_frequency=None, realizations=[1])
158
- assert len(r0_dates) == 1
159
- assert len(r1_dates) == 2
160
-
161
-
162
- def test_get_dates_with_daily_resampling(tmp_path: Path) -> None:
163
- # fmt:off
164
- input_data = [
165
- ["DATE", "REAL", "A",],
166
- [np.datetime64("2020-01-01", "ms"), 0, 10.0],
167
- [np.datetime64("2020-01-04", "ms"), 0, 40.0],
168
- [np.datetime64("2020-01-06", "ms"), 1, 60.0],
169
- ]
170
- # fmt:on
171
- provider = _create_provider_obj_with_data(input_data, tmp_path)
172
-
173
- all_realizations = provider.realizations()
174
- assert len(all_realizations) == 2
175
-
176
- all_dates = provider.dates(resampling_frequency=Frequency.DAILY)
177
- assert len(all_dates) == 6
178
- assert isinstance(all_dates[0], datetime)
179
- assert all_dates[0] == datetime(2020, 1, 1)
180
- assert all_dates[1] == datetime(2020, 1, 2)
181
- assert all_dates[4] == datetime(2020, 1, 5)
182
- assert all_dates[5] == datetime(2020, 1, 6)
183
-
184
- r0_dates = provider.dates(resampling_frequency=Frequency.DAILY, realizations=[0])
185
- assert len(r0_dates) == 4
186
-
187
- r1_dates = provider.dates(resampling_frequency=Frequency.DAILY, realizations=[1])
188
- assert len(r1_dates) == 1
189
-
190
-
191
- def test_get_vector_metadata(tmp_path: Path) -> None:
192
- # fmt:off
193
- input_data = [
194
- ["DATE", "REAL", "A", "B_r", "C_t", "D_r_t"],
195
- [np.datetime64("2023-12-20", "ms"), 0, 1.0, 10.0, 21.0, 31.0 ],
196
- [np.datetime64("2023-12-20", "ms"), 1, 2.0, 12.0, 22.0, 32.0 ],
197
- [np.datetime64("2023-12-21", "ms"), 1, 3.0, 13.0, 23.0, 33.0 ],
198
- ]
199
- # fmt:on
200
- provider = _create_provider_obj_with_data(input_data, tmp_path)
201
-
202
- meta = provider.vector_metadata("A")
203
- assert meta is None
204
-
205
- meta = provider.vector_metadata("B_r")
206
- assert meta and meta.is_rate is True
207
-
208
- meta = provider.vector_metadata("C_t")
209
- assert meta and meta.is_total is True
210
-
211
- meta = provider.vector_metadata("D_r_t")
212
- assert meta and meta.is_rate is True
213
- assert meta and meta.is_total is True
214
-
215
-
216
- def test_get_vectors_without_resampling(tmp_path: Path) -> None:
217
- # fmt:off
218
- input_data = [
219
- ["DATE", "REAL", "A", "B"],
220
- [np.datetime64("2023-12-20", "ms"), 0, 10.0, 21.0],
221
- [np.datetime64("2023-12-20", "ms"), 1, 12.0, 22.0],
222
- [np.datetime64("2023-12-21", "ms"), 1, 13.0, 23.0],
223
- ]
224
- # fmt:on
225
- provider = _create_provider_obj_with_data(input_data, tmp_path)
226
-
227
- all_vecnames = provider.vector_names()
228
- assert len(all_vecnames) == 2
229
-
230
- vecdf = provider.get_vectors_df(["A"], resampling_frequency=None)
231
- assert vecdf.shape == (3, 3)
232
- assert vecdf.columns.tolist() == ["DATE", "REAL", "A"]
233
-
234
- sampleddate = vecdf["DATE"][0]
235
- assert isinstance(sampleddate, datetime)
236
-
237
- vecdf = provider.get_vectors_df(["A"], resampling_frequency=None, realizations=[1])
238
- assert vecdf.shape == (2, 3)
239
- assert vecdf.columns.tolist() == ["DATE", "REAL", "A"]
240
-
241
- vecdf = provider.get_vectors_df(
242
- ["B", "A"], resampling_frequency=None, realizations=[0]
243
- )
244
- assert vecdf.shape == (1, 4)
245
- assert vecdf.columns.tolist() == ["DATE", "REAL", "B", "A"]
246
-
247
-
248
- def test_get_vectors_with_daily_resampling(tmp_path: Path) -> None:
249
- # fmt:off
250
- input_data = [
251
- ["DATE", "REAL", "TOT_t", "RATE_r"],
252
- [np.datetime64("2020-01-01", "ms"), 1, 10.0, 1.0],
253
- [np.datetime64("2020-01-04", "ms"), 1, 40.0, 4.0],
254
- [np.datetime64("2020-01-06", "ms"), 1, 60.0, 6.0],
255
- ]
256
- # fmt:on
257
- provider = _create_provider_obj_with_data(input_data, tmp_path)
258
-
259
- vecdf = provider.get_vectors_df(
260
- ["TOT_t", "RATE_r"], resampling_frequency=Frequency.DAILY
261
- )
262
-
263
- date_arr = vecdf["DATE"].to_numpy()
264
- assert date_arr[0] == np.datetime64("2020-01-01", "ms")
265
- assert date_arr[1] == np.datetime64("2020-01-02", "ms")
266
- assert date_arr[2] == np.datetime64("2020-01-03", "ms")
267
- assert date_arr[3] == np.datetime64("2020-01-04", "ms")
268
- assert date_arr[4] == np.datetime64("2020-01-05", "ms")
269
- assert date_arr[5] == np.datetime64("2020-01-06", "ms")
270
-
271
- # Check interpolation for the total column
272
- tot_arr = vecdf["TOT_t"].to_numpy()
273
- assert tot_arr[0] == 10
274
- assert tot_arr[1] == 20
275
- assert tot_arr[2] == 30
276
- assert tot_arr[3] == 40
277
- assert tot_arr[4] == 50
278
- assert tot_arr[5] == 60
279
-
280
- # Check backfill for the rate column
281
- tot_arr = vecdf["RATE_r"].to_numpy()
282
- assert tot_arr[0] == 1
283
- assert tot_arr[1] == 4
284
- assert tot_arr[2] == 4
285
- assert tot_arr[3] == 4
286
- assert tot_arr[4] == 6
287
- assert tot_arr[5] == 6
288
-
289
-
290
- def test_get_vectors_for_date_without_resampling(tmp_path: Path) -> None:
291
- # fmt:off
292
- input_data = [
293
- ["DATE", "REAL", "A", "B", "C"],
294
- [np.datetime64("2023-12-20", "ms"), 0, 10.0, 21.0, 31.0 ],
295
- [np.datetime64("2023-12-20", "ms"), 1, 12.0, 22.0, 32.0 ],
296
- [np.datetime64("2023-12-21", "ms"), 1, 13.0, 23.0, 33.0 ],
297
- ]
298
- # fmt:on
299
- provider = _create_provider_obj_with_data(input_data, tmp_path)
300
-
301
- all_dates = provider.dates(resampling_frequency=None)
302
- assert len(all_dates) == 1
303
-
304
- date_to_get = all_dates[0]
305
- assert isinstance(date_to_get, datetime)
306
-
307
- vecdf = provider.get_vectors_for_date_df(date_to_get, ["A"])
308
- assert vecdf.shape == (2, 2)
309
- assert vecdf.columns.tolist() == ["REAL", "A"]
310
-
311
- date_to_get = all_dates[0]
312
- vecdf = provider.get_vectors_for_date_df(date_to_get, ["A", "B"], [0])
313
- assert vecdf.shape == (1, 3)
314
- assert vecdf.columns.tolist() == ["REAL", "A", "B"]
315
-
316
- date_to_get = all_dates[0]
317
- vecdf = provider.get_vectors_for_date_df(date_to_get, ["A", "C"], [0])
318
- assert vecdf.shape == (1, 3)
319
- assert vecdf.columns.tolist() == ["REAL", "A", "C"]
320
-
321
-
322
- def test_get_vectors_for_date_with_resampling(tmp_path: Path) -> None:
323
- # fmt:off
324
- input_data = [
325
- ["DATE", "REAL", "TOT_t", "RATE_r"],
326
- [np.datetime64("2020-01-01", "ms"), 1, 10.0, 1.0],
327
- [np.datetime64("2020-01-04", "ms"), 1, 40.0, 4.0],
328
- [np.datetime64("2020-01-06", "ms"), 1, 60.0, 6.0],
329
- ]
330
- # fmt:on
331
- provider = _create_provider_obj_with_data(input_data, tmp_path)
332
-
333
- date_to_get = datetime(2020, 1, 3)
334
-
335
- df = provider.get_vectors_for_date_df(date_to_get, ["TOT_t", "RATE_r"])
336
- assert df.shape == (1, 3)
337
-
338
- assert df["REAL"][0] == 1
339
- assert df["TOT_t"][0] == 30.0
340
- assert df["RATE_r"][0] == 4.0
341
-
342
-
343
- def test_monotonically_increasing_date_util_functions() -> None:
344
- table_with_duplicate = pa.Table.from_pydict(
345
- {
346
- "DATE": [
347
- np.datetime64("2020-01-01", "ms"),
348
- np.datetime64("2020-01-02", "ms"),
349
- np.datetime64("2020-01-02", "ms"),
350
- np.datetime64("2020-01-03", "ms"),
351
- ],
352
- },
353
- )
354
-
355
- table_with_decrease = pa.Table.from_pydict(
356
- {
357
- "DATE": [
358
- np.datetime64("2020-01-01", "ms"),
359
- np.datetime64("2020-01-05", "ms"),
360
- np.datetime64("2020-01-04", "ms"),
361
- np.datetime64("2020-01-10", "ms"),
362
- ],
363
- },
364
- )
365
-
366
- assert not _is_date_column_monotonically_increasing(table_with_duplicate)
367
- offending_pair = _find_first_non_increasing_date_pair(table_with_duplicate)
368
- assert offending_pair[0] == np.datetime64("2020-01-02", "ms")
369
- assert offending_pair[1] == np.datetime64("2020-01-02", "ms")
370
-
371
- assert not _is_date_column_monotonically_increasing(table_with_decrease)
372
- offending_pair = _find_first_non_increasing_date_pair(table_with_decrease)
373
- assert offending_pair[0] == np.datetime64("2020-01-05", "ms")
374
- assert offending_pair[1] == np.datetime64("2020-01-04", "ms")
375
-
376
-
377
- def test_create_with_repeated_dates(tmp_path: Path) -> None:
378
- # fmt:off
379
- input_data = [
380
- ["DATE", "REAL", "A"],
381
- [np.datetime64("2000-01-02T00:00", "ms"), 1, 10.0],
382
- [np.datetime64("2500-12-20T23:59", "ms"), 1, 11.0],
383
- [np.datetime64("2500-12-20T23:59", "ms"), 1, 12.0],
384
- ]
385
- # fmt:on
386
-
387
- with pytest.raises(ValueError):
388
- _create_provider_obj_with_data(input_data, tmp_path)