snowpark-checkpoints-validators 0.2.0rc1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. snowflake/snowpark_checkpoints/__init__.py +44 -0
  2. snowflake/snowpark_checkpoints/__version__.py +16 -0
  3. snowflake/snowpark_checkpoints/checkpoint.py +580 -0
  4. snowflake/snowpark_checkpoints/errors.py +60 -0
  5. snowflake/snowpark_checkpoints/io_utils/__init__.py +26 -0
  6. snowflake/snowpark_checkpoints/io_utils/io_default_strategy.py +57 -0
  7. snowflake/snowpark_checkpoints/io_utils/io_env_strategy.py +133 -0
  8. snowflake/snowpark_checkpoints/io_utils/io_file_manager.py +76 -0
  9. snowflake/snowpark_checkpoints/job_context.py +128 -0
  10. snowflake/snowpark_checkpoints/singleton.py +23 -0
  11. snowflake/snowpark_checkpoints/snowpark_sampler.py +124 -0
  12. snowflake/snowpark_checkpoints/spark_migration.py +255 -0
  13. snowflake/snowpark_checkpoints/utils/__init__.py +14 -0
  14. snowflake/snowpark_checkpoints/utils/constants.py +134 -0
  15. snowflake/snowpark_checkpoints/utils/extra_config.py +132 -0
  16. snowflake/snowpark_checkpoints/utils/logging_utils.py +67 -0
  17. snowflake/snowpark_checkpoints/utils/pandera_check_manager.py +399 -0
  18. snowflake/snowpark_checkpoints/utils/supported_types.py +65 -0
  19. snowflake/snowpark_checkpoints/utils/telemetry.py +939 -0
  20. snowflake/snowpark_checkpoints/utils/utils_checks.py +398 -0
  21. snowflake/snowpark_checkpoints/validation_result_metadata.py +159 -0
  22. snowflake/snowpark_checkpoints/validation_results.py +49 -0
  23. snowpark_checkpoints_validators-0.3.0.dist-info/METADATA +325 -0
  24. snowpark_checkpoints_validators-0.3.0.dist-info/RECORD +26 -0
  25. snowpark_checkpoints_validators-0.2.0rc1.dist-info/METADATA +0 -514
  26. snowpark_checkpoints_validators-0.2.0rc1.dist-info/RECORD +0 -4
  27. {snowpark_checkpoints_validators-0.2.0rc1.dist-info → snowpark_checkpoints_validators-0.3.0.dist-info}/WHEEL +0 -0
  28. {snowpark_checkpoints_validators-0.2.0rc1.dist-info → snowpark_checkpoints_validators-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,398 @@
+ # Copyright 2025 Snowflake Inc.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import inspect
+ import json
+ import logging
+ import os
+ import re
+
+ from datetime import datetime
+ from typing import Any, Optional
+
+ import numpy as np
+
+ from pandera import DataFrameSchema
+
+ from snowflake.snowpark import DataFrame as SnowparkDataFrame
+ from snowflake.snowpark_checkpoints.errors import SchemaValidationError
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
+ from snowflake.snowpark_checkpoints.snowpark_sampler import (
+     SamplingAdapter,
+     SamplingStrategy,
+ )
+ from snowflake.snowpark_checkpoints.utils.constants import (
+     CHECKPOINT_JSON_OUTPUT_FILE_FORMAT_NAME,
+     CHECKPOINT_TABLE_NAME_FORMAT,
+     COLUMNS_KEY,
+     DATAFRAME_CUSTOM_DATA_KEY,
+     DATAFRAME_EXECUTION_MODE,
+     DATAFRAME_PANDERA_SCHEMA_KEY,
+     DEFAULT_KEY,
+     EXCEPT_HASH_AGG_QUERY,
+     FAIL_STATUS,
+     PASS_STATUS,
+     SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
+ )
+ from snowflake.snowpark_checkpoints.utils.extra_config import (
+     get_checkpoint_file,
+ )
+ from snowflake.snowpark_checkpoints.utils.pandera_check_manager import (
+     PanderaCheckManager,
+ )
+ from snowflake.snowpark_checkpoints.utils.telemetry import STATUS_KEY, report_telemetry
+ from snowflake.snowpark_checkpoints.validation_result_metadata import (
+     ValidationResultsMetadata,
+ )
+ from snowflake.snowpark_checkpoints.validation_results import ValidationResult
+
+
+ LOGGER = logging.getLogger(__name__)
+
+
+ def _replace_special_characters(checkpoint_name: str) -> str:
+     """Replace special characters in the checkpoint name with underscores.
+
+     Args:
+         checkpoint_name (str): The checkpoint name to process.
+
+     Returns:
+         str: The checkpoint name with special characters replaced by underscores.
+
+     """
+     regex = r"^[a-zA-Z_\s-][a-zA-Z0-9$_\s-]*$"
+     if not bool(re.match(regex, checkpoint_name)):
+         raise ValueError(
+             f"Invalid checkpoint name: {checkpoint_name}",
+             "Checkpoint name must contain only alphanumeric characters, hyphens, underscores and dollar signs.",
+         )
+     return re.sub(r"[\s-]", "_", checkpoint_name)
+
+
+ def _process_sampling(
+     df: SnowparkDataFrame,
+     pandera_schema: DataFrameSchema,
+     job_context: Optional[SnowparkJobContext] = None,
+     sample_frac: Optional[float] = 1.0,
+     sample_number: Optional[int] = None,
+     sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
+ ):
+     """Process a Snowpark DataFrame by sampling it according to the specified parameters.
+
+     Adjusts the column casing of the provided Pandera schema to uppercase.
+
+     Args:
+         df (SnowparkDataFrame): The Snowpark DataFrame to be sampled.
+         pandera_schema (DataFrameSchema): The Pandera schema to validate the DataFrame.
+         job_context (SnowparkJobContext, optional): The job context for the sampling operation.
+             Defaults to None.
+         sample_frac (Optional[float], optional): The fraction of rows to sample.
+             Defaults to 1.0.
+         sample_number (Optional[int], optional): The number of rows to sample.
+             Defaults to None.
+         sampling_strategy (Optional[SamplingStrategy], optional): The strategy to use for sampling.
+             Defaults to SamplingStrategy.RANDOM_SAMPLE.
+
+     Returns:
+         Tuple[DataFrameSchema, pd.DataFrame]: A tuple containing the adjusted Pandera schema with uppercase column names
+             and the sampled pandas DataFrame.
+
+     """
+     sampler = SamplingAdapter(
+         job_context, sample_frac, sample_number, sampling_strategy
+     )
+     sampler.process_args([df])
+
+     # fix up the column casing
+     pandera_schema_upper = pandera_schema
+     new_columns: dict[Any, Any] = {}
+
+     for col in pandera_schema.columns:
+         new_columns[col.upper()] = pandera_schema.columns[col]
+
+     pandera_schema_upper = pandera_schema_upper.remove_columns(pandera_schema.columns)
+     pandera_schema_upper = pandera_schema_upper.add_columns(new_columns)
+
+     sample_df = sampler.get_sampled_pandas_args()[0]
+     sample_df.index = np.ones(sample_df.count().iloc[0])
+
+     return pandera_schema_upper, sample_df
+
+
+ def _generate_schema(
+     checkpoint_name: str, output_path: Optional[str] = None
+ ) -> DataFrameSchema:
+     """Generate a DataFrameSchema based on the checkpoint name provided.
+
+     This function reads a JSON file corresponding to the checkpoint name,
+     extracts schema information, and constructs a DataFrameSchema object.
+     It also adds custom checks for numeric and boolean types if specified
+     in the JSON file.
+
+     Args:
+         checkpoint_name (str): The name of the checkpoint used to locate
+             the JSON file containing schema information.
+         output_path (Optional[str]): The path to the output directory.
+
+     Returns:
+         DataFrameSchema: A schema object representing the structure and
+             constraints of the DataFrame.
+
+     """
+     LOGGER.info(
+         "Generating Pandera DataFrameSchema for checkpoint: '%s'", checkpoint_name
+     )
+     current_directory_path = (
+         output_path if output_path else get_io_file_manager().getcwd()
+     )
+
+     output_directory_path = os.path.join(
+         current_directory_path, SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME
+     )
+
+     if not get_io_file_manager().folder_exists(output_directory_path):
+         raise ValueError(
+             """Output directory snowpark-checkpoints-output does not exist.
+             Please run the Snowpark checkpoint collector first."""
+         )
+
+     checkpoint_schema_file_path = os.path.join(
+         output_directory_path,
+         CHECKPOINT_JSON_OUTPUT_FILE_FORMAT_NAME.format(checkpoint_name),
+     )
+
+     if not get_io_file_manager().file_exists(checkpoint_schema_file_path):
+         raise ValueError(
+             f"Checkpoint {checkpoint_name} JSON file not found. Please run the Snowpark checkpoint collector first."
+         )
+
+     LOGGER.info("Reading schema from file: '%s'", checkpoint_schema_file_path)
+     schema_file = get_io_file_manager().read(checkpoint_schema_file_path)
+     checkpoint_schema_config = json.loads(schema_file)
+
+     if DATAFRAME_PANDERA_SCHEMA_KEY not in checkpoint_schema_config:
+         raise ValueError(
+             f"Pandera schema not found in the JSON file for checkpoint: {checkpoint_name}"
+         )
+
+     schema_dict = checkpoint_schema_config.get(DATAFRAME_PANDERA_SCHEMA_KEY)
+     schema_dict_str = json.dumps(schema_dict)
+     schema = DataFrameSchema.from_json(schema_dict_str)
+
+     if DATAFRAME_CUSTOM_DATA_KEY not in checkpoint_schema_config:
+         LOGGER.info(
+             "No custom data found in the JSON file for checkpoint: '%s'",
+             checkpoint_name,
+         )
+         return schema
+
+     custom_data = checkpoint_schema_config.get(DATAFRAME_CUSTOM_DATA_KEY)
+
+     if COLUMNS_KEY not in custom_data:
+         raise ValueError(
+             f"Columns not found in the JSON file for checkpoint: {checkpoint_name}"
+         )
+
+     pandera_check_manager = PanderaCheckManager(
+         checkpoint_name=checkpoint_name, schema=schema
+     )
+     schema = pandera_check_manager.proccess_checks(custom_data)
+
+     return schema
+
+
+ def _check_compare_data(
+     df: SnowparkDataFrame,
+     job_context: Optional[SnowparkJobContext],
+     checkpoint_name: str,
+     output_path: Optional[str] = None,
+ ):
+     """Compare the data in the provided Snowpark DataFrame with the data in a checkpoint table.
+
+     This function writes the provided DataFrame to a table and compares it with an existing checkpoint table
+     using a hash aggregation query. If there is a data mismatch, it marks the job context as failed and raises a
+     SchemaValidationError. If the data matches, it marks the job context as passed.
+
+     Args:
+         df (SnowparkDataFrame): The Snowpark DataFrame to compare.
+         job_context (Optional[SnowparkJobContext]): The job context containing the Snowpark session and job state.
+         checkpoint_name (str): The name of the checkpoint table to compare against.
+         output_path (Optional[str]): The path to the output directory.
+
+     Raises:
+         SchemaValidationError: If there is a data mismatch between the DataFrame and the checkpoint table.
+
+     """
+     _, err = _compare_data(df, job_context, checkpoint_name, output_path)
+     if err is not None:
+         raise err
+
+
+ @report_telemetry(
+     params_list=["df"], return_indexes=[(STATUS_KEY, 0)], multiple_return=True
+ )
+ def _compare_data(
+     df: SnowparkDataFrame,
+     job_context: Optional[SnowparkJobContext],
+     checkpoint_name: str,
+     output_path: Optional[str] = None,
+ ) -> tuple[bool, Optional[SchemaValidationError]]:
+     """Compare the data in the provided Snowpark DataFrame with the data in a checkpoint table.
+
+     This function writes the provided DataFrame to a table and compares it with an existing checkpoint table
+     using a hash aggregation query. If there is a data mismatch, it marks the job context as failed and returns a
+     SchemaValidationError. If the data matches, it marks the job context as passed.
+
+     Args:
+         df (SnowparkDataFrame): The Snowpark DataFrame to compare.
+         job_context (Optional[SnowparkJobContext]): The job context containing the Snowpark session and job state.
+         checkpoint_name (str): The name of the checkpoint table to compare against.
+         output_path (Optional[str]): The path to the output directory.
+
+     Returns:
+         Tuple[bool, Optional[SchemaValidationError]]: A tuple containing a boolean indicating if the data matches
+             and an optional SchemaValidationError if there is a data mismatch.
+
+     """
+     new_table_name = CHECKPOINT_TABLE_NAME_FORMAT.format(checkpoint_name)
+     LOGGER.info(
+         "Writing Snowpark DataFrame to table: '%s' for checkpoint: '%s'",
+         new_table_name,
+         checkpoint_name,
+     )
+     df.write.save_as_table(table_name=new_table_name, mode="overwrite")
+
+     LOGGER.info(
+         "Comparing DataFrame to checkpoint table: '%s' for checkpoint: '%s'",
+         new_table_name,
+         checkpoint_name,
+     )
+     expect_df = job_context.snowpark_session.sql(
+         EXCEPT_HASH_AGG_QUERY, [checkpoint_name, new_table_name]
+     )
+
+     if expect_df.count() != 0:
+         error_message = f"Data mismatch for checkpoint {checkpoint_name}"
+         job_context._mark_fail(
+             error_message,
+             checkpoint_name,
+             df,
+             DATAFRAME_EXECUTION_MODE,
+         )
+         _update_validation_result(
+             checkpoint_name,
+             FAIL_STATUS,
+             output_path,
+         )
+         return False, SchemaValidationError(
+             error_message,
+             job_context,
+             checkpoint_name,
+             df,
+         )
+     else:
+         _update_validation_result(checkpoint_name, PASS_STATUS, output_path)
+         job_context._mark_pass(checkpoint_name, DATAFRAME_EXECUTION_MODE)
+         return True, None
+
+
+ def _find_frame_in(stack: list[inspect.FrameInfo]) -> tuple:
+     """Find a specific frame in the provided stack trace.
+
+     This function searches through the provided stack trace to find a frame that matches
+     certain criteria. It looks for frames where the function name is "wrapper" or where
+     the code context matches specific regular expressions.
+
+     Args:
+         stack (list[inspect.FrameInfo]): A list of frame information objects representing
+             the current stack trace.
+
+     Returns:
+         tuple: A tuple containing the relative path of the file and the line number of the
+             matched frame. If no frame is matched, it returns a default key and -1.
+
+     """
+     regex = (
+         r"(?<!_check_dataframe_schema_file)"
+         r"(?<!_check_dataframe_schema)"
+         r"(validate_dataframe_checkpoint|check_dataframe_schema)"
+     )
+
+     first_frames = stack[:7]
+     first_frames.reverse()
+
+     for i, frame in enumerate(first_frames):
+         if frame.function == "wrapper" and i - 1 >= 0:
+             next_frame = first_frames[i - 1]
+             return _get_relative_path(next_frame.filename), next_frame.lineno
+
+         if len(frame.code_context) >= 0 and re.search(regex, frame.code_context[0]):
+             return _get_relative_path(frame.filename), frame.lineno
+     return DEFAULT_KEY, -1
+
+
+ def _get_relative_path(file_path: str) -> str:
+     """Get the relative path of a file.
+
+     Args:
+         file_path (str): The path to the file.
+
+     Returns:
+         str: The relative path of the file.
+
+     """
+     current_directory = get_io_file_manager().getcwd()
+     return os.path.relpath(file_path, current_directory)
+
+
+ def _update_validation_result(
+     checkpoint_name: str, validation_status: str, output_path: Optional[str] = None
+ ) -> None:
+     """Update the validation result file with the status of a given checkpoint.
+
+     Args:
+         checkpoint_name (str): The name of the checkpoint to update.
+         validation_status (str): The validation status to record for the checkpoint.
+         output_path (Optional[str]): The path to the output directory.
+
+     Returns:
+         None
+
+     """
+     _file = get_checkpoint_file(checkpoint_name)
+
+     stack = inspect.stack()
+
+     _file_from_stack, _line_of_code = _find_frame_in(stack)
+
+     pipeline_result_metadata = ValidationResultsMetadata(output_path)
+
+     pipeline_result_metadata.clean()
+
+     pipeline_result_metadata.add_validation_result(
+         ValidationResult(
+             timestamp=datetime.now().isoformat(),
+             file=_file if _file else _file_from_stack,
+             line_of_code=_line_of_code,
+             checkpoint_name=checkpoint_name,
+             result=validation_status,
+         )
+     )
+
+     pipeline_result_metadata.save()
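
A note on the normalization rule in this hunk: `_replace_special_characters` accepts names built from letters, digits, `$`, `_`, spaces, and hyphens (the first character may not be a digit or `$`), then rewrites spaces and hyphens to underscores so the checkpoint name is safe to reuse in file and table names. A minimal standalone sketch of that rule — the regexes are copied from the diff above, while the function name here is illustrative only:

```python
import re

# Regex copied from _replace_special_characters above: letters, digits, "$",
# "_", whitespace, and hyphens are allowed, but the name must not start with
# a digit or "$".
VALID_NAME = r"^[a-zA-Z_\s-][a-zA-Z0-9$_\s-]*$"


def normalize_checkpoint_name(checkpoint_name: str) -> str:
    """Illustrative re-implementation of the normalization in this diff."""
    if not re.match(VALID_NAME, checkpoint_name):
        raise ValueError(f"Invalid checkpoint name: {checkpoint_name}")
    # Spaces and hyphens become underscores, as in the original.
    return re.sub(r"[\s-]", "_", checkpoint_name)


print(normalize_checkpoint_name("my checkpoint-1"))  # -> my_checkpoint_1
```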
@@ -0,0 +1,159 @@
+ # Copyright 2025 Snowflake Inc.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ import os
+
+ from typing import Optional
+
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
+ from snowflake.snowpark_checkpoints.singleton import Singleton
+ from snowflake.snowpark_checkpoints.utils.constants import (
+     SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
+     VALIDATION_RESULTS_JSON_FILE_NAME,
+ )
+ from snowflake.snowpark_checkpoints.validation_results import (
+     ValidationResult,
+     ValidationResults,
+ )
+
+
+ LOGGER = logging.getLogger(__name__)
+
+
+ class ValidationResultsMetadata(metaclass=Singleton):
+
+     """ValidationResultsMetadata is a class that manages the loading, storing, and updating of validation results.
+
+     Attributes:
+         validation_results (list): A list to store validation results.
+         validation_results_file (str): The path to the validation results file.
+
+     Methods:
+         __init__(path: Optional[str] = None):
+             Initializes the ValidationResultsMetadata instance and loads validation results from a JSON file
+             if a path is provided.
+         _load(path: Optional[str] = None):
+             Loads validation results from a JSON file. If no path is provided, the current working directory is used.
+         clean():
+             Empties the in-memory validation results list when no results file exists on disk.
+         add_validation_result(validation_result: ValidationResult):
+             Adds a validation result to the pipeline result list.
+         save():
+             Saves the validation results to a JSON file in the current working directory.
+
+     """
+
+     def __init__(self, path: Optional[str] = None):
+         self._load(path)
+
+     def _load(self, path: Optional[str] = None):
+         """Load validation results from a JSON file.
+
+         Args:
+             path (Optional[str]): The directory path where the validation results file is located.
+                 If not provided, the current working directory is used.
+
+         Raises:
+             Exception: If there is an error reading the validation results file.
+
+         """
+         self.validation_results_directory = (
+             path if path else get_io_file_manager().getcwd()
+         )
+         self.validation_results_directory = os.path.join(
+             self.validation_results_directory,
+             SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
+         )
+
+         LOGGER.debug(
+             "Setting validation results directory to: '%s'",
+             self.validation_results_directory,
+         )
+
+         self.validation_results_file = os.path.join(
+             self.validation_results_directory,
+             VALIDATION_RESULTS_JSON_FILE_NAME,
+         )
+
+         LOGGER.debug(
+             "Setting validation results file to: '%s'", self.validation_results_file
+         )
+
+         self.validation_results = ValidationResults(results=[])
+
+         if get_io_file_manager().file_exists(self.validation_results_file):
+             LOGGER.info(
+                 "Loading validation results from: '%s'", self.validation_results_file
+             )
+             try:
+                 validation_result_json = get_io_file_manager().read(
+                     self.validation_results_file
+                 )
+                 self.validation_results = ValidationResults.model_validate_json(
+                     validation_result_json
+                 )
+             except Exception as e:
+                 raise Exception(
+                     f"Error reading validation results file: {self.validation_results_file} \n {e}"
+                 ) from None
+         else:
+             LOGGER.info(
+                 "Validation results file not found: '%s'",
+                 self.validation_results_file,
+             )
+
+     def clean(self):
+         """Clean the validation results list.
+
+         This method empties the in-memory validation results list, but only when
+         no validation results file exists on disk yet.
+
+         """
+         if not get_io_file_manager().file_exists(self.validation_results_file):
+             LOGGER.info("Cleaning validation results...")
+             self.validation_results.results = []
+
+     def add_validation_result(self, validation_result: ValidationResult):
+         """Add a validation result to the pipeline result list.
+
+         Args:
+             validation_result (ValidationResult): The validation result to be added.
+
+         """
+         self.validation_results.results.append(validation_result)
+
+     def save(self):
+         """Save the validation results to a file.
+
+         This method checks if the directory specified by validation results directory
+         exists, and if not, it creates the directory. Then, it writes the validation results
+         to a file specified by validation results file in JSON format.
+
+         Raises:
+             OSError: If the directory cannot be created or the file cannot be written.
+
+         """
+         if not get_io_file_manager().folder_exists(self.validation_results_directory):
+             LOGGER.debug(
+                 "Validation results directory '%s' does not exist. Creating it...",
+                 self.validation_results_directory,
+             )
+             get_io_file_manager().mkdir(self.validation_results_directory)
+
+         get_io_file_manager().write(
+             self.validation_results_file, self.validation_results.model_dump_json()
+         )
+         LOGGER.info(
+             "Validation results successfully saved to: '%s'",
+             self.validation_results_file,
+         )
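
Worth noting: `_update_validation_result` (previous hunk) constructs `ValidationResultsMetadata(output_path)` on every call, and it is the `Singleton` metaclass imported above — from snowflake/snowpark_checkpoints/singleton.py (+23 lines), whose body is not shown in this diff — that makes every such construction return one shared instance, so results accumulate across checkpoints in a run. A conventional sketch of such a metaclass, offered as an assumption rather than the actual file contents:

```python
# Assumed sketch: the real singleton.py is not part of this diff view; this is
# the standard metaclass-based singleton pattern its usage here implies.
class Singleton(type):
    _instances: dict = {}

    def __call__(cls, *args, **kwargs):
        # The first construction creates the instance; every later call
        # returns it unchanged, regardless of the arguments passed.
        if cls not in cls._instances:
            cls._instances[cls] = super().__call__(*args, **kwargs)
        return cls._instances[cls]
```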
@@ -0,0 +1,49 @@
+ # Copyright 2025 Snowflake Inc.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ from pydantic import BaseModel
+
+
+ class ValidationResult(BaseModel):
+
+     """ValidationResult represents the result of a validation checkpoint.
+
+     Attributes:
+         result (str): The result of the validation.
+         timestamp (str): The ISO-format timestamp of when the validation was performed.
+         file (str): The file where the validation checkpoint is located.
+         line_of_code (int): The line number in the file where the validation checkpoint is located.
+         checkpoint_name (str): The name of the validation checkpoint.
+
+     """
+
+     result: str
+     timestamp: str
+     file: str
+     line_of_code: int
+     checkpoint_name: str
+
+
+ class ValidationResults(BaseModel):
+
+     """ValidationResults is a model that holds a list of validation results.
+
+     Attributes:
+         results (list[ValidationResult]): A list of validation results.
+
+     """
+
+     results: list[ValidationResult]
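
These two pydantic models are the payload that `ValidationResultsMetadata.save()` and `_load()` round-trip through pydantic v2's `model_dump_json()` and `model_validate_json()`, as seen in the previous hunk. A quick round-trip using the models exactly as defined above; the sample field values are invented for illustration:

```python
from datetime import datetime

from snowflake.snowpark_checkpoints.validation_results import (
    ValidationResult,
    ValidationResults,
)

results = ValidationResults(
    results=[
        ValidationResult(
            result="PASS",                         # cf. PASS_STATUS / FAIL_STATUS in constants.py
            timestamp=datetime.now().isoformat(),  # stored as an ISO-8601 string
            file="demo_pipeline.py",               # hypothetical source file
            line_of_code=42,
            checkpoint_name="my_checkpoint_1",
        )
    ]
)

payload = results.model_dump_json()                        # what save() writes to disk
restored = ValidationResults.model_validate_json(payload)  # what _load() reads back
assert restored == results
```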