snowpark-checkpoints-validators 0.1.0rc3__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff shows the content changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (22)
  1. snowflake/snowpark_checkpoints/__init__.py +34 -0
  2. snowflake/snowpark_checkpoints/checkpoint.py +482 -0
  3. snowflake/snowpark_checkpoints/errors.py +60 -0
  4. snowflake/snowpark_checkpoints/job_context.py +85 -0
  5. snowflake/snowpark_checkpoints/singleton.py +23 -0
  6. snowflake/snowpark_checkpoints/snowpark_sampler.py +99 -0
  7. snowflake/snowpark_checkpoints/spark_migration.py +222 -0
  8. snowflake/snowpark_checkpoints/utils/__init__.py +14 -0
  9. snowflake/snowpark_checkpoints/utils/checkpoint_logger.py +52 -0
  10. snowflake/snowpark_checkpoints/utils/constants.py +134 -0
  11. snowflake/snowpark_checkpoints/utils/extra_config.py +84 -0
  12. snowflake/snowpark_checkpoints/utils/pandera_check_manager.py +358 -0
  13. snowflake/snowpark_checkpoints/utils/supported_types.py +65 -0
  14. snowflake/snowpark_checkpoints/utils/telemetry.py +900 -0
  15. snowflake/snowpark_checkpoints/utils/utils_checks.py +372 -0
  16. snowflake/snowpark_checkpoints/validation_result_metadata.py +116 -0
  17. snowflake/snowpark_checkpoints/validation_results.py +49 -0
  18. {snowpark_checkpoints_validators-0.1.0rc3.dist-info → snowpark_checkpoints_validators-0.1.1.dist-info}/METADATA +4 -6
  19. snowpark_checkpoints_validators-0.1.1.dist-info/RECORD +21 -0
  20. snowpark_checkpoints_validators-0.1.0rc3.dist-info/RECORD +0 -4
  21. {snowpark_checkpoints_validators-0.1.0rc3.dist-info → snowpark_checkpoints_validators-0.1.1.dist-info}/WHEEL +0 -0
  22. {snowpark_checkpoints_validators-0.1.0rc3.dist-info → snowpark_checkpoints_validators-0.1.1.dist-info}/licenses/LICENSE +0 -0
snowflake/snowpark_checkpoints/utils/utils_checks.py (new file)
@@ -0,0 +1,372 @@
+ # Copyright 2025 Snowflake Inc.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import inspect
+ import json
+ import os
+ import re
+
+ from datetime import datetime
+ from typing import Any, Optional
+
+ import numpy as np
+
+ from pandera import DataFrameSchema
+
+ from snowflake.snowpark import DataFrame as SnowparkDataFrame
+ from snowflake.snowpark_checkpoints.errors import SchemaValidationError
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
+ from snowflake.snowpark_checkpoints.snowpark_sampler import (
+     SamplingAdapter,
+     SamplingStrategy,
+ )
+ from snowflake.snowpark_checkpoints.utils.constants import (
+     CHECKPOINT_JSON_OUTPUT_FILE_FORMAT_NAME,
+     CHECKPOINT_TABLE_NAME_FORMAT,
+     COLUMNS_KEY,
+     DATAFRAME_CUSTOM_DATA_KEY,
+     DATAFRAME_EXECUTION_MODE,
+     DATAFRAME_PANDERA_SCHEMA_KEY,
+     DEFAULT_KEY,
+     EXCEPT_HASH_AGG_QUERY,
+     FAIL_STATUS,
+     PASS_STATUS,
+     SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
+ )
+ from snowflake.snowpark_checkpoints.utils.extra_config import (
+     get_checkpoint_file,
+ )
+ from snowflake.snowpark_checkpoints.utils.pandera_check_manager import (
+     PanderaCheckManager,
+ )
+ from snowflake.snowpark_checkpoints.utils.telemetry import STATUS_KEY, report_telemetry
+ from snowflake.snowpark_checkpoints.validation_result_metadata import (
+     ValidationResultsMetadata,
+ )
+ from snowflake.snowpark_checkpoints.validation_results import ValidationResult
+
+
+ def _replace_special_characters(checkpoint_name: str) -> str:
+     """Replace special characters in the checkpoint name with underscores.
+
+     Args:
+         checkpoint_name (str): The checkpoint name to process.
+
+     Returns:
+         str: The checkpoint name with special characters replaced by underscores.
+
+     """
+     regex = r"^[a-zA-Z_\s-][a-zA-Z0-9_$\s-]*$"
+     if not bool(re.match(regex, checkpoint_name)):
+         raise ValueError(
+             f"Invalid checkpoint name: {checkpoint_name}",
+             "Checkpoint name must contain only alphanumeric characters, hyphens, and underscores.",
+         )
+     return re.sub(r"[\s-]", "_", checkpoint_name)
+
+
+ def _process_sampling(
+     df: SnowparkDataFrame,
+     pandera_schema: DataFrameSchema,
+     job_context: Optional[SnowparkJobContext] = None,
+     sample_frac: Optional[float] = 1.0,
+     sample_number: Optional[int] = None,
+     sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
+ ):
+     """Process a Snowpark DataFrame by sampling it according to the specified parameters.
+
+     Adjusts the column casing of the provided Pandera schema to uppercase.
+
+     Args:
+         df (SnowparkDataFrame): The Snowpark DataFrame to be sampled.
+         pandera_schema (DataFrameSchema): The Pandera schema to validate the DataFrame.
+         job_context (SnowparkJobContext, optional): The job context for the sampling operation.
+             Defaults to None.
+         sample_frac (Optional[float], optional): The fraction of rows to sample.
+             Defaults to 1.0.
+         sample_number (Optional[int], optional): The number of rows to sample.
+             Defaults to None.
+         sampling_strategy (Optional[SamplingStrategy], optional): The strategy to use for sampling.
+             Defaults to SamplingStrategy.RANDOM_SAMPLE.
+
+     Returns:
+         Tuple[DataFrameSchema, pd.DataFrame]: A tuple containing the adjusted Pandera schema with uppercase column names
+             and the sampled pandas DataFrame.
+
+     """
+     sampler = SamplingAdapter(
+         job_context, sample_frac, sample_number, sampling_strategy
+     )
+     sampler.process_args([df])
+
+     # fix up the column casing
+     pandera_schema_upper = pandera_schema
+     new_columns: dict[Any, Any] = {}
+
+     for col in pandera_schema.columns:
+         new_columns[col.upper()] = pandera_schema.columns[col]
+
+     pandera_schema_upper = pandera_schema_upper.remove_columns(pandera_schema.columns)
+     pandera_schema_upper = pandera_schema_upper.add_columns(new_columns)
+
+     sample_df = sampler.get_sampled_pandas_args()[0]
+     sample_df.index = np.ones(sample_df.count().iloc[0])
+
+     return pandera_schema_upper, sample_df
+
+
+ def _generate_schema(
+     checkpoint_name: str, output_path: Optional[str] = None
+ ) -> DataFrameSchema:
+     """Generate a DataFrameSchema based on the checkpoint name provided.
+
+     This function reads a JSON file corresponding to the checkpoint name,
+     extracts schema information, and constructs a DataFrameSchema object.
+     It also adds custom checks for numeric and boolean types if specified
+     in the JSON file.
+
+     Args:
+         checkpoint_name (str): The name of the checkpoint used to locate
+             the JSON file containing schema information.
+         output_path (str): The path to the output directory.
+
+     Returns:
+         DataFrameSchema: A schema object representing the structure and
+             constraints of the DataFrame.
+
+     """
+     current_directory_path = output_path if output_path else os.getcwd()
+
+     output_directory_path = os.path.join(
+         current_directory_path, SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME
+     )
+
+     if not os.path.exists(output_directory_path):
+         raise ValueError(
+             """Output directory snowpark-checkpoints-output does not exist.
+ Please run the Snowpark checkpoint collector first."""
+         )
+
+     checkpoint_schema_file_path = os.path.join(
+         output_directory_path,
+         CHECKPOINT_JSON_OUTPUT_FILE_FORMAT_NAME.format(checkpoint_name),
+     )
+
+     if not os.path.exists(checkpoint_schema_file_path):
+         raise ValueError(
+             f"Checkpoint {checkpoint_name} JSON file not found. Please run the Snowpark checkpoint collector first."
+         )
+
+     with open(checkpoint_schema_file_path) as schema_file:
+         checkpoint_schema_config = json.load(schema_file)
+
+     if DATAFRAME_PANDERA_SCHEMA_KEY not in checkpoint_schema_config:
+         raise ValueError(
+             f"Pandera schema not found in the JSON file for checkpoint: {checkpoint_name}"
+         )
+
+     schema_dict = checkpoint_schema_config.get(DATAFRAME_PANDERA_SCHEMA_KEY)
+     schema_dict_str = json.dumps(schema_dict)
+     schema = DataFrameSchema.from_json(schema_dict_str)
+
+     if DATAFRAME_CUSTOM_DATA_KEY not in checkpoint_schema_config:
+         return schema
+
+     custom_data = checkpoint_schema_config.get(DATAFRAME_CUSTOM_DATA_KEY)
+
+     if COLUMNS_KEY not in custom_data:
+         raise ValueError(
+             f"Columns not found in the JSON file for checkpoint: {checkpoint_name}"
+         )
+
+     pandera_check_manager = PanderaCheckManager(
+         checkpoint_name=checkpoint_name, schema=schema
+     )
+     schema = pandera_check_manager.proccess_checks(custom_data)
+
+     return schema
+
+
+ def _check_compare_data(
+     df: SnowparkDataFrame,
+     job_context: Optional[SnowparkJobContext],
+     checkpoint_name: str,
+     output_path: Optional[str] = None,
+ ):
+     """Compare the data in the provided Snowpark DataFrame with the data in a checkpoint table.
+
+     This function writes the provided DataFrame to a table and compares it with an existing checkpoint table
+     using a hash aggregation query. If there is a data mismatch, it marks the job context as failed and raises a
+     SchemaValidationError. If the data matches, it marks the job context as passed.
+
+     Args:
+         df (SnowparkDataFrame): The Snowpark DataFrame to compare.
+         job_context (Optional[SnowparkJobContext]): The job context containing the Snowpark session and job state.
+         checkpoint_name (str): The name of the checkpoint table to compare against.
+         output_path (Optional[str]): The path to the output directory.
+
+     Raises:
+         SchemaValidationError: If there is a data mismatch between the DataFrame and the checkpoint table.
+
+     """
+     result, err = _compare_data(df, job_context, checkpoint_name, output_path)
+     if err is not None:
+         raise err
+
+
+ @report_telemetry(
+     params_list=["df"], return_indexes=[(STATUS_KEY, 0)], multiple_return=True
+ )
+ def _compare_data(
+     df: SnowparkDataFrame,
+     job_context: Optional[SnowparkJobContext],
+     checkpoint_name: str,
+     output_path: Optional[str] = None,
+ ) -> tuple[bool, Optional[SchemaValidationError]]:
+     """Compare the data in the provided Snowpark DataFrame with the data in a checkpoint table.
+
+     This function writes the provided DataFrame to a table and compares it with an existing checkpoint table
+     using a hash aggregation query. If there is a data mismatch, it marks the job context as failed and raises a
+     SchemaValidationError. If the data matches, it marks the job context as passed.
+
+     Args:
+         df (SnowparkDataFrame): The Snowpark DataFrame to compare.
+         job_context (Optional[SnowparkJobContext]): The job context containing the Snowpark session and job state.
+         checkpoint_name (str): The name of the checkpoint table to compare against.
+         output_path (Optional[str]): The path to the output directory.
+
+     Returns:
+         Tuple[bool, Optional[SchemaValidationError]]: A tuple containing a boolean indicating if the data matches
+             and an optional SchemaValidationError if there is a data mismatch.
+
+     Raises:
+         SchemaValidationError: If there is a data mismatch between the DataFrame and the checkpoint table.
+
+     """
+     new_table_name = CHECKPOINT_TABLE_NAME_FORMAT.format(checkpoint_name)
+
+     df.write.save_as_table(table_name=new_table_name, mode="overwrite")
+
+     expect_df = job_context.snowpark_session.sql(
+         EXCEPT_HASH_AGG_QUERY, [checkpoint_name, new_table_name]
+     )
+
+     if expect_df.count() != 0:
+         error_message = f"Data mismatch for checkpoint {checkpoint_name}"
+         job_context._mark_fail(
+             error_message,
+             checkpoint_name,
+             df,
+             DATAFRAME_EXECUTION_MODE,
+         )
+         _update_validation_result(
+             checkpoint_name,
+             FAIL_STATUS,
+             output_path,
+         )
+         return False, SchemaValidationError(
+             error_message,
+             job_context,
+             checkpoint_name,
+             df,
+         )
+     else:
+         _update_validation_result(checkpoint_name, PASS_STATUS, output_path)
+         job_context._mark_pass(checkpoint_name, DATAFRAME_EXECUTION_MODE)
+         return True, None
+
+
+ def _find_frame_in(stack: list[inspect.FrameInfo]) -> tuple:
+     """Find a specific frame in the provided stack trace.
+
+     This function searches through the provided stack trace to find a frame that matches
+     certain criteria. It looks for frames where the function name is "wrapper" or where
+     the code context matches specific regular expressions.
+
+     Args:
+         stack (list[inspect.FrameInfo]): A list of frame information objects representing
+             the current stack trace.
+
+     Returns:
+         tuple: A tuple containing the relative path of the file and the line number of the
+             matched frame. If no frame is matched, it returns a default key and -1.
+
+     """
+     regex = (
+         r"(?<!_check_dataframe_schema_file)"
+         r"(?<!_check_dataframe_schema)"
+         r"(validate_dataframe_checkpoint|check_dataframe_schema)"
+     )
+
+     first_frames = stack[:7]
+     first_frames.reverse()
+
+     for i, frame in enumerate(first_frames):
+         if frame.function == "wrapper" and i - 1 >= 0:
+             next_frame = first_frames[i - 1]
+             return _get_relative_path(next_frame.filename), next_frame.lineno
+
+         if len(frame.code_context) >= 0 and re.search(regex, frame.code_context[0]):
+             return _get_relative_path(frame.filename), frame.lineno
+     return DEFAULT_KEY, -1
+
+
+ def _get_relative_path(file_path: str) -> str:
+     """Get the relative path of a file.
+
+     Args:
+         file_path (str): The path to the file.
+
+     Returns:
+         str: The relative path of the file.
+
+     """
+     current_directory = os.getcwd()
+     return os.path.relpath(file_path, current_directory)
+
+
+ def _update_validation_result(
+     checkpoint_name: str, validation_status: str, output_path: Optional[str] = None
+ ) -> None:
+     """Update the validation result file with the status of a given checkpoint.
+
+     Args:
+         checkpoint_name (str): The name of the checkpoint to update.
+         validation_status (str): The validation status to record for the checkpoint.
+         output_path (str): The path to the output directory.
+
+     Returns:
+         None
+
+     """
+     _file = get_checkpoint_file(checkpoint_name)
+
+     stack = inspect.stack()
+
+     _file_from_stack, _line_of_code = _find_frame_in(stack)
+
+     pipeline_result_metadata = ValidationResultsMetadata(output_path)
+
+     pipeline_result_metadata.add_validation_result(
+         ValidationResult(
+             timestamp=datetime.now().isoformat(),
+             file=_file if _file else _file_from_stack,
+             line_of_code=_line_of_code,
+             checkpoint_name=checkpoint_name,
+             result=validation_status,
+         )
+     )
+
+     pipeline_result_metadata.save()
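For orientation, here is a brief usage sketch (not part of the diff) of the name-normalization rule above. It assumes only `_replace_special_characters` as defined in this file: spaces and hyphens are accepted and rewritten to underscores, while any other special character fails the regex and raises ValueError.

    from snowflake.snowpark_checkpoints.utils.utils_checks import (
        _replace_special_characters,
    )

    # Spaces and hyphens are valid and are normalized to underscores.
    print(_replace_special_characters("my checkpoint-1"))  # my_checkpoint_1

    # Any other special character fails the validation regex.
    try:
        _replace_special_characters("my.checkpoint")
    except ValueError as exc:
        print(exc)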
snowflake/snowpark_checkpoints/validation_result_metadata.py (new file)
@@ -0,0 +1,116 @@
+ # Copyright 2025 Snowflake Inc.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+
+ from typing import Optional
+
+ from snowflake.snowpark_checkpoints.singleton import Singleton
+ from snowflake.snowpark_checkpoints.utils.constants import (
+     SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
+     VALIDATION_RESULTS_JSON_FILE_NAME,
+ )
+ from snowflake.snowpark_checkpoints.validation_results import (
+     ValidationResult,
+     ValidationResults,
+ )
+
+
+ class ValidationResultsMetadata(metaclass=Singleton):
+
+     """ValidationResultsMetadata is a class that manages the loading, storing, and updating of validation results.
+
+     Attributes:
+         validation_results (list): A list to store validation results.
+         validation_results_file (str): The path to the validation results file.
+
+     Methods:
+         __init__(path: Optional[str] = None):
+             Initializes the ValidationResultsMetadata instance and loads validation results from a JSON file
+             if a path is provided.
+         _load(path: Optional[str] = None):
+             Loads validation results from a JSON file. If no path is provided, the current working directory is used.
+         add_validation_result(validation_result: ValidationResult):
+             Adds a validation result to the validation results list.
+         save():
+             Saves the validation results to a JSON file in the current working directory.
+
+     """
+
+     def __init__(self, path: Optional[str] = None):
+         self._load(path)
+
+     def _load(self, path: Optional[str] = None):
+         """Load validation results from a JSON file.
+
+         Args:
+             path (Optional[str]): The directory path where the validation results file is located.
+                 If not provided, the current working directory is used.
+
+         Raises:
+             Exception: If there is an error reading the validation results file.
+
+         """
+         self.validation_results_directory = path if path else os.getcwd()
+         self.validation_results_directory = os.path.join(
+             self.validation_results_directory,
+             SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
+         )
+
+         self.validation_results_file = os.path.join(
+             self.validation_results_directory,
+             VALIDATION_RESULTS_JSON_FILE_NAME,
+         )
+
+         self.validation_results = ValidationResults(results=[])
+
+         if os.path.exists(self.validation_results_file):
+             with open(self.validation_results_file) as file:
+                 try:
+                     validation_result_json = file.read()
+                     self.validation_results = ValidationResults.model_validate_json(
+                         validation_result_json
+                     )
+                 except Exception as e:
+                     raise Exception(
+                         f"Error reading validation results file: {self.validation_results_file} \n {e}"
+                     ) from None
+
+     def add_validation_result(self, validation_result: ValidationResult):
+         """Add a validation result to the validation results list.
+
+         Args:
+             validation_result (ValidationResult): The validation result to be added.
+
+         """
+         self.validation_results.results.append(validation_result)
+
+     def save(self):
+         """Save the validation results to a file.
+
+         This method checks if the directory specified by validation results directory
+         exists, and if not, it creates the directory. Then, it writes the validation results
+         to a file specified by validation results file in JSON format.
+
+         Raises:
+             OSError: If the directory cannot be created or the file cannot be written.
+
+         """
+         if not os.path.exists(self.validation_results_directory):
+             os.makedirs(self.validation_results_directory)
+
+         with open(self.validation_results_file, "w") as output_file:
+             output_file.write(self.validation_results.model_dump_json())
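A minimal usage sketch (not part of the diff) of the class above; the path and field values are hypothetical, and the Singleton metaclass presumably means repeated construction within one process reuses the first instance:

    from datetime import datetime

    from snowflake.snowpark_checkpoints.validation_result_metadata import (
        ValidationResultsMetadata,
    )
    from snowflake.snowpark_checkpoints.validation_results import ValidationResult

    # _load() looks for existing results under <path>/snowpark-checkpoints-output/.
    metadata = ValidationResultsMetadata("/tmp/demo")  # hypothetical path

    metadata.add_validation_result(
        ValidationResult(
            timestamp=datetime.now().isoformat(),
            file="pipeline.py",        # hypothetical source file
            line_of_code=42,           # hypothetical line number
            checkpoint_name="demo_checkpoint",
            result="PASS",             # assumes the pass status serializes as "PASS"
        )
    )
    metadata.save()  # writes the file named by VALIDATION_RESULTS_JSON_FILE_NAME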
snowflake/snowpark_checkpoints/validation_results.py (new file)
@@ -0,0 +1,49 @@
+ # Copyright 2025 Snowflake Inc.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ from pydantic import BaseModel
+
+
+ class ValidationResult(BaseModel):
+
+     """ValidationResult represents the result of a validation checkpoint.
+
+     Attributes:
+         result (str): The result of the validation.
+         timestamp (str): The timestamp (ISO-format string) when the validation was performed.
+         file (str): The file where the validation checkpoint is located.
+         line_of_code (int): The line number in the file where the validation checkpoint is located.
+         checkpoint_name (str): The name of the validation checkpoint.
+
+     """
+
+     result: str
+     timestamp: str
+     file: str
+     line_of_code: int
+     checkpoint_name: str
+
+
+ class ValidationResults(BaseModel):
+
+     """ValidationResults is a model that holds a list of validation results.
+
+     Attributes:
+         results (list[ValidationResult]): A list of validation results.
+
+     """
+
+     results: list[ValidationResult]
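Since these are plain pydantic models, the JSON round-trip that `_load` and `save` rely on can be exercised directly. A minimal sketch with made-up values:

    from snowflake.snowpark_checkpoints.validation_results import (
        ValidationResult,
        ValidationResults,
    )

    results = ValidationResults(
        results=[
            ValidationResult(
                result="PASS",                    # hypothetical status string
                timestamp="2025-01-01T00:00:00",  # ISO-format string
                file="pipeline.py",
                line_of_code=10,
                checkpoint_name="demo_checkpoint",
            )
        ]
    )

    payload = results.model_dump_json()                        # what save() writes
    restored = ValidationResults.model_validate_json(payload)  # what _load() parses
    assert restored == results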
{snowpark_checkpoints_validators-0.1.0rc3.dist-info → snowpark_checkpoints_validators-0.1.1.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: snowpark-checkpoints-validators
- Version: 0.1.0rc3
+ Version: 0.1.1
  Summary: Migration tools for Snowpark
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -29,8 +29,8 @@ Requires-Python: <3.12,>=3.9
  Requires-Dist: pandera-report==0.1.2
  Requires-Dist: pandera[io]==0.20.4
  Requires-Dist: pyspark
- Requires-Dist: snowflake-connector-python
- Requires-Dist: snowflake-snowpark-python
+ Requires-Dist: snowflake-connector-python==3.13.0
+ Requires-Dist: snowflake-snowpark-python==1.26.0
  Provides-Extra: development
  Requires-Dist: coverage>=7.6.7; extra == 'development'
  Requires-Dist: deepdiff>=8.0.0; extra == 'development'
@@ -47,9 +47,7 @@ Description-Content-Type: text/markdown

  ---
  **NOTE**
-
- This package is on Private Preview.
-
+ This package is on Public Preview.
  ---

  **snowpark-checkpoints-validators** is a package designed to validate Snowpark DataFrames against predefined schemas and checkpoints. This package ensures data integrity and consistency by performing schema and data validation checks at various stages of a Snowpark pipeline.
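The dependency pins are the practical change in this METADATA diff: 0.1.0rc3 accepted any version of snowflake-connector-python and snowflake-snowpark-python, while 0.1.1 requires exact versions. A quick sketch for confirming what resolved in an environment after installing 0.1.1:

    from importlib.metadata import version

    # Expected values per the pinned requirements in this METADATA.
    print(version("snowpark-checkpoints-validators"))  # 0.1.1
    print(version("snowflake-connector-python"))       # 3.13.0
    print(version("snowflake-snowpark-python"))        # 1.26.0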
snowpark_checkpoints_validators-0.1.1.dist-info/RECORD (new file)
@@ -0,0 +1,21 @@
+ snowflake/snowpark_checkpoints/__init__.py,sha256=1_xzSopIHWpw1i3gQqWLN0wCfWWEefjr4cl1vl0xSdY,1211
+ snowflake/snowpark_checkpoints/checkpoint.py,sha256=-y1iWdGxYGuTWdngOEXdA59MT33PCiM7cP1s3jJs9jE,18997
+ snowflake/snowpark_checkpoints/errors.py,sha256=9KjzRf8bjDZTTNL4LeySJAwuucDOyz0Ka7EFBKWFpyg,1821
+ snowflake/snowpark_checkpoints/job_context.py,sha256=7LdJ682lC8hCJOYUn-AVXq_Llv18R9oGdK2F-amYR_o,2990
+ snowflake/snowpark_checkpoints/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
+ snowflake/snowpark_checkpoints/snowpark_sampler.py,sha256=-t7cg-swMK0SaU7r8y90MLSDPXGlKprc6xdVxEs29sU,3632
+ snowflake/snowpark_checkpoints/spark_migration.py,sha256=DzzgUZ-XlzIqCz-aWpBICP8mgnjk8UNoL8JsomadF-U,8832
+ snowflake/snowpark_checkpoints/validation_result_metadata.py,sha256=zNU7hk9GH4d73iVfNopSAs_8yJeT12s_mcbpB7FShSY,4516
+ snowflake/snowpark_checkpoints/validation_results.py,sha256=J8OcpNty6hQD8RbAy8xmA0UMbPWfXSmQnHYspWWSisk,1502
+ snowflake/snowpark_checkpoints/utils/__init__.py,sha256=I4srmZ8G1q9DU6Suo1S91aVfNvETyisKH95uvLAvEJ0,609
+ snowflake/snowpark_checkpoints/utils/checkpoint_logger.py,sha256=meGl5T3Avp4Qn0GEwkJi5GSLS4MDb7zTGbTOI-8bf1E,1592
+ snowflake/snowpark_checkpoints/utils/constants.py,sha256=pgFttLDQ6fTa6obSdvivWBYClS21ap41YVDNGAS4sxY,4146
+ snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=pmGLYT7cu9WMKzQwcEPkgk1DMnnT1fREm45p19e79hk,2567
+ snowflake/snowpark_checkpoints/utils/pandera_check_manager.py,sha256=ddTwXauuZdowIRwPMT61GWYCG4XGKOFkVyfZO49bc-8,14516
+ snowflake/snowpark_checkpoints/utils/supported_types.py,sha256=GrMX2tHdSFnK7LlPbZx20UufD6Br6TNVRkkBwIxdPy0,1433
+ snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=JZ5bdPBxAoyWT7clua4T_QprHcwWQChd2A5ojLFHJ0o,31366
+ snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=LF1EJrwJwV4gBqifXdULBBGKXxCZqC9vR7BGLe_LTSM,13490
+ snowpark_checkpoints_validators-0.1.1.dist-info/METADATA,sha256=o2uERHArlH9q3HpWYRGZBjzJAgYKNlolLQAd6JkiuXY,11012
+ snowpark_checkpoints_validators-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ snowpark_checkpoints_validators-0.1.1.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
+ snowpark_checkpoints_validators-0.1.1.dist-info/RECORD,,
snowpark_checkpoints_validators-0.1.0rc3.dist-info/RECORD (deleted)
@@ -1,4 +0,0 @@
- snowpark_checkpoints_validators-0.1.0rc3.dist-info/METADATA,sha256=tjA56AFxmFIDPB3jx0ySyf2sHNAdEFYUu9RuqwySQ4M,11002
- snowpark_checkpoints_validators-0.1.0rc3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- snowpark_checkpoints_validators-0.1.0rc3.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
- snowpark_checkpoints_validators-0.1.0rc3.dist-info/RECORD,,