snowpark-checkpoints-validators 0.1.0rc2__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. snowpark_checkpoints_validators-0.1.1/PKG-INFO +311 -0
  2. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/README.md +1 -3
  3. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/pyproject.toml +17 -8
  4. snowpark_checkpoints_validators-0.1.1/src/snowflake/snowpark_checkpoints/__init__.py +34 -0
  5. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/checkpoint.py +14 -3
  6. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/errors.py +14 -3
  7. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/job_context.py +14 -3
  8. snowpark_checkpoints_validators-0.1.1/src/snowflake/snowpark_checkpoints/singleton.py +23 -0
  9. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/snowpark_sampler.py +14 -3
  10. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/spark_migration.py +14 -3
  11. snowpark_checkpoints_validators-0.1.1/src/snowflake/snowpark_checkpoints/utils/__init__.py +14 -0
  12. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/utils/checkpoint_logger.py +14 -3
  13. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/utils/constants.py +14 -3
  14. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/utils/extra_config.py +14 -3
  15. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/utils/supported_types.py +14 -3
  16. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/utils/telemetry.py +101 -34
  17. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/utils/utils_checks.py +14 -3
  18. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/validation_result_metadata.py +14 -3
  19. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/validation_results.py +14 -3
  20. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/e2eexample.py +14 -3
  21. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_compare_utils.py +20 -3
  22. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/df_mode_dataframe_mismatch_telemetry.json +5 -4
  23. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/df_mode_dataframe_telemetry.json +5 -4
  24. snowpark_checkpoints_validators-0.1.1/test/integ/telemetry_expected/spark_checkpoint_df_fail_telemetry.json +18 -0
  25. snowpark_checkpoints_validators-0.1.1/test/integ/telemetry_expected/spark_checkpoint_df_pass_telemetry.json +18 -0
  26. snowpark_checkpoints_validators-0.1.1/test/integ/telemetry_expected/spark_checkpoint_limit_sample_telemetry.json +18 -0
  27. snowpark_checkpoints_validators-0.1.1/test/integ/telemetry_expected/spark_checkpoint_random_sample_telemetry.json +18 -0
  28. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/spark_checkpoint_scalar_fail_telemetry.json +5 -4
  29. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/spark_checkpoint_scalar_passing_telemetry.json +5 -4
  30. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_df_check_custom_check_telemetry.json +6 -5
  31. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_df_check_fail_telemetry.json +6 -5
  32. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_df_check_from_file_telemetry.json +6 -5
  33. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_df_check_skip_check_telemetry.json +6 -5
  34. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_df_check_telemetry.json +6 -5
  35. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_input_fail_telemetry.json +5 -4
  36. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_input_telemetry.json +5 -4
  37. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_output_fail_telemetry.json +5 -4
  38. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/telemetry_expected/test_output_telemetry.json +5 -4
  39. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/test_pandera.py +14 -3
  40. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/test_spark_checkpoint.py +14 -3
  41. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/unit/test_extra_config.py +14 -3
  42. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/unit/test_spark_migration.py +14 -3
  43. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/unit/test_telemetry.py +247 -83
  44. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/unit/test_utils_checks.py +14 -3
  45. snowpark_checkpoints_validators-0.1.0rc2/PKG-INFO +0 -514
  46. snowpark_checkpoints_validators-0.1.0rc2/src/snowflake/snowpark_checkpoints/__init__.py +0 -23
  47. snowpark_checkpoints_validators-0.1.0rc2/src/snowflake/snowpark_checkpoints/singleton.py +0 -12
  48. snowpark_checkpoints_validators-0.1.0rc2/src/snowflake/snowpark_checkpoints/utils/__init__.py +0 -3
  49. snowpark_checkpoints_validators-0.1.0rc2/test/integ/telemetry_expected/spark_checkpoint_df_fail_telemetry.json +0 -17
  50. snowpark_checkpoints_validators-0.1.0rc2/test/integ/telemetry_expected/spark_checkpoint_df_pass_telemetry.json +0 -17
  51. snowpark_checkpoints_validators-0.1.0rc2/test/integ/telemetry_expected/spark_checkpoint_limit_sample_telemetry.json +0 -17
  52. snowpark_checkpoints_validators-0.1.0rc2/test/integ/telemetry_expected/spark_checkpoint_random_sample_telemetry.json +0 -17
  53. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/.gitignore +0 -0
  54. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/CHANGELOG.md +0 -0
  55. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/LICENSE +0 -0
  56. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/src/snowflake/snowpark_checkpoints/utils/pandera_check_manager.py +0 -0
  57. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/.coveragerc +0 -0
  58. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/integ/test_parquet.py +0 -0
  59. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/unit/test_pandera_check_manager.py +0 -0
  60. {snowpark_checkpoints_validators-0.1.0rc2 → snowpark_checkpoints_validators-0.1.1}/test/unit/test_validation_result_metadata.py +0 -0
@@ -0,0 +1,311 @@
1
+ Metadata-Version: 2.4
2
+ Name: snowpark-checkpoints-validators
3
+ Version: 0.1.1
4
+ Summary: Migration tools for Snowpark
5
+ Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
6
+ Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
7
+ Author-email: "Snowflake, Inc." <snowflake-python-libraries-dl@snowflake.com>
8
+ License: Apache License, Version 2.0
9
+ License-File: LICENSE
10
+ Keywords: Snowflake,Snowpark,analytics,cloud,database,db
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Environment :: Other Environment
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Education
16
+ Classifier: Intended Audience :: Information Technology
17
+ Classifier: Intended Audience :: System Administrators
18
+ Classifier: License :: OSI Approved :: Apache Software License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Programming Language :: Python :: 3 :: Only
21
+ Classifier: Programming Language :: SQL
22
+ Classifier: Topic :: Database
23
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
24
+ Classifier: Topic :: Software Development
25
+ Classifier: Topic :: Software Development :: Libraries
26
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
27
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
+ Requires-Python: <3.12,>=3.9
29
+ Requires-Dist: pandera-report==0.1.2
30
+ Requires-Dist: pandera[io]==0.20.4
31
+ Requires-Dist: pyspark
32
+ Requires-Dist: snowflake-connector-python==3.13.0
33
+ Requires-Dist: snowflake-snowpark-python==1.26.0
34
+ Provides-Extra: development
35
+ Requires-Dist: coverage>=7.6.7; extra == 'development'
36
+ Requires-Dist: deepdiff>=8.0.0; extra == 'development'
37
+ Requires-Dist: hatchling==1.25.0; extra == 'development'
38
+ Requires-Dist: pre-commit>=4.0.1; extra == 'development'
39
+ Requires-Dist: pyarrow>=18.0.0; extra == 'development'
40
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'development'
41
+ Requires-Dist: pytest>=8.3.3; extra == 'development'
42
+ Requires-Dist: setuptools>=70.0.0; extra == 'development'
43
+ Requires-Dist: twine==5.1.1; extra == 'development'
44
+ Description-Content-Type: text/markdown
45
+
46
+ # snowpark-checkpoints-validators
47
+
48
+ ---
49
+ **NOTE**
50
+ This package is on Public Preview.
51
+ ---
52
+
53
+ **snowpark-checkpoints-validators** is a package designed to validate Snowpark DataFrames against predefined schemas and checkpoints. This package ensures data integrity and consistency by performing schema and data validation checks at various stages of a Snowpark pipeline.
54
+
55
+ ## Features
56
+
57
+ - Validate Snowpark DataFrames against predefined Pandera schemas.
58
+ - Perform custom checks and skip specific checks as needed.
59
+ - Generate validation results and log them for further analysis.
60
+ - Support for sampling strategies to validate large datasets efficiently.
61
+ - Integration with PySpark for cross-validation between Snowpark and PySpark DataFrames.
62
+
63
+ ## Functionalities
64
+
65
+ ### Validate DataFrame Schema from File
66
+
67
+ The `validate_dataframe_checkpoint` function validates a Snowpark DataFrame against a checkpoint schema file or dataframe.
68
+
69
+ ```python
70
+ from snowflake.snowpark import DataFrame as SnowparkDataFrame
71
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
72
+ from snowflake.snowpark_checkpoints.utils.constant import (
73
+ CheckpointMode,
74
+ )
75
+ from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
76
+ from typing import Any, Optional
77
+
78
+ # Signature of the function
79
+ def validate_dataframe_checkpoint(
80
+ df: SnowparkDataFrame,
81
+ checkpoint_name: str,
82
+ job_context: Optional[SnowparkJobContext] = None,
83
+ mode: Optional[CheckpointMode] = CheckpointMode.SCHEMA,
84
+ custom_checks: Optional[dict[Any, Any]] = None,
85
+ skip_checks: Optional[dict[Any, Any]] = None,
86
+ sample_frac: Optional[float] = 1.0,
87
+ sample_number: Optional[int] = None,
88
+ sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
89
+ output_path: Optional[str] = None,
90
+ ):
91
+ ...
92
+ ```
93
+
94
+ - `df`: Snowpark dataframe to validate.
95
+ - `checkpoint_name`: Name of the checkpoint schema file or dataframe.
96
+ - `job_context`: Snowpark job context.
97
+ - `mode`: Checkpoint mode (schema or data).
98
+ - `custom_checks`: Custom checks to perform.
99
+ - `skip_checks`: Checks to skip.
100
+ - `sample_frac`: Fraction of the dataframe to sample.
101
+ - `sample_number`: Number of rows to sample.
102
+ - `sampling_strategy`: Sampling strategy to use.
103
+ - `output_path`: Output path for the checkpoint report.
104
+
105
+ ### Usage Example
106
+
107
+ ```python
108
+ from snowflake.snowpark import Session
109
+ from snowflake.snowpark_checkpoints.utils.constant import (
110
+ CheckpointMode,
111
+ )
112
+ from snowflake.snowpark_checkpoints.checkpoint import validate_dataframe_checkpoint
113
+ from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
114
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
115
+ from pyspark.sql import SparkSession
116
+
117
+ session = Session.builder.getOrCreate()
118
+ job_context = SnowparkJobContext(
119
+ session, SparkSession.builder.getOrCreate(), "job_context", True
120
+ )
121
+ df = session.read.format("csv").load("data.csv")
122
+
123
+ validate_dataframe_checkpoint(
124
+ df,
125
+ "schema_checkpoint",
126
+ job_context=job_context,
127
+ mode=CheckpointMode.SCHEMA,
128
+ sample_frac=0.1,
129
+ sampling_strategy=SamplingStrategy.RANDOM_SAMPLE
130
+ )
131
+ ```
132
+
133
+ ### Check with Spark Decorator
134
+
135
+ The `check_with_spark` decorator converts any Snowpark dataframe arguments to a function, samples them, and converts them to a PySpark dataframe. It then executes a provided Spark function and compares the outputs between the two implementations.
136
+
137
+ ```python
138
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
139
+ from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
140
+ from typing import Callable, Optional, TypeVar
141
+
142
+ fn = TypeVar("F", bound=Callable)
143
+
144
+ # Signature of the decorator
145
+ def check_with_spark(
146
+ job_context: Optional[SnowparkJobContext],
147
+ spark_function: fn,
148
+ checkpoint_name: str,
149
+ sample_number: Optional[int] = 100,
150
+ sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
151
+ output_path: Optional[str] = None,
152
+ ) -> Callable[[fn], fn]:
153
+ ...
154
+ ```
155
+
156
+ - `job_context`: Snowpark job context.
157
+ - `spark_function`: PySpark function to execute.
158
+ - `checkpoint_name`: Name of the check.
159
+ - `sample_number`: Number of rows to sample.
160
+ - `sampling_strategy`: Sampling strategy to use.
161
+ - `output_path`: Output path for the checkpoint report.
162
+
163
+ ### Usage Example
164
+
165
+ ```python
166
+ from snowflake.snowpark import Session
167
+ from snowflake.snowpark import DataFrame as SnowparkDataFrame
168
+ from snowflake.snowpark_checkpoints.spark_migration import check_with_spark
169
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
170
+ from pyspark.sql import DataFrame as SparkDataFrame, SparkSession
171
+
172
+ session = Session.builder.getOrCreate()
173
+ job_context = SnowparkJobContext(
174
+ session, SparkSession.builder.getOrCreate(), "job_context", True
175
+ )
176
+
177
+ def my_spark_scalar_fn(df: SparkDataFrame):
178
+ return df.count()
179
+
180
+ @check_with_spark(
181
+ job_context=job_context,
182
+ spark_function=my_spark_scalar_fn,
183
+ checkpoint_name="count_checkpoint",
184
+ )
185
+ def my_snowpark_scalar_fn(df: SnowparkDataFrame):
186
+ return df.count()
187
+
188
+ df = job_context.snowpark_session.create_dataframe(
189
+ [[1, 2], [3, 4]], schema=["a", "b"]
190
+ )
191
+ count = my_snowpark_scalar_fn(df)
192
+ ```
193
+
194
+ ### Pandera Snowpark Decorators
195
+
196
+ The decorators `@check_input_schema` and `@check_output_schema` allow for sampled schema validation of Snowpark dataframes in the input arguments or in the return value.
197
+
198
+ ```python
199
+ from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
200
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
201
+ from pandera import DataFrameSchema
202
+ from typing import Optional
203
+
204
+ # Signature of the decorator
205
+ def check_input_schema(
206
+ pandera_schema: DataFrameSchema,
207
+ checkpoint_name: str,
208
+ sample_frac: Optional[float] = 1.0,
209
+ sample_number: Optional[int] = None,
210
+ sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
211
+ job_context: Optional[SnowparkJobContext] = None,
212
+ output_path: Optional[str] = None,
213
+ ):
214
+ ...
215
+
216
+ # Signature of the decorator
217
+ def check_output_schema(
218
+ pandera_schema: DataFrameSchema,
219
+ checkpoint_name: str,
220
+ sample_frac: Optional[float] = 1.0,
221
+ sample_number: Optional[int] = None,
222
+ sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
223
+ job_context: Optional[SnowparkJobContext] = None,
224
+ output_path: Optional[str] = None,
225
+ ):
226
+ ...
227
+ ```
228
+
229
+ - `pandera_schema`: Pandera schema to validate.
230
+ - `checkpoint_name`: Name of the checkpoint schema file or DataFrame.
231
+ - `sample_frac`: Fraction of the DataFrame to sample.
232
+ - `sample_number`: Number of rows to sample.
233
+ - `sampling_strategy`: Sampling strategy to use.
234
+ - `job_context`: Snowpark job context.
235
+ - `output_path`: Output path for the checkpoint report.
236
+
237
+ ### Usage Example
238
+
239
+ #### Check Input Schema Example
240
+ ```python
241
+ from pandas import DataFrame as PandasDataFrame
242
+ from pandera import DataFrameSchema, Column, Check
243
+ from snowflake.snowpark import Session
244
+ from snowflake.snowpark import DataFrame as SnowparkDataFrame
245
+ from snowflake.snowpark_checkpoints.checkpoint import check_input_schema
246
+ from numpy import int8
247
+
248
+ df = PandasDataFrame(
249
+ {
250
+ "COLUMN1": [1, 4, 0, 10, 9],
251
+ "COLUMN2": [-1.3, -1.4, -2.9, -10.1, -20.4],
252
+ }
253
+ )
254
+
255
+ in_schema = DataFrameSchema(
256
+ {
257
+ "COLUMN1": Column(int8, Check(lambda x: 0 <= x <= 10, element_wise=True)),
258
+ "COLUMN2": Column(float, Check(lambda x: x < -1.2, element_wise=True)),
259
+ }
260
+ )
261
+
262
+ @check_input_schema(in_schema, "input_schema_checkpoint")
263
+ def preprocessor(dataframe: SnowparkDataFrame):
264
+ dataframe = dataframe.withColumn(
265
+ "COLUMN3", dataframe["COLUMN1"] + dataframe["COLUMN2"]
266
+ )
267
+ return dataframe
268
+
269
+ session = Session.builder.getOrCreate()
270
+ sp_dataframe = session.create_dataframe(df)
271
+
272
+ preprocessed_dataframe = preprocessor(sp_dataframe)
273
+ ```
274
+
275
+ #### Check Output Schema Example
276
+ ```python
277
+ from pandas import DataFrame as PandasDataFrame
278
+ from pandera import DataFrameSchema, Column, Check
279
+ from snowflake.snowpark import Session
280
+ from snowflake.snowpark import DataFrame as SnowparkDataFrame
281
+ from snowflake.snowpark_checkpoints.checkpoint import check_output_schema
282
+ from numpy import int8
283
+
284
+ df = PandasDataFrame(
285
+ {
286
+ "COLUMN1": [1, 4, 0, 10, 9],
287
+ "COLUMN2": [-1.3, -1.4, -2.9, -10.1, -20.4],
288
+ }
289
+ )
290
+
291
+ out_schema = DataFrameSchema(
292
+ {
293
+ "COLUMN1": Column(int8, Check.between(0, 10, include_max=True, include_min=True)),
294
+ "COLUMN2": Column(float, Check.less_than_or_equal_to(-1.2)),
295
+ "COLUMN3": Column(float, Check.less_than(10)),
296
+ }
297
+ )
298
+
299
+ @check_output_schema(out_schema, "output_schema_checkpoint")
300
+ def preprocessor(dataframe: SnowparkDataFrame):
301
+ return dataframe.with_column(
302
+ "COLUMN3", dataframe["COLUMN1"] + dataframe["COLUMN2"]
303
+ )
304
+
305
+ session = Session.builder.getOrCreate()
306
+ sp_dataframe = session.create_dataframe(df)
307
+
308
+ preprocessed_dataframe = preprocessor(sp_dataframe)
309
+ ```
310
+
311
+ ------
@@ -2,9 +2,7 @@
2
2
 
3
3
  ---
4
4
  **NOTE**
5
-
6
- This package is on Private Preview.
7
-
5
+ This package is on Public Preview.
8
6
  ---
9
7
 
10
8
  **snowpark-checkpoints-validators** is a package designed to validate Snowpark DataFrames against predefined schemas and checkpoints. This package ensures data integrity and consistency by performing schema and data validation checks at various stages of a Snowpark pipeline.
@@ -3,7 +3,9 @@ build-backend = "hatchling.build"
3
3
  requires = ["hatchling"]
4
4
 
5
5
  [project]
6
- authors = [{name = "Snowflake Inc."}]
6
+ authors = [
7
+ {name = "Snowflake, Inc.", email = "snowflake-python-libraries-dl@snowflake.com"},
8
+ ]
7
9
  classifiers = [
8
10
  "Development Status :: 4 - Beta",
9
11
  "Environment :: Console",
@@ -24,13 +26,14 @@ classifiers = [
24
26
  "Topic :: Scientific/Engineering :: Information Analysis",
25
27
  ]
26
28
  dependencies = [
27
- "snowflake-snowpark-python",
28
- "snowflake-connector-python",
29
+ "snowflake-snowpark-python==1.26.0",
30
+ "snowflake-connector-python==3.13.0",
29
31
  "pyspark",
30
32
  "pandera[io]==0.20.4",
31
33
  "pandera-report==0.1.2",
32
34
  ]
33
35
  description = "Migration tools for Snowpark"
36
+ dynamic = ['version']
34
37
  keywords = [
35
38
  'Snowflake',
36
39
  'analytics',
@@ -39,11 +42,10 @@ keywords = [
39
42
  'db',
40
43
  'Snowpark',
41
44
  ]
42
- license = {file = "LICENSE"}
45
+ license = {text = "Apache License, Version 2.0"}
43
46
  name = "snowpark-checkpoints-validators"
44
47
  readme = "README.md"
45
48
  requires-python = '>=3.9,<3.12'
46
- dynamic = ['version']
47
49
 
48
50
  [project.optional-dependencies]
49
51
  development = [
@@ -75,15 +77,22 @@ where = ["src/"]
75
77
  dev-mode-dirs = ['src']
76
78
  directory = 'snowpark-checkpoints-validators'
77
79
 
80
+ [[tool.hatch.sources]]
81
+ dir = "src/snowflake/snowpark_checkpoints"
82
+ name = "snowpark-checkpoints-validators"
83
+ type = "package"
84
+
78
85
  [tool.hatch.build.targets.wheel]
79
86
  directory = "dist"
80
- packages = ["snowpark-checkpoints-validators/src/snowflake/snowpark_checkpoints"]
87
+ packages = [
88
+ "src/snowflake",
89
+ ]
81
90
 
82
91
  [tool.hatch.build.targets.sdist]
83
92
  directory = "dist"
84
93
  exclude = ["/.github", "/.idea"]
85
94
  include = [
86
- 'src/',
95
+ 'src/**',
87
96
  'README.md',
88
97
  'LICENSE',
89
98
  'test/',
@@ -118,7 +127,7 @@ check = [
118
127
 
119
128
  [tool.hatch.envs.test.scripts]
120
129
  check = [
121
- "pip install -e ../snowpark-checkpoints-configuration" ,
130
+ "pip install -e ../snowpark-checkpoints-configuration",
122
131
  'pytest -v --junitxml=test/outcome/test-results.xml --cov=. --cov-config=test/.coveragerc --cov-report=xml:test/outcome/coverage-{matrix:python:{env:PYTHON_VERSION:unset}}.xml {args:test} --cov-report=term --cov-report=json:test/outcome/coverage-{matrix:python:{env:PYTHON_VERSION:unset}}.json',
123
132
  ]
124
133
 
@@ -0,0 +1,34 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from snowflake.snowpark_checkpoints.checkpoint import (
17
+ check_dataframe_schema,
18
+ check_output_schema,
19
+ check_input_schema,
20
+ validate_dataframe_checkpoint,
21
+ )
22
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
23
+ from snowflake.snowpark_checkpoints.spark_migration import check_with_spark
24
+ from snowflake.snowpark_checkpoints.utils.constants import CheckpointMode
25
+
26
+ __all__ = [
27
+ "check_with_spark",
28
+ "SnowparkJobContext",
29
+ "check_dataframe_schema",
30
+ "check_output_schema",
31
+ "check_input_schema",
32
+ "validate_dataframe_checkpoint",
33
+ "CheckpointMode",
34
+ ]
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  # Wrapper around pandera which logs to snowflake
6
17
  from typing import Any, Optional, Union
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  from typing import Optional
6
17
 
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  from datetime import datetime
6
17
  from typing import Optional
@@ -0,0 +1,23 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+
17
+ class Singleton(type):
18
+ _instances = {}
19
+
20
+ def __call__(cls, *args, **kwargs):
21
+ if cls not in cls._instances:
22
+ cls._instances[cls] = super().__call__(*args, **kwargs)
23
+ return cls._instances[cls]
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  from typing import Optional
6
17
 
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
  from typing import Callable, Optional, TypeVar
5
16
 
6
17
  import pandas as pd
@@ -0,0 +1,14 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  import logging
6
17
  import threading
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  # Skip type
6
17
  from enum import IntEnum
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  import os
6
17
 
@@ -1,6 +1,17 @@
1
- #
2
- # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
3
- #
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
4
15
 
5
16
  from snowflake.snowpark_checkpoints.utils.constants import (
6
17
  BINARY_TYPE,