snowpark-checkpoints-validators 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -13,4 +13,4 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- __version__ = "0.1.2"
16
+ __version__ = "0.1.4"
@@ -14,11 +14,11 @@
14
14
  # limitations under the License.
15
15
 
16
16
  # Wrapper around pandera which logs to snowflake
17
- from typing import Any, Optional, Union
17
+ from typing import Any, Optional, Union, cast
18
18
 
19
19
  from pandas import DataFrame as PandasDataFrame
20
- from pandera import Check, DataFrameSchema
21
- from pandera_report import DataFrameValidator
20
+ from pandera import Check, DataFrameModel, DataFrameSchema
21
+ from pandera.errors import SchemaError, SchemaErrors
22
22
 
23
23
  from snowflake.snowpark import DataFrame as SnowparkDataFrame
24
24
  from snowflake.snowpark_checkpoints.errors import SchemaValidationError
@@ -259,12 +259,7 @@ def _check_dataframe_schema(
259
259
  pandera_schema_upper, sample_df = _process_sampling(
260
260
  df, pandera_schema, job_context, sample_frac, sample_number, sampling_strategy
261
261
  )
262
-
263
- # Raises SchemaError on validation issues
264
- validator = DataFrameValidator()
265
- is_valid, validation_result = validator.validate(
266
- pandera_schema_upper, sample_df, validity_flag=True
267
- )
262
+ is_valid, validation_result = _validate(pandera_schema_upper, sample_df)
268
263
  if is_valid:
269
264
  if job_context is not None:
270
265
  job_context._mark_pass(checkpoint_name)
@@ -342,10 +337,8 @@ def check_output_schema(
342
337
  sampler.process_args([snowpark_results])
343
338
  pandas_sample_args = sampler.get_sampled_pandas_args()
344
339
 
345
- # Raises SchemaError on validation issues
346
- validator = DataFrameValidator()
347
- is_valid, validation_result = validator.validate(
348
- pandera_schema, pandas_sample_args[0], validity_flag=True
340
+ is_valid, validation_result = _validate(
341
+ pandera_schema, pandas_sample_args[0]
349
342
  )
350
343
  logger = CheckpointLogger().get_logger()
351
344
  logger.info(
@@ -440,11 +433,9 @@ def check_input_schema(
440
433
  for arg in pandas_sample_args:
441
434
  if isinstance(arg, PandasDataFrame):
442
435
 
443
- validator = DataFrameValidator()
444
- is_valid, validation_result = validator.validate(
436
+ is_valid, validation_result = _validate(
445
437
  pandera_schema,
446
438
  arg,
447
- validity_flag=True,
448
439
  )
449
440
 
450
441
  logger = CheckpointLogger().get_logger()
@@ -480,3 +471,19 @@ def check_input_schema(
480
471
  return wrapper
481
472
 
482
473
  return check_input_with_decorator
474
+
475
+
476
+ def _validate(
477
+ schema: Union[type[DataFrameModel], DataFrameSchema],
478
+ df: PandasDataFrame,
479
+ lazy: bool = True,
480
+ ) -> tuple[bool, PandasDataFrame]:
481
+ if not isinstance(schema, DataFrameSchema):
482
+ schema = schema.to_schema()
483
+ is_valid = True
484
+ try:
485
+ df = schema.validate(df, lazy=lazy)
486
+ except (SchemaErrors, SchemaError) as schema_errors:
487
+ df = cast(PandasDataFrame, schema_errors.failure_cases)
488
+ is_valid = False
489
+ return is_valid, df
@@ -330,11 +330,11 @@ def _generate_event(
330
330
  if sc_version is not None:
331
331
  metadata["snowpark_checkpoints_version"] = sc_version
332
332
  message = {
333
- "type": event_type,
333
+ "event_type": event_type,
334
+ "type": "snowpark-checkpoints",
334
335
  "event_name": event_name,
335
336
  "driver_type": "PythonConnector",
336
337
  "driver_version": SNOWFLAKE_CONNECTOR_VERSION,
337
- "source": "snowpark-checkpoints",
338
338
  "metadata": metadata,
339
339
  "data": json.dumps(parameters_info or {}),
340
340
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snowpark-checkpoints-validators
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Migration tools for Snowpark
5
5
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
6
6
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -26,21 +26,23 @@ Classifier: Topic :: Software Development :: Libraries
26
26
  Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
27
27
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
28
  Requires-Python: <3.12,>=3.9
29
- Requires-Dist: pandera-report==0.1.2
30
29
  Requires-Dist: pandera[io]==0.20.4
31
- Requires-Dist: pyspark
32
30
  Requires-Dist: snowflake-connector-python[pandas]
33
- Requires-Dist: snowflake-snowpark-python==1.26.0
31
+ Requires-Dist: snowflake-snowpark-python>=1.23.0
34
32
  Provides-Extra: development
35
33
  Requires-Dist: coverage>=7.6.7; extra == 'development'
34
+ Requires-Dist: deepdiff==8.1.1; extra == 'development'
36
35
  Requires-Dist: deepdiff>=8.0.0; extra == 'development'
37
36
  Requires-Dist: hatchling==1.25.0; extra == 'development'
38
37
  Requires-Dist: pre-commit>=4.0.1; extra == 'development'
39
38
  Requires-Dist: pyarrow>=18.0.0; extra == 'development'
39
+ Requires-Dist: pyspark>=3.5.0; extra == 'development'
40
40
  Requires-Dist: pytest-cov>=6.0.0; extra == 'development'
41
41
  Requires-Dist: pytest>=8.3.3; extra == 'development'
42
42
  Requires-Dist: setuptools>=70.0.0; extra == 'development'
43
43
  Requires-Dist: twine==5.1.1; extra == 'development'
44
+ Provides-Extra: pyspark
45
+ Requires-Dist: pyspark>=3.5.0; extra == 'pyspark'
44
46
  Description-Content-Type: text/markdown
45
47
 
46
48
  # snowpark-checkpoints-validators
@@ -51,6 +53,17 @@ Description-Content-Type: text/markdown
51
53
 
52
54
  **snowpark-checkpoints-validators** is a package designed to validate Snowpark DataFrames against predefined schemas and checkpoints. This package ensures data integrity and consistency by performing schema and data validation checks at various stages of a Snowpark pipeline.
53
55
 
56
+ ---
57
+ ## Install the library
58
+ ```bash
59
+ pip install snowpark-checkpoints-validators
60
+ ```
61
+ This package requires PySpark to be installed in the same environment. If you do not have it, you can install PySpark alongside Snowpark Checkpoints by running the following command:
62
+ ```bash
63
+ pip install "snowpark-checkpoints-validators[pyspark]"
64
+ ```
65
+ ---
66
+
54
67
  ## Features
55
68
 
56
69
  - Validate Snowpark DataFrames against predefined Pandera schemas.
@@ -1,6 +1,6 @@
1
1
  snowflake/snowpark_checkpoints/__init__.py,sha256=1_xzSopIHWpw1i3gQqWLN0wCfWWEefjr4cl1vl0xSdY,1211
2
- snowflake/snowpark_checkpoints/__version__.py,sha256=qNTBwMUtsLu0okWXwrUvl9AohG1pXd4kalMC8v10gHM,632
3
- snowflake/snowpark_checkpoints/checkpoint.py,sha256=-y1iWdGxYGuTWdngOEXdA59MT33PCiM7cP1s3jJs9jE,18997
2
+ snowflake/snowpark_checkpoints/__version__.py,sha256=XLR7FIaqd--6dSDY841wZ2c8LM8IAx8L5fVTjJIHRkQ,632
3
+ snowflake/snowpark_checkpoints/checkpoint.py,sha256=VceodgS3muZLH5uWhqmuMsvS7cTBAOWOWFCRKt2VsNg,19150
4
4
  snowflake/snowpark_checkpoints/errors.py,sha256=9KjzRf8bjDZTTNL4LeySJAwuucDOyz0Ka7EFBKWFpyg,1821
5
5
  snowflake/snowpark_checkpoints/job_context.py,sha256=7LdJ682lC8hCJOYUn-AVXq_Llv18R9oGdK2F-amYR_o,2990
6
6
  snowflake/snowpark_checkpoints/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
@@ -14,9 +14,9 @@ snowflake/snowpark_checkpoints/utils/constants.py,sha256=pgFttLDQ6fTa6obSdvivWBY
14
14
  snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=pmGLYT7cu9WMKzQwcEPkgk1DMnnT1fREm45p19e79hk,2567
15
15
  snowflake/snowpark_checkpoints/utils/pandera_check_manager.py,sha256=ddTwXauuZdowIRwPMT61GWYCG4XGKOFkVyfZO49bc-8,14516
16
16
  snowflake/snowpark_checkpoints/utils/supported_types.py,sha256=GrMX2tHdSFnK7LlPbZx20UufD6Br6TNVRkkBwIxdPy0,1433
17
- snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=zUQw9kfOgqLJYQuIW7bl2fRJjX3oEKyKjDlYQPFPHtA,31366
17
+ snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=_WOVo19BxcF6cpQDplID6BEOvgJfHTGK1JZI1-OI4uc,31370
18
18
  snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=o9HOBrDuTxSIgzZQHfsa9pMzzXRUsRAISI7L6OURouo,13528
19
- snowpark_checkpoints_validators-0.1.2.dist-info/METADATA,sha256=bw7FnPXUQUPZINvLZQwKJUdnRu7LeiuzG1vYp2T85Kc,11009
20
- snowpark_checkpoints_validators-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
21
- snowpark_checkpoints_validators-0.1.2.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
22
- snowpark_checkpoints_validators-0.1.2.dist-info/RECORD,,
19
+ snowpark_checkpoints_validators-0.1.4.dist-info/METADATA,sha256=L00oXPSPvCUBEJuYcAPPt08BPyKvbd3ZgPLpr3DgvqQ,11470
20
+ snowpark_checkpoints_validators-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
21
+ snowpark_checkpoints_validators-0.1.4.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
22
+ snowpark_checkpoints_validators-0.1.4.dist-info/RECORD,,