snowpark-checkpoints-validators 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_checkpoints/__version__.py +1 -1
- snowflake/snowpark_checkpoints/checkpoint.py +23 -16
- snowflake/snowpark_checkpoints/utils/telemetry.py +2 -2
- {snowpark_checkpoints_validators-0.1.2.dist-info → snowpark_checkpoints_validators-0.1.4.dist-info}/METADATA +17 -4
- {snowpark_checkpoints_validators-0.1.2.dist-info → snowpark_checkpoints_validators-0.1.4.dist-info}/RECORD +7 -7
- {snowpark_checkpoints_validators-0.1.2.dist-info → snowpark_checkpoints_validators-0.1.4.dist-info}/WHEEL +0 -0
- {snowpark_checkpoints_validators-0.1.2.dist-info → snowpark_checkpoints_validators-0.1.4.dist-info}/licenses/LICENSE +0 -0
@@ -14,11 +14,11 @@
|
|
14
14
|
# limitations under the License.
|
15
15
|
|
16
16
|
# Wrapper around pandera which logs to snowflake
|
17
|
-
from typing import Any, Optional, Union
|
17
|
+
from typing import Any, Optional, Union, cast
|
18
18
|
|
19
19
|
from pandas import DataFrame as PandasDataFrame
|
20
|
-
from pandera import Check, DataFrameSchema
|
21
|
-
from pandera_report import DataFrameValidator
|
20
|
+
from pandera import Check, DataFrameModel, DataFrameSchema
|
21
|
+
from pandera.errors import SchemaError, SchemaErrors
|
22
22
|
|
23
23
|
from snowflake.snowpark import DataFrame as SnowparkDataFrame
|
24
24
|
from snowflake.snowpark_checkpoints.errors import SchemaValidationError
|
@@ -259,12 +259,7 @@ def _check_dataframe_schema(
|
|
259
259
|
pandera_schema_upper, sample_df = _process_sampling(
|
260
260
|
df, pandera_schema, job_context, sample_frac, sample_number, sampling_strategy
|
261
261
|
)
|
262
|
-
|
263
|
-
# Raises SchemaError on validation issues
|
264
|
-
validator = DataFrameValidator()
|
265
|
-
is_valid, validation_result = validator.validate(
|
266
|
-
pandera_schema_upper, sample_df, validity_flag=True
|
267
|
-
)
|
262
|
+
is_valid, validation_result = _validate(pandera_schema_upper, sample_df)
|
268
263
|
if is_valid:
|
269
264
|
if job_context is not None:
|
270
265
|
job_context._mark_pass(checkpoint_name)
|
@@ -342,10 +337,8 @@ def check_output_schema(
|
|
342
337
|
sampler.process_args([snowpark_results])
|
343
338
|
pandas_sample_args = sampler.get_sampled_pandas_args()
|
344
339
|
|
345
|
-
|
346
|
-
|
347
|
-
is_valid, validation_result = validator.validate(
|
348
|
-
pandera_schema, pandas_sample_args[0], validity_flag=True
|
340
|
+
is_valid, validation_result = _validate(
|
341
|
+
pandera_schema, pandas_sample_args[0]
|
349
342
|
)
|
350
343
|
logger = CheckpointLogger().get_logger()
|
351
344
|
logger.info(
|
@@ -440,11 +433,9 @@ def check_input_schema(
|
|
440
433
|
for arg in pandas_sample_args:
|
441
434
|
if isinstance(arg, PandasDataFrame):
|
442
435
|
|
443
|
-
|
444
|
-
is_valid, validation_result = validator.validate(
|
436
|
+
is_valid, validation_result = _validate(
|
445
437
|
pandera_schema,
|
446
438
|
arg,
|
447
|
-
validity_flag=True,
|
448
439
|
)
|
449
440
|
|
450
441
|
logger = CheckpointLogger().get_logger()
|
@@ -480,3 +471,19 @@ def check_input_schema(
|
|
480
471
|
return wrapper
|
481
472
|
|
482
473
|
return check_input_with_decorator
|
474
|
+
|
475
|
+
|
476
|
+
def _validate(
|
477
|
+
schema: Union[type[DataFrameModel], DataFrameSchema],
|
478
|
+
df: PandasDataFrame,
|
479
|
+
lazy: bool = True,
|
480
|
+
) -> tuple[bool, PandasDataFrame]:
|
481
|
+
if not isinstance(schema, DataFrameSchema):
|
482
|
+
schema = schema.to_schema()
|
483
|
+
is_valid = True
|
484
|
+
try:
|
485
|
+
df = schema.validate(df, lazy=lazy)
|
486
|
+
except (SchemaErrors, SchemaError) as schema_errors:
|
487
|
+
df = cast(PandasDataFrame, schema_errors.failure_cases)
|
488
|
+
is_valid = False
|
489
|
+
return is_valid, df
|
@@ -330,11 +330,11 @@ def _generate_event(
|
|
330
330
|
if sc_version is not None:
|
331
331
|
metadata["snowpark_checkpoints_version"] = sc_version
|
332
332
|
message = {
|
333
|
-
"
|
333
|
+
"event_type": event_type,
|
334
|
+
"type": "snowpark-checkpoints",
|
334
335
|
"event_name": event_name,
|
335
336
|
"driver_type": "PythonConnector",
|
336
337
|
"driver_version": SNOWFLAKE_CONNECTOR_VERSION,
|
337
|
-
"source": "snowpark-checkpoints",
|
338
338
|
"metadata": metadata,
|
339
339
|
"data": json.dumps(parameters_info or {}),
|
340
340
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: snowpark-checkpoints-validators
|
3
|
-
Version: 0.1.2
|
3
|
+
Version: 0.1.4
|
4
4
|
Summary: Migration tools for Snowpark
|
5
5
|
Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
|
6
6
|
Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
|
@@ -26,21 +26,23 @@ Classifier: Topic :: Software Development :: Libraries
|
|
26
26
|
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
27
27
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
28
28
|
Requires-Python: <3.12,>=3.9
|
29
|
-
Requires-Dist: pandera-report==0.1.2
|
30
29
|
Requires-Dist: pandera[io]==0.20.4
|
31
|
-
Requires-Dist: pyspark
|
32
30
|
Requires-Dist: snowflake-connector-python[pandas]
|
33
|
-
Requires-Dist: snowflake-snowpark-python
|
31
|
+
Requires-Dist: snowflake-snowpark-python>=1.23.0
|
34
32
|
Provides-Extra: development
|
35
33
|
Requires-Dist: coverage>=7.6.7; extra == 'development'
|
34
|
+
Requires-Dist: deepdiff==8.1.1; extra == 'development'
|
36
35
|
Requires-Dist: deepdiff>=8.0.0; extra == 'development'
|
37
36
|
Requires-Dist: hatchling==1.25.0; extra == 'development'
|
38
37
|
Requires-Dist: pre-commit>=4.0.1; extra == 'development'
|
39
38
|
Requires-Dist: pyarrow>=18.0.0; extra == 'development'
|
39
|
+
Requires-Dist: pyspark>=3.5.0; extra == 'development'
|
40
40
|
Requires-Dist: pytest-cov>=6.0.0; extra == 'development'
|
41
41
|
Requires-Dist: pytest>=8.3.3; extra == 'development'
|
42
42
|
Requires-Dist: setuptools>=70.0.0; extra == 'development'
|
43
43
|
Requires-Dist: twine==5.1.1; extra == 'development'
|
44
|
+
Provides-Extra: pyspark
|
45
|
+
Requires-Dist: pyspark>=3.5.0; extra == 'pyspark'
|
44
46
|
Description-Content-Type: text/markdown
|
45
47
|
|
46
48
|
# snowpark-checkpoints-validators
|
@@ -51,6 +53,17 @@ Description-Content-Type: text/markdown
|
|
51
53
|
|
52
54
|
**snowpark-checkpoints-validators** is a package designed to validate Snowpark DataFrames against predefined schemas and checkpoints. This package ensures data integrity and consistency by performing schema and data validation checks at various stages of a Snowpark pipeline.
|
53
55
|
|
56
|
+
---
|
57
|
+
## Install the library
|
58
|
+
```bash
|
59
|
+
pip install snowpark-checkpoints-validators
|
60
|
+
```
|
61
|
+
This package requires PySpark to be installed in the same environment. If you do not have it, you can install PySpark alongside Snowpark Checkpoints by running the following command:
|
62
|
+
```bash
|
63
|
+
pip install "snowpark-checkpoints-validators[pyspark]"
|
64
|
+
```
|
65
|
+
---
|
66
|
+
|
54
67
|
## Features
|
55
68
|
|
56
69
|
- Validate Snowpark DataFrames against predefined Pandera schemas.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
snowflake/snowpark_checkpoints/__init__.py,sha256=1_xzSopIHWpw1i3gQqWLN0wCfWWEefjr4cl1vl0xSdY,1211
|
2
|
-
snowflake/snowpark_checkpoints/__version__.py,sha256=
|
3
|
-
snowflake/snowpark_checkpoints/checkpoint.py,sha256
|
2
|
+
snowflake/snowpark_checkpoints/__version__.py,sha256=XLR7FIaqd--6dSDY841wZ2c8LM8IAx8L5fVTjJIHRkQ,632
|
3
|
+
snowflake/snowpark_checkpoints/checkpoint.py,sha256=VceodgS3muZLH5uWhqmuMsvS7cTBAOWOWFCRKt2VsNg,19150
|
4
4
|
snowflake/snowpark_checkpoints/errors.py,sha256=9KjzRf8bjDZTTNL4LeySJAwuucDOyz0Ka7EFBKWFpyg,1821
|
5
5
|
snowflake/snowpark_checkpoints/job_context.py,sha256=7LdJ682lC8hCJOYUn-AVXq_Llv18R9oGdK2F-amYR_o,2990
|
6
6
|
snowflake/snowpark_checkpoints/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
|
@@ -14,9 +14,9 @@ snowflake/snowpark_checkpoints/utils/constants.py,sha256=pgFttLDQ6fTa6obSdvivWBY
|
|
14
14
|
snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=pmGLYT7cu9WMKzQwcEPkgk1DMnnT1fREm45p19e79hk,2567
|
15
15
|
snowflake/snowpark_checkpoints/utils/pandera_check_manager.py,sha256=ddTwXauuZdowIRwPMT61GWYCG4XGKOFkVyfZO49bc-8,14516
|
16
16
|
snowflake/snowpark_checkpoints/utils/supported_types.py,sha256=GrMX2tHdSFnK7LlPbZx20UufD6Br6TNVRkkBwIxdPy0,1433
|
17
|
-
snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=
|
17
|
+
snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=_WOVo19BxcF6cpQDplID6BEOvgJfHTGK1JZI1-OI4uc,31370
|
18
18
|
snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=o9HOBrDuTxSIgzZQHfsa9pMzzXRUsRAISI7L6OURouo,13528
|
19
|
-
snowpark_checkpoints_validators-0.1.
|
20
|
-
snowpark_checkpoints_validators-0.1.
|
21
|
-
snowpark_checkpoints_validators-0.1.
|
22
|
-
snowpark_checkpoints_validators-0.1.
|
19
|
+
snowpark_checkpoints_validators-0.1.4.dist-info/METADATA,sha256=L00oXPSPvCUBEJuYcAPPt08BPyKvbd3ZgPLpr3DgvqQ,11470
|
20
|
+
snowpark_checkpoints_validators-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
21
|
+
snowpark_checkpoints_validators-0.1.4.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
|
22
|
+
snowpark_checkpoints_validators-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|