snowpark-checkpoints-validators 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_checkpoints/__init__.py +1 -0
- snowflake/snowpark_checkpoints/__version__.py +1 -1
- snowflake/snowpark_checkpoints/checkpoint.py +46 -4
- snowflake/snowpark_checkpoints/utils/constants.py +1 -0
- {snowpark_checkpoints_validators-0.3.0.dist-info → snowpark_checkpoints_validators-0.3.1.dist-info}/METADATA +34 -1
- {snowpark_checkpoints_validators-0.3.0.dist-info → snowpark_checkpoints_validators-0.3.1.dist-info}/RECORD +8 -8
- {snowpark_checkpoints_validators-0.3.0.dist-info → snowpark_checkpoints_validators-0.3.1.dist-info}/WHEEL +0 -0
- {snowpark_checkpoints_validators-0.3.0.dist-info → snowpark_checkpoints_validators-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -33,6 +33,7 @@ from snowflake.snowpark_checkpoints.snowpark_sampler import (
|
|
33
33
|
from snowflake.snowpark_checkpoints.utils.constants import (
|
34
34
|
FAIL_STATUS,
|
35
35
|
PASS_STATUS,
|
36
|
+
SKIP_STATUS,
|
36
37
|
CheckpointMode,
|
37
38
|
)
|
38
39
|
from snowflake.snowpark_checkpoints.utils.extra_config import is_checkpoint_enabled
|
@@ -92,11 +93,10 @@ def validate_dataframe_checkpoint(
|
|
92
93
|
checkpoint_name = _replace_special_characters(checkpoint_name)
|
93
94
|
|
94
95
|
if not is_checkpoint_enabled(checkpoint_name):
|
95
|
-
|
96
|
-
"Checkpoint '
|
97
|
-
|
96
|
+
raise Exception(
|
97
|
+
f"Checkpoint '{checkpoint_name}' is disabled. Please enable it in the checkpoints.json file.",
|
98
|
+
"In case you want to skip it, use the xvalidate_dataframe_checkpoint method instead.",
|
98
99
|
)
|
99
|
-
return None
|
100
100
|
|
101
101
|
LOGGER.info(
|
102
102
|
"Starting DataFrame checkpoint validation for checkpoint '%s'", checkpoint_name
|
@@ -132,6 +132,48 @@ def validate_dataframe_checkpoint(
|
|
132
132
|
)
|
133
133
|
|
134
134
|
|
135
|
+
@log
|
136
|
+
def xvalidate_dataframe_checkpoint(
|
137
|
+
df: SnowparkDataFrame,
|
138
|
+
checkpoint_name: str,
|
139
|
+
job_context: Optional[SnowparkJobContext] = None,
|
140
|
+
mode: Optional[CheckpointMode] = CheckpointMode.SCHEMA,
|
141
|
+
custom_checks: Optional[dict[Any, Any]] = None,
|
142
|
+
skip_checks: Optional[dict[Any, Any]] = None,
|
143
|
+
sample_frac: Optional[float] = 1.0,
|
144
|
+
sample_number: Optional[int] = None,
|
145
|
+
sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
|
146
|
+
output_path: Optional[str] = None,
|
147
|
+
) -> Union[tuple[bool, PandasDataFrame], None]:
|
148
|
+
"""Skips the validation of a Snowpark DataFrame against a specified checkpoint.
|
149
|
+
|
150
|
+
Args:
|
151
|
+
df (SnowparkDataFrame): The DataFrame to validate.
|
152
|
+
checkpoint_name (str): The name of the checkpoint to validate against.
|
153
|
+
job_context (SnowparkJobContext, optional): The job context for the validation. Required for PARQUET mode.
|
154
|
+
mode (CheckpointMode): The mode of validation (e.g., SCHEMA, PARQUET). Defaults to SCHEMA.
|
155
|
+
custom_checks (Optional[dict[Any, Any]], optional): Custom checks to apply during validation.
|
156
|
+
skip_checks (Optional[dict[Any, Any]], optional): Checks to skip during validation.
|
157
|
+
sample_frac (Optional[float], optional): Fraction of the DataFrame to sample for validation. Defaults to 1.0.
|
158
|
+
sample_number (Optional[int], optional): Number of rows to sample for validation.
|
159
|
+
sampling_strategy (Optional[SamplingStrategy], optional): Strategy to use for sampling.
|
160
|
+
Defaults to RANDOM_SAMPLE.
|
161
|
+
output_path (Optional[str], optional): The output path for the validation results.
|
162
|
+
|
163
|
+
Raises:
|
164
|
+
ValueError: If an invalid validation mode is provided or if job_context is None for PARQUET mode.
|
165
|
+
|
166
|
+
"""
|
167
|
+
checkpoint_name = _replace_special_characters(checkpoint_name)
|
168
|
+
|
169
|
+
LOGGER.warning(
|
170
|
+
"Checkpoint '%s' is disabled. Skipping DataFrame checkpoint validation.",
|
171
|
+
checkpoint_name,
|
172
|
+
)
|
173
|
+
|
174
|
+
_update_validation_result(checkpoint_name, SKIP_STATUS, output_path)
|
175
|
+
|
176
|
+
|
135
177
|
def _check_dataframe_schema_file(
|
136
178
|
df: SnowparkDataFrame,
|
137
179
|
checkpoint_name: str,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: snowpark-checkpoints-validators
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.1
|
4
4
|
Summary: Migration tools for Snowpark
|
5
5
|
Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
|
6
6
|
Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
|
@@ -322,4 +322,37 @@ sp_dataframe = session.create_dataframe(df)
|
|
322
322
|
preprocessed_dataframe = preprocessor(sp_dataframe)
|
323
323
|
```
|
324
324
|
|
325
|
+
### Skip validation
|
326
|
+
The method `xvalidate_dataframe_checkpoint` can be used to skip the validation of a given checkpoint.
|
327
|
+
The result of the checkpoint will be `SKIP` and a message will be logged in the terminal. The method has the same signature as `validate_dataframe_checkpoint`.
|
328
|
+
|
329
|
+
#### Usage Example
|
330
|
+
|
331
|
+
```python
|
332
|
+
from snowflake.snowpark import Session
|
333
|
+
from snowflake.snowpark_checkpoints.utils.constants import (
|
334
|
+
CheckpointMode,
|
335
|
+
)
|
336
|
+
from snowflake.snowpark_checkpoints.checkpoint import xvalidate_dataframe_checkpoint
|
337
|
+
from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
|
338
|
+
from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
|
339
|
+
from pyspark.sql import SparkSession
|
340
|
+
|
341
|
+
session = Session.builder.getOrCreate()
|
342
|
+
job_context = SnowparkJobContext(
|
343
|
+
session, SparkSession.builder.getOrCreate(), "job_context", True
|
344
|
+
)
|
345
|
+
df = session.read.format("csv").load("data.csv")
|
346
|
+
|
347
|
+
xvalidate_dataframe_checkpoint(
|
348
|
+
df,
|
349
|
+
"schema_checkpoint",
|
350
|
+
job_context=job_context,
|
351
|
+
mode=CheckpointMode.SCHEMA,
|
352
|
+
sample_frac=0.1,
|
353
|
+
sampling_strategy=SamplingStrategy.RANDOM_SAMPLE
|
354
|
+
)
|
355
|
+
```
|
356
|
+
|
357
|
+
|
325
358
|
------
|
@@ -1,6 +1,6 @@
|
|
1
|
-
snowflake/snowpark_checkpoints/__init__.py,sha256=
|
2
|
-
snowflake/snowpark_checkpoints/__version__.py,sha256=
|
3
|
-
snowflake/snowpark_checkpoints/checkpoint.py,sha256=
|
1
|
+
snowflake/snowpark_checkpoints/__init__.py,sha256=V2HtQkoek-2twos_Qit-ZRS9FrSsbQ58nVf0uuGnyyk,1473
|
2
|
+
snowflake/snowpark_checkpoints/__version__.py,sha256=uSRs7fRupFeQ-z3PtU_6qh6ry8YBaSAnEIAvLhJKUR8,632
|
3
|
+
snowflake/snowpark_checkpoints/checkpoint.py,sha256=pU-HdpoS4SYzJU0qEaFzS5QBUE8K55Sn8K27zJe9_xM,24187
|
4
4
|
snowflake/snowpark_checkpoints/errors.py,sha256=9KjzRf8bjDZTTNL4LeySJAwuucDOyz0Ka7EFBKWFpyg,1821
|
5
5
|
snowflake/snowpark_checkpoints/job_context.py,sha256=RMK0g0HrbDVrOAvai4PgsGvsAn_GIo9aFmh-tWlyieY,4183
|
6
6
|
snowflake/snowpark_checkpoints/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
|
@@ -13,14 +13,14 @@ snowflake/snowpark_checkpoints/io_utils/io_default_strategy.py,sha256=VMfdqj4uDg
|
|
13
13
|
snowflake/snowpark_checkpoints/io_utils/io_env_strategy.py,sha256=ltG_rxm0CkJFXpskOf__ByZw-C6B9LtycqlyB9EmaJI,3569
|
14
14
|
snowflake/snowpark_checkpoints/io_utils/io_file_manager.py,sha256=YHrxRBzTlhIUrSFrsoWkRY_Qa-TXgDWglr00T98Tc5g,2485
|
15
15
|
snowflake/snowpark_checkpoints/utils/__init__.py,sha256=I4srmZ8G1q9DU6Suo1S91aVfNvETyisKH95uvLAvEJ0,609
|
16
|
-
snowflake/snowpark_checkpoints/utils/constants.py,sha256=
|
16
|
+
snowflake/snowpark_checkpoints/utils/constants.py,sha256=M3vLdvKiVOhHMo0oPu4P42Wn_v6UDqmK6wHOGuoG6sY,4179
|
17
17
|
snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=xOYaG6MfsUCAHI0C_7qWF_m96xcLIZWwrgxY4UlpaZI,4325
|
18
18
|
snowflake/snowpark_checkpoints/utils/logging_utils.py,sha256=yyi6X5DqKeTg0HRhvsH6ymYp2P0wbnyKIzI2RzrQS7k,2278
|
19
19
|
snowflake/snowpark_checkpoints/utils/pandera_check_manager.py,sha256=tQIozLO-2kM8WZ-gGKfRwmXBx1cDPaIZB0qIcArp8xA,16100
|
20
20
|
snowflake/snowpark_checkpoints/utils/supported_types.py,sha256=GrMX2tHdSFnK7LlPbZx20UufD6Br6TNVRkkBwIxdPy0,1433
|
21
21
|
snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=GfuyIaI3QG4a4_qWwyJHvWRM0GENunNexuEJ6IgscF4,32684
|
22
22
|
snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=oQ1c4n-uAA2kFIpWIRPWhbCW8e-wwOIL8qDqLvr5Fok,14398
|
23
|
-
snowpark_checkpoints_validators-0.3.
|
24
|
-
snowpark_checkpoints_validators-0.3.
|
25
|
-
snowpark_checkpoints_validators-0.3.
|
26
|
-
snowpark_checkpoints_validators-0.3.
|
23
|
+
snowpark_checkpoints_validators-0.3.1.dist-info/METADATA,sha256=mtI8xnknt0g9McBdjcbNqOHRCDlJs2GtB1A1zcuH_00,12676
|
24
|
+
snowpark_checkpoints_validators-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
25
|
+
snowpark_checkpoints_validators-0.3.1.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
|
26
|
+
snowpark_checkpoints_validators-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|