snowpark-checkpoints-validators 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,6 +39,7 @@ __all__ = [
39
39
  "check_dataframe_schema",
40
40
  "check_output_schema",
41
41
  "check_input_schema",
42
+ "xvalidate_dataframe_checkpoint",
42
43
  "validate_dataframe_checkpoint",
43
44
  "CheckpointMode",
44
45
  ]
@@ -13,4 +13,4 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- __version__ = "0.3.0"
16
+ __version__ = "0.3.1"
@@ -33,6 +33,7 @@ from snowflake.snowpark_checkpoints.snowpark_sampler import (
33
33
  from snowflake.snowpark_checkpoints.utils.constants import (
34
34
  FAIL_STATUS,
35
35
  PASS_STATUS,
36
+ SKIP_STATUS,
36
37
  CheckpointMode,
37
38
  )
38
39
  from snowflake.snowpark_checkpoints.utils.extra_config import is_checkpoint_enabled
@@ -92,11 +93,10 @@ def validate_dataframe_checkpoint(
92
93
  checkpoint_name = _replace_special_characters(checkpoint_name)
93
94
 
94
95
  if not is_checkpoint_enabled(checkpoint_name):
95
- LOGGER.warning(
96
- "Checkpoint '%s' is disabled. Skipping DataFrame checkpoint validation.",
97
- checkpoint_name,
96
+ raise Exception(
97
+ f"Checkpoint '{checkpoint_name}' is disabled. Please enable it in the checkpoints.json file.",
98
+ "In case you want to skip it, use the xvalidate_dataframe_checkpoint method instead.",
98
99
  )
99
- return None
100
100
 
101
101
  LOGGER.info(
102
102
  "Starting DataFrame checkpoint validation for checkpoint '%s'", checkpoint_name
@@ -132,6 +132,48 @@ def validate_dataframe_checkpoint(
132
132
  )
133
133
 
134
134
 
135
+ @log
136
+ def xvalidate_dataframe_checkpoint(
137
+ df: SnowparkDataFrame,
138
+ checkpoint_name: str,
139
+ job_context: Optional[SnowparkJobContext] = None,
140
+ mode: Optional[CheckpointMode] = CheckpointMode.SCHEMA,
141
+ custom_checks: Optional[dict[Any, Any]] = None,
142
+ skip_checks: Optional[dict[Any, Any]] = None,
143
+ sample_frac: Optional[float] = 1.0,
144
+ sample_number: Optional[int] = None,
145
+ sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
146
+ output_path: Optional[str] = None,
147
+ ) -> Union[tuple[bool, PandasDataFrame], None]:
148
+ """Skips the validation of a Snowpark DataFrame against a specified checkpoint.
149
+
150
+ Args:
151
+ df (SnowparkDataFrame): The DataFrame to validate.
152
+ checkpoint_name (str): The name of the checkpoint to validate against.
153
+ job_context (SnowparkJobContext, optional): The job context for the validation. Required for PARQUET mode.
154
+ mode (CheckpointMode): The mode of validation (e.g., SCHEMA, PARQUET). Defaults to SCHEMA.
155
+ custom_checks (Optional[dict[Any, Any]], optional): Custom checks to apply during validation.
156
+ skip_checks (Optional[dict[Any, Any]], optional): Checks to skip during validation.
157
+ sample_frac (Optional[float], optional): Fraction of the DataFrame to sample for validation. Defaults to 1.0.
158
+ sample_number (Optional[int], optional): Number of rows to sample for validation.
159
+ sampling_strategy (Optional[SamplingStrategy], optional): Strategy to use for sampling.
160
+ Defaults to RANDOM_SAMPLE.
161
+ output_path (Optional[str], optional): The output path for the validation results.
162
+
163
+ Raises:
164
+ ValueError: If an invalid validation mode is provided or if job_context is None for PARQUET mode.
165
+
166
+ """
167
+ checkpoint_name = _replace_special_characters(checkpoint_name)
168
+
169
+ LOGGER.warning(
170
+ "Checkpoint '%s' is disabled. Skipping DataFrame checkpoint validation.",
171
+ checkpoint_name,
172
+ )
173
+
174
+ _update_validation_result(checkpoint_name, SKIP_STATUS, output_path)
175
+
176
+
135
177
  def _check_dataframe_schema_file(
136
178
  df: SnowparkDataFrame,
137
179
  checkpoint_name: str,
@@ -39,6 +39,7 @@ STACK_POSITION_CHECKPOINT: Final[int] = 6
39
39
  # Validation status
40
40
  PASS_STATUS: Final[str] = "PASS"
41
41
  FAIL_STATUS: Final[str] = "FAIL"
42
+ SKIP_STATUS: Final[str] = "SKIP"
42
43
 
43
44
  # Validation result keys
44
45
  DEFAULT_KEY: Final[str] = "default"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snowpark-checkpoints-validators
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Migration tools for Snowpark
5
5
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
6
6
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -322,4 +322,37 @@ sp_dataframe = session.create_dataframe(df)
322
322
  preprocessed_dataframe = preprocessor(sp_dataframe)
323
323
  ```
324
324
 
325
+ ### Skip validation
326
+ The `xvalidate_dataframe_checkpoint` method can be used to skip the validation of a defined checkpoint.
327
+ The result of the checkpoint will be `SKIP`, and a message will be logged in the terminal. The method has the same signature as `validate_dataframe_checkpoint`.
328
+
329
+ #### Usage Example
330
+
331
+ ```python
332
+ from snowflake.snowpark import Session
333
+ from snowflake.snowpark_checkpoints.utils.constants import (
334
+ CheckpointMode,
335
+ )
336
+ from snowflake.snowpark_checkpoints.checkpoint import xvalidate_dataframe_checkpoint
337
+ from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
338
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
339
+ from pyspark.sql import SparkSession
340
+
341
+ session = Session.builder.getOrCreate()
342
+ job_context = SnowparkJobContext(
343
+ session, SparkSession.builder.getOrCreate(), "job_context", True
344
+ )
345
+ df = session.read.format("csv").load("data.csv")
346
+
347
+ xvalidate_dataframe_checkpoint(
348
+ df,
349
+ "schema_checkpoint",
350
+ job_context=job_context,
351
+ mode=CheckpointMode.SCHEMA,
352
+ sample_frac=0.1,
353
+ sampling_strategy=SamplingStrategy.RANDOM_SAMPLE
354
+ )
355
+ ```
356
+
357
+
325
358
  ------
@@ -1,6 +1,6 @@
1
- snowflake/snowpark_checkpoints/__init__.py,sha256=p7fzH3f8foD5nhNJHZ00JT3ODTXJGGkWTd3xRKx-8aQ,1435
2
- snowflake/snowpark_checkpoints/__version__.py,sha256=kbbDnlkY7JOLNHvfWYkCO_mOBOV9GniMGdxYoQpLhyg,632
3
- snowflake/snowpark_checkpoints/checkpoint.py,sha256=i-iDRYbGvQHy9ipW7UxHVhJhQ9BXNSO-bsCcHyg3oLA,22056
1
+ snowflake/snowpark_checkpoints/__init__.py,sha256=V2HtQkoek-2twos_Qit-ZRS9FrSsbQ58nVf0uuGnyyk,1473
2
+ snowflake/snowpark_checkpoints/__version__.py,sha256=uSRs7fRupFeQ-z3PtU_6qh6ry8YBaSAnEIAvLhJKUR8,632
3
+ snowflake/snowpark_checkpoints/checkpoint.py,sha256=pU-HdpoS4SYzJU0qEaFzS5QBUE8K55Sn8K27zJe9_xM,24187
4
4
  snowflake/snowpark_checkpoints/errors.py,sha256=9KjzRf8bjDZTTNL4LeySJAwuucDOyz0Ka7EFBKWFpyg,1821
5
5
  snowflake/snowpark_checkpoints/job_context.py,sha256=RMK0g0HrbDVrOAvai4PgsGvsAn_GIo9aFmh-tWlyieY,4183
6
6
  snowflake/snowpark_checkpoints/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
@@ -13,14 +13,14 @@ snowflake/snowpark_checkpoints/io_utils/io_default_strategy.py,sha256=VMfdqj4uDg
13
13
  snowflake/snowpark_checkpoints/io_utils/io_env_strategy.py,sha256=ltG_rxm0CkJFXpskOf__ByZw-C6B9LtycqlyB9EmaJI,3569
14
14
  snowflake/snowpark_checkpoints/io_utils/io_file_manager.py,sha256=YHrxRBzTlhIUrSFrsoWkRY_Qa-TXgDWglr00T98Tc5g,2485
15
15
  snowflake/snowpark_checkpoints/utils/__init__.py,sha256=I4srmZ8G1q9DU6Suo1S91aVfNvETyisKH95uvLAvEJ0,609
16
- snowflake/snowpark_checkpoints/utils/constants.py,sha256=pgFttLDQ6fTa6obSdvivWBYClS21ap41YVDNGAS4sxY,4146
16
+ snowflake/snowpark_checkpoints/utils/constants.py,sha256=M3vLdvKiVOhHMo0oPu4P42Wn_v6UDqmK6wHOGuoG6sY,4179
17
17
  snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=xOYaG6MfsUCAHI0C_7qWF_m96xcLIZWwrgxY4UlpaZI,4325
18
18
  snowflake/snowpark_checkpoints/utils/logging_utils.py,sha256=yyi6X5DqKeTg0HRhvsH6ymYp2P0wbnyKIzI2RzrQS7k,2278
19
19
  snowflake/snowpark_checkpoints/utils/pandera_check_manager.py,sha256=tQIozLO-2kM8WZ-gGKfRwmXBx1cDPaIZB0qIcArp8xA,16100
20
20
  snowflake/snowpark_checkpoints/utils/supported_types.py,sha256=GrMX2tHdSFnK7LlPbZx20UufD6Br6TNVRkkBwIxdPy0,1433
21
21
  snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=GfuyIaI3QG4a4_qWwyJHvWRM0GENunNexuEJ6IgscF4,32684
22
22
  snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=oQ1c4n-uAA2kFIpWIRPWhbCW8e-wwOIL8qDqLvr5Fok,14398
23
- snowpark_checkpoints_validators-0.3.0.dist-info/METADATA,sha256=RbOlEHK5kumiBPP2S7-7k7zxzzLYag7Yb6TtQeOYbV0,11557
24
- snowpark_checkpoints_validators-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
- snowpark_checkpoints_validators-0.3.0.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
26
- snowpark_checkpoints_validators-0.3.0.dist-info/RECORD,,
23
+ snowpark_checkpoints_validators-0.3.1.dist-info/METADATA,sha256=mtI8xnknt0g9McBdjcbNqOHRCDlJs2GtB1A1zcuH_00,12676
24
+ snowpark_checkpoints_validators-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
+ snowpark_checkpoints_validators-0.3.1.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
26
+ snowpark_checkpoints_validators-0.3.1.dist-info/RECORD,,