snowpark-checkpoints-validators 0.2.1__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/PKG-INFO +36 -1
  2. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/README.md +33 -0
  3. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/pyproject.toml +2 -0
  4. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/__init__.py +1 -0
  5. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/__version__.py +1 -1
  6. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/checkpoint.py +46 -4
  7. snowpark_checkpoints_validators-0.3.1/src/snowflake/snowpark_checkpoints/io_utils/__init__.py +26 -0
  8. snowpark_checkpoints_validators-0.3.1/src/snowflake/snowpark_checkpoints/io_utils/io_default_strategy.py +57 -0
  9. snowpark_checkpoints_validators-0.3.1/src/snowflake/snowpark_checkpoints/io_utils/io_env_strategy.py +133 -0
  10. snowpark_checkpoints_validators-0.3.1/src/snowflake/snowpark_checkpoints/io_utils/io_file_manager.py +76 -0
  11. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/constants.py +1 -0
  12. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/extra_config.py +44 -1
  13. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/telemetry.py +67 -28
  14. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/utils_checks.py +9 -6
  15. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/validation_result_metadata.py +26 -22
  16. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_compare_utils.py +10 -0
  17. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/test_parquet.py +93 -6
  18. snowpark_checkpoints_validators-0.3.1/test/unit/io_utils/test_default_strategy.py +292 -0
  19. snowpark_checkpoints_validators-0.3.1/test/unit/test_checkpoints.py +78 -0
  20. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_extra_config.py +36 -0
  21. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_telemetry.py +167 -80
  22. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_utils_checks.py +0 -3
  23. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_validation_result_metadata.py +11 -3
  24. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/.gitignore +0 -0
  25. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/CHANGELOG.md +0 -0
  26. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/LICENSE +0 -0
  27. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/errors.py +0 -0
  28. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/job_context.py +0 -0
  29. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/singleton.py +0 -0
  30. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/snowpark_sampler.py +0 -0
  31. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/spark_migration.py +0 -0
  32. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/__init__.py +0 -0
  33. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/logging_utils.py +0 -0
  34. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/pandera_check_manager.py +0 -0
  35. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/utils/supported_types.py +0 -0
  36. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/src/snowflake/snowpark_checkpoints/validation_results.py +0 -0
  37. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/.coveragerc +0 -0
  38. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/e2eexample.py +0 -0
  39. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/df_mode_dataframe_mismatch_telemetry.json +0 -0
  40. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/df_mode_dataframe_telemetry.json +0 -0
  41. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/spark_checkpoint_df_fail_telemetry.json +0 -0
  42. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/spark_checkpoint_df_pass_telemetry.json +0 -0
  43. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/spark_checkpoint_limit_sample_telemetry.json +0 -0
  44. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/spark_checkpoint_random_sample_telemetry.json +0 -0
  45. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/spark_checkpoint_scalar_fail_telemetry.json +0 -0
  46. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/spark_checkpoint_scalar_passing_telemetry.json +0 -0
  47. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_df_check_custom_check_telemetry.json +0 -0
  48. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_df_check_fail_telemetry.json +0 -0
  49. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_df_check_from_file_telemetry.json +0 -0
  50. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_df_check_skip_check_telemetry.json +0 -0
  51. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_df_check_telemetry.json +0 -0
  52. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_input_fail_telemetry.json +0 -0
  53. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_input_telemetry.json +0 -0
  54. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_output_fail_telemetry.json +0 -0
  55. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/telemetry_expected/test_output_telemetry.json +0 -0
  56. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/test_pandera.py +0 -0
  57. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/integ/test_spark_checkpoint.py +0 -0
  58. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_job_context.py +0 -0
  59. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_logger.py +0 -0
  60. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_logging_utils.py +0 -0
  61. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_pandera_check_manager.py +0 -0
  62. {snowpark_checkpoints_validators-0.2.1 → snowpark_checkpoints_validators-0.3.1}/test/unit/test_spark_migration.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snowpark-checkpoints-validators
3
- Version: 0.2.1
3
+ Version: 0.3.1
4
4
  Summary: Migration tools for Snowpark
5
5
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
6
6
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -27,9 +27,11 @@ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
27
27
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
28
  Requires-Python: <3.12,>=3.9
29
29
  Requires-Dist: pandera[io]==0.20.4
30
+ Requires-Dist: pydantic>=2.0
30
31
  Requires-Dist: snowflake-connector-python[pandas]
31
32
  Requires-Dist: snowflake-snowpark-python>=1.23.0
32
33
  Provides-Extra: development
34
+ Requires-Dist: certifi==2025.1.31; extra == 'development'
33
35
  Requires-Dist: coverage>=7.6.7; extra == 'development'
34
36
  Requires-Dist: deepdiff==8.1.1; extra == 'development'
35
37
  Requires-Dist: deepdiff>=8.0.0; extra == 'development'
@@ -320,4 +322,37 @@ sp_dataframe = session.create_dataframe(df)
320
322
  preprocessed_dataframe = preprocessor(sp_dataframe)
321
323
  ```
322
324
 
325
+ ### Skip validation
326
+ The method `xvalidate_dataframe_checkpoint` can be used to skip the validation of a defined checkpoint.
327
+ The result of the checkpoint will be `SKIP` and a message will be logged in the terminal. The method has the same signature as `validate_dataframe_checkpoint`.
328
+
329
+ #### Usage Example
330
+
331
+ ```python
332
+ from snowflake.snowpark import Session
333
+ from snowflake.snowpark_checkpoints.utils.constants import (
334
+ CheckpointMode,
335
+ )
336
+ from snowflake.snowpark_checkpoints.checkpoint import xvalidate_dataframe_checkpoint
337
+ from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
338
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
339
+ from pyspark.sql import SparkSession
340
+
341
+ session = Session.builder.getOrCreate()
342
+ job_context = SnowparkJobContext(
343
+ session, SparkSession.builder.getOrCreate(), "job_context", True
344
+ )
345
+ df = session.read.format("csv").load("data.csv")
346
+
347
+ xvalidate_dataframe_checkpoint(
348
+ df,
349
+ "schema_checkpoint",
350
+ job_context=job_context,
351
+ mode=CheckpointMode.SCHEMA,
352
+ sample_frac=0.1,
353
+ sampling_strategy=SamplingStrategy.RANDOM_SAMPLE
354
+ )
355
+ ```
356
+
357
+
323
358
  ------
@@ -273,4 +273,37 @@ sp_dataframe = session.create_dataframe(df)
273
273
  preprocessed_dataframe = preprocessor(sp_dataframe)
274
274
  ```
275
275
 
276
+ ### Skip validation
277
+ The method `xvalidate_dataframe_checkpoint` can be used to skip the validation of a defined checkpoint.
278
+ The result of the checkpoint will be `SKIP` and a message will be logged in the terminal. The method has the same signature as `validate_dataframe_checkpoint`.
279
+
280
+ #### Usage Example
281
+
282
+ ```python
283
+ from snowflake.snowpark import Session
284
+ from snowflake.snowpark_checkpoints.utils.constants import (
285
+ CheckpointMode,
286
+ )
287
+ from snowflake.snowpark_checkpoints.checkpoint import xvalidate_dataframe_checkpoint
288
+ from snowflake.snowpark_checkpoints.spark_migration import SamplingStrategy
289
+ from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
290
+ from pyspark.sql import SparkSession
291
+
292
+ session = Session.builder.getOrCreate()
293
+ job_context = SnowparkJobContext(
294
+ session, SparkSession.builder.getOrCreate(), "job_context", True
295
+ )
296
+ df = session.read.format("csv").load("data.csv")
297
+
298
+ xvalidate_dataframe_checkpoint(
299
+ df,
300
+ "schema_checkpoint",
301
+ job_context=job_context,
302
+ mode=CheckpointMode.SCHEMA,
303
+ sample_frac=0.1,
304
+ sampling_strategy=SamplingStrategy.RANDOM_SAMPLE
305
+ )
306
+ ```
307
+
308
+
276
309
  ------
@@ -29,6 +29,7 @@ dependencies = [
29
29
  "snowflake-snowpark-python>=1.23.0",
30
30
  "snowflake-connector-python[pandas]",
31
31
  "pandera[io]==0.20.4",
32
+ "pydantic>=2.0"
32
33
  ]
33
34
  description = "Migration tools for Snowpark"
34
35
  dynamic = ['version']
@@ -61,6 +62,7 @@ development = [
61
62
  "pyarrow>=18.0.0",
62
63
  "deepdiff>=8.0.0",
63
64
  "pyspark>=3.5.0",
65
+ "certifi==2025.1.31",
64
66
  ]
65
67
 
66
68
  [project.urls]
@@ -39,6 +39,7 @@ __all__ = [
39
39
  "check_dataframe_schema",
40
40
  "check_output_schema",
41
41
  "check_input_schema",
42
+ "xvalidate_dataframe_checkpoint",
42
43
  "validate_dataframe_checkpoint",
43
44
  "CheckpointMode",
44
45
  ]
@@ -13,4 +13,4 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- __version__ = "0.2.1"
16
+ __version__ = "0.3.1"
@@ -33,6 +33,7 @@ from snowflake.snowpark_checkpoints.snowpark_sampler import (
33
33
  from snowflake.snowpark_checkpoints.utils.constants import (
34
34
  FAIL_STATUS,
35
35
  PASS_STATUS,
36
+ SKIP_STATUS,
36
37
  CheckpointMode,
37
38
  )
38
39
  from snowflake.snowpark_checkpoints.utils.extra_config import is_checkpoint_enabled
@@ -92,11 +93,10 @@ def validate_dataframe_checkpoint(
92
93
  checkpoint_name = _replace_special_characters(checkpoint_name)
93
94
 
94
95
  if not is_checkpoint_enabled(checkpoint_name):
95
- LOGGER.warning(
96
- "Checkpoint '%s' is disabled. Skipping DataFrame checkpoint validation.",
97
- checkpoint_name,
96
+ raise Exception(
97
+ f"Checkpoint '{checkpoint_name}' is disabled. Please enable it in the checkpoints.json file.",
98
+ "In case you want to skip it, use the xvalidate_dataframe_checkpoint method instead.",
98
99
  )
99
- return None
100
100
 
101
101
  LOGGER.info(
102
102
  "Starting DataFrame checkpoint validation for checkpoint '%s'", checkpoint_name
@@ -132,6 +132,48 @@ def validate_dataframe_checkpoint(
132
132
  )
133
133
 
134
134
 
135
+ @log
136
+ def xvalidate_dataframe_checkpoint(
137
+ df: SnowparkDataFrame,
138
+ checkpoint_name: str,
139
+ job_context: Optional[SnowparkJobContext] = None,
140
+ mode: Optional[CheckpointMode] = CheckpointMode.SCHEMA,
141
+ custom_checks: Optional[dict[Any, Any]] = None,
142
+ skip_checks: Optional[dict[Any, Any]] = None,
143
+ sample_frac: Optional[float] = 1.0,
144
+ sample_number: Optional[int] = None,
145
+ sampling_strategy: Optional[SamplingStrategy] = SamplingStrategy.RANDOM_SAMPLE,
146
+ output_path: Optional[str] = None,
147
+ ) -> Union[tuple[bool, PandasDataFrame], None]:
148
+ """Skips the validation of a Snowpark DataFrame against a specified checkpoint.
149
+
150
+ Args:
151
+ df (SnowparkDataFrame): The DataFrame to validate.
152
+ checkpoint_name (str): The name of the checkpoint to validate against.
153
+ job_context (SnowparkJobContext, optional): The job context for the validation. Required for PARQUET mode.
154
+ mode (CheckpointMode): The mode of validation (e.g., SCHEMA, PARQUET). Defaults to SCHEMA.
155
+ custom_checks (Optional[dict[Any, Any]], optional): Custom checks to apply during validation.
156
+ skip_checks (Optional[dict[Any, Any]], optional): Checks to skip during validation.
157
+ sample_frac (Optional[float], optional): Fraction of the DataFrame to sample for validation. Defaults to 1.0.
158
+ sample_number (Optional[int], optional): Number of rows to sample for validation.
159
+ sampling_strategy (Optional[SamplingStrategy], optional): Strategy to use for sampling.
160
+ Defaults to RANDOM_SAMPLE.
161
+ output_path (Optional[str], optional): The output path for the validation results.
162
+
163
+ Raises:
164
+ ValueError: If an invalid validation mode is provided or if job_context is None for PARQUET mode.
165
+
166
+ """
167
+ checkpoint_name = _replace_special_characters(checkpoint_name)
168
+
169
+ LOGGER.warning(
170
+ "Checkpoint '%s' is disabled. Skipping DataFrame checkpoint validation.",
171
+ checkpoint_name,
172
+ )
173
+
174
+ _update_validation_result(checkpoint_name, SKIP_STATUS, output_path)
175
+
176
+
135
177
  def _check_dataframe_schema_file(
136
178
  df: SnowparkDataFrame,
137
179
  checkpoint_name: str,
@@ -0,0 +1,26 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ __all__ = ["EnvStrategy", "IOFileManager", "IODefaultStrategy"]
17
+
18
+ from snowflake.snowpark_checkpoints.io_utils.io_env_strategy import (
19
+ EnvStrategy,
20
+ )
21
+ from snowflake.snowpark_checkpoints.io_utils.io_default_strategy import (
22
+ IODefaultStrategy,
23
+ )
24
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import (
25
+ IOFileManager,
26
+ )
@@ -0,0 +1,57 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import glob
17
+ import os
18
+
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ from snowflake.snowpark_checkpoints.io_utils import EnvStrategy
23
+
24
+
25
+ class IODefaultStrategy(EnvStrategy):
26
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
27
+ os.makedirs(path, exist_ok=exist_ok)
28
+
29
+ def folder_exists(self, path: str) -> bool:
30
+ return os.path.isdir(path)
31
+
32
+ def file_exists(self, path: str) -> bool:
33
+ return os.path.isfile(path)
34
+
35
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
36
+ mode = "w" if overwrite else "x"
37
+ with open(file_path, mode) as file:
38
+ file.write(file_content)
39
+
40
+ def read(
41
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
42
+ ) -> str:
43
+ with open(file_path, mode=mode, encoding=encoding) as file:
44
+ return file.read()
45
+
46
+ def read_bytes(self, file_path: str) -> bytes:
47
+ with open(file_path, mode="rb") as f:
48
+ return f.read()
49
+
50
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
51
+ return glob.glob(path, recursive=recursive)
52
+
53
+ def getcwd(self) -> str:
54
+ return os.getcwd()
55
+
56
+ def telemetry_path_files(self, path: str) -> Path:
57
+ return Path(path)
@@ -0,0 +1,133 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from abc import ABC, abstractmethod
17
+ from pathlib import Path
18
+ from typing import Optional
19
+
20
+
21
+ class EnvStrategy(ABC):
22
+
23
+ """An abstract base class that defines methods for file and directory operations.
24
+
25
+ Subclasses should implement these methods to provide environment-specific behavior.
26
+ """
27
+
28
+ @abstractmethod
29
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
30
+ """Create a directory.
31
+
32
+ Args:
33
+ path: The name of the directory to create.
34
+ exist_ok: If False, an error is raised if the directory already exists.
35
+
36
+ """
37
+
38
+ @abstractmethod
39
+ def folder_exists(self, path: str) -> bool:
40
+ """Check if a folder exists.
41
+
42
+ Args:
43
+ path: The path to the folder.
44
+
45
+ Returns:
46
+ bool: True if the folder exists, False otherwise.
47
+
48
+ """
49
+
50
+ @abstractmethod
51
+ def file_exists(self, path: str) -> bool:
52
+ """Check if a file exists.
53
+
54
+ Args:
55
+ path: The path to the file.
56
+
57
+ Returns:
58
+ bool: True if the file exists, False otherwise.
59
+
60
+ """
61
+
62
+ @abstractmethod
63
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
64
+ """Write content to a file.
65
+
66
+ Args:
67
+ file_path: The name of the file to write to.
68
+ file_content: The content to write to the file.
69
+ overwrite: If True, overwrite the file if it exists.
70
+
71
+ """
72
+
73
+ @abstractmethod
74
+ def read(
75
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
76
+ ) -> str:
77
+ """Read content from a file.
78
+
79
+ Args:
80
+ file_path: The path to the file to read from.
81
+ mode: The mode in which to open the file.
82
+ encoding: The encoding to use for reading the file.
83
+
84
+ Returns:
85
+ str: The content of the file.
86
+
87
+ """
88
+
89
+ @abstractmethod
90
+ def read_bytes(self, file_path: str) -> bytes:
91
+ """Read binary content from a file.
92
+
93
+ Args:
94
+ file_path: The path to the file to read from.
95
+
96
+ Returns:
97
+ bytes: The binary content of the file.
98
+
99
+ """
100
+
101
+ @abstractmethod
102
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
103
+ """List the contents of a directory.
104
+
105
+ Args:
106
+ path: The path to the directory.
107
+ recursive: If True, list the contents recursively.
108
+
109
+ Returns:
110
+ list[str]: A list of the contents of the directory.
111
+
112
+ """
113
+
114
+ @abstractmethod
115
+ def getcwd(self) -> str:
116
+ """Get the current working directory.
117
+
118
+ Returns:
119
+ str: The current working directory.
120
+
121
+ """
122
+
123
+ @abstractmethod
124
+ def telemetry_path_files(self, path: str) -> Path:
125
+ """Get the path to the telemetry files.
126
+
127
+ Args:
128
+ path: The path to the telemetry directory.
129
+
130
+ Returns:
131
+ Path: The path object representing the telemetry files.
132
+
133
+ """
@@ -0,0 +1,76 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ from snowflake.snowpark_checkpoints.io_utils import (
20
+ EnvStrategy,
21
+ IODefaultStrategy,
22
+ )
23
+ from snowflake.snowpark_checkpoints.singleton import Singleton
24
+
25
+
26
+ class IOFileManager(metaclass=Singleton):
27
+ def __init__(self, strategy: Optional[EnvStrategy] = None):
28
+ self.strategy = strategy or IODefaultStrategy()
29
+
30
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
31
+ return self.strategy.mkdir(path, exist_ok)
32
+
33
+ def folder_exists(self, path: str) -> bool:
34
+ return self.strategy.folder_exists(path)
35
+
36
+ def file_exists(self, path: str) -> bool:
37
+ return self.strategy.file_exists(path)
38
+
39
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
40
+ return self.strategy.write(file_path, file_content, overwrite)
41
+
42
+ def read(
43
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
44
+ ) -> str:
45
+ return self.strategy.read(file_path, mode, encoding)
46
+
47
+ def read_bytes(self, file_path: str) -> bytes:
48
+ return self.strategy.read_bytes(file_path)
49
+
50
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
51
+ return self.strategy.ls(path, recursive)
52
+
53
+ def getcwd(self) -> str:
54
+ return self.strategy.getcwd()
55
+
56
+ def telemetry_path_files(self, path: str) -> Path:
57
+ return self.strategy.telemetry_path_files(path)
58
+
59
+ def set_strategy(self, strategy: EnvStrategy):
60
+ """Set the strategy for file and directory operations.
61
+
62
+ Args:
63
+ strategy (EnvStrategy): The strategy to use for file and directory operations.
64
+
65
+ """
66
+ self.strategy = strategy
67
+
68
+
69
+ def get_io_file_manager():
70
+ """Get the singleton instance of IOFileManager.
71
+
72
+ Returns:
73
+ IOFileManager: The singleton instance of IOFileManager.
74
+
75
+ """
76
+ return IOFileManager()
@@ -39,6 +39,7 @@ STACK_POSITION_CHECKPOINT: Final[int] = 6
39
39
  # Validation status
40
40
  PASS_STATUS: Final[str] = "PASS"
41
41
  FAIL_STATUS: Final[str] = "FAIL"
42
+ SKIP_STATUS: Final[str] = "SKIP"
42
43
 
43
44
  # Validation result keys
44
45
  DEFAULT_KEY: Final[str] = "default"
@@ -18,6 +18,7 @@ import os
18
18
 
19
19
  from typing import Optional
20
20
 
21
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
21
22
  from snowflake.snowpark_checkpoints.utils.constants import (
22
23
  SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR,
23
24
  )
@@ -28,7 +29,48 @@ LOGGER = logging.getLogger(__name__)
28
29
 
29
30
  # noinspection DuplicatedCode
30
31
  def _get_checkpoint_contract_file_path() -> str:
31
- return os.environ.get(SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, os.getcwd())
32
+ return os.environ.get(
33
+ SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, get_io_file_manager().getcwd()
34
+ )
35
+
36
+
37
+ def _set_conf_io_strategy() -> None:
38
+ try:
39
+ from snowflake.snowpark_checkpoints.io_utils.io_default_strategy import (
40
+ IODefaultStrategy,
41
+ )
42
+ from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
43
+ EnvStrategy as ConfEnvStrategy,
44
+ )
45
+ from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
46
+ get_io_file_manager as get_conf_io_file_manager,
47
+ )
48
+
49
+ is_default_strategy = isinstance(
50
+ get_io_file_manager().strategy, IODefaultStrategy
51
+ )
52
+
53
+ if is_default_strategy:
54
+ return
55
+
56
+ class CustomConfEnvStrategy(ConfEnvStrategy):
57
+ def file_exists(self, path: str) -> bool:
58
+ return get_io_file_manager().file_exists(path)
59
+
60
+ def read(
61
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
62
+ ) -> Optional[str]:
63
+ return get_io_file_manager().read(file_path, mode, encoding)
64
+
65
+ def getcwd(self) -> str:
66
+ return get_io_file_manager().getcwd()
67
+
68
+ get_conf_io_file_manager().set_strategy(CustomConfEnvStrategy())
69
+
70
+ except ImportError:
71
+ LOGGER.debug(
72
+ "snowpark-checkpoints-configuration is not installed. Cannot get a checkpoint metadata instance."
73
+ )
32
74
 
33
75
 
34
76
  # noinspection DuplicatedCode
@@ -39,6 +81,7 @@ def _get_metadata():
39
81
  )
40
82
 
41
83
  path = _get_checkpoint_contract_file_path()
84
+ _set_conf_io_strategy()
42
85
  LOGGER.debug("Loading checkpoint metadata from '%s'", path)
43
86
  metadata = CheckpointMetadata(path)
44
87
  return True, metadata