snowpark-checkpoints-validators 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,4 +13,4 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- __version__ = "0.2.1"
16
+ __version__ = "0.3.0"
@@ -0,0 +1,26 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ __all__ = ["EnvStrategy", "IOFileManager", "IODefaultStrategy"]
17
+
18
+ from snowflake.snowpark_checkpoints.io_utils.io_env_strategy import (
19
+ EnvStrategy,
20
+ )
21
+ from snowflake.snowpark_checkpoints.io_utils.io_default_strategy import (
22
+ IODefaultStrategy,
23
+ )
24
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import (
25
+ IOFileManager,
26
+ )
@@ -0,0 +1,57 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import glob
17
+ import os
18
+
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ from snowflake.snowpark_checkpoints.io_utils import EnvStrategy
23
+
24
+
25
+ class IODefaultStrategy(EnvStrategy):
26
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
27
+ os.makedirs(path, exist_ok=exist_ok)
28
+
29
+ def folder_exists(self, path: str) -> bool:
30
+ return os.path.isdir(path)
31
+
32
+ def file_exists(self, path: str) -> bool:
33
+ return os.path.isfile(path)
34
+
35
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
36
+ mode = "w" if overwrite else "x"
37
+ with open(file_path, mode) as file:
38
+ file.write(file_content)
39
+
40
+ def read(
41
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
42
+ ) -> str:
43
+ with open(file_path, mode=mode, encoding=encoding) as file:
44
+ return file.read()
45
+
46
+ def read_bytes(self, file_path: str) -> bytes:
47
+ with open(file_path, mode="rb") as f:
48
+ return f.read()
49
+
50
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
51
+ return glob.glob(path, recursive=recursive)
52
+
53
+ def getcwd(self) -> str:
54
+ return os.getcwd()
55
+
56
+ def telemetry_path_files(self, path: str) -> Path:
57
+ return Path(path)
@@ -0,0 +1,133 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from abc import ABC, abstractmethod
17
+ from pathlib import Path
18
+ from typing import Optional
19
+
20
+
21
+ class EnvStrategy(ABC):
22
+
23
+ """An abstract base class that defines methods for file and directory operations.
24
+
25
+ Subclasses should implement these methods to provide environment-specific behavior.
26
+ """
27
+
28
+ @abstractmethod
29
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
30
+ """Create a directory.
31
+
32
+ Args:
33
+ path: The name of the directory to create.
34
+ exist_ok: If False, an error is raised if the directory already exists.
35
+
36
+ """
37
+
38
+ @abstractmethod
39
+ def folder_exists(self, path: str) -> bool:
40
+ """Check if a folder exists.
41
+
42
+ Args:
43
+ path: The path to the folder.
44
+
45
+ Returns:
46
+ bool: True if the folder exists, False otherwise.
47
+
48
+ """
49
+
50
+ @abstractmethod
51
+ def file_exists(self, path: str) -> bool:
52
+ """Check if a file exists.
53
+
54
+ Args:
55
+ path: The path to the file.
56
+
57
+ Returns:
58
+ bool: True if the file exists, False otherwise.
59
+
60
+ """
61
+
62
+ @abstractmethod
63
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
64
+ """Write content to a file.
65
+
66
+ Args:
67
+ file_path: The name of the file to write to.
68
+ file_content: The content to write to the file.
69
+ overwrite: If True, overwrite the file if it exists.
70
+
71
+ """
72
+
73
+ @abstractmethod
74
+ def read(
75
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
76
+ ) -> str:
77
+ """Read content from a file.
78
+
79
+ Args:
80
+ file_path: The path to the file to read from.
81
+ mode: The mode in which to open the file.
82
+ encoding: The encoding to use for reading the file.
83
+
84
+ Returns:
85
+ str: The content of the file.
86
+
87
+ """
88
+
89
+ @abstractmethod
90
+ def read_bytes(self, file_path: str) -> bytes:
91
+ """Read binary content from a file.
92
+
93
+ Args:
94
+ file_path: The path to the file to read from.
95
+
96
+ Returns:
97
+ bytes: The binary content of the file.
98
+
99
+ """
100
+
101
+ @abstractmethod
102
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
103
+ """List the contents of a directory.
104
+
105
+ Args:
106
+ path: The path to the directory.
107
+ recursive: If True, list the contents recursively.
108
+
109
+ Returns:
110
+ list[str]: A list of the contents of the directory.
111
+
112
+ """
113
+
114
+ @abstractmethod
115
+ def getcwd(self) -> str:
116
+ """Get the current working directory.
117
+
118
+ Returns:
119
+ str: The current working directory.
120
+
121
+ """
122
+
123
+ @abstractmethod
124
+ def telemetry_path_files(self, path: str) -> Path:
125
+ """Get the path to the telemetry files.
126
+
127
+ Args:
128
+ path: The path to the telemetry directory.
129
+
130
+ Returns:
131
+ Path: The path object representing the telemetry files.
132
+
133
+ """
@@ -0,0 +1,76 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ from snowflake.snowpark_checkpoints.io_utils import (
20
+ EnvStrategy,
21
+ IODefaultStrategy,
22
+ )
23
+ from snowflake.snowpark_checkpoints.singleton import Singleton
24
+
25
+
26
+ class IOFileManager(metaclass=Singleton):
27
+ def __init__(self, strategy: Optional[EnvStrategy] = None):
28
+ self.strategy = strategy or IODefaultStrategy()
29
+
30
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
31
+ return self.strategy.mkdir(path, exist_ok)
32
+
33
+ def folder_exists(self, path: str) -> bool:
34
+ return self.strategy.folder_exists(path)
35
+
36
+ def file_exists(self, path: str) -> bool:
37
+ return self.strategy.file_exists(path)
38
+
39
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
40
+ return self.strategy.write(file_path, file_content, overwrite)
41
+
42
+ def read(
43
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
44
+ ) -> str:
45
+ return self.strategy.read(file_path, mode, encoding)
46
+
47
+ def read_bytes(self, file_path: str) -> bytes:
48
+ return self.strategy.read_bytes(file_path)
49
+
50
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
51
+ return self.strategy.ls(path, recursive)
52
+
53
+ def getcwd(self) -> str:
54
+ return self.strategy.getcwd()
55
+
56
+ def telemetry_path_files(self, path: str) -> Path:
57
+ return self.strategy.telemetry_path_files(path)
58
+
59
+ def set_strategy(self, strategy: EnvStrategy):
60
+ """Set the strategy for file and directory operations.
61
+
62
+ Args:
63
+ strategy (EnvStrategy): The strategy to use for file and directory operations.
64
+
65
+ """
66
+ self.strategy = strategy
67
+
68
+
69
+ def get_io_file_manager():
70
+ """Get the singleton instance of IOFileManager.
71
+
72
+ Returns:
73
+ IOFileManager: The singleton instance of IOFileManager.
74
+
75
+ """
76
+ return IOFileManager()
@@ -18,6 +18,7 @@ import os
18
18
 
19
19
  from typing import Optional
20
20
 
21
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
21
22
  from snowflake.snowpark_checkpoints.utils.constants import (
22
23
  SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR,
23
24
  )
@@ -28,7 +29,48 @@ LOGGER = logging.getLogger(__name__)
28
29
 
29
30
  # noinspection DuplicatedCode
30
31
  def _get_checkpoint_contract_file_path() -> str:
31
- return os.environ.get(SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, os.getcwd())
32
+ return os.environ.get(
33
+ SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, get_io_file_manager().getcwd()
34
+ )
35
+
36
+
37
+ def _set_conf_io_strategy() -> None:
38
+ try:
39
+ from snowflake.snowpark_checkpoints.io_utils.io_default_strategy import (
40
+ IODefaultStrategy,
41
+ )
42
+ from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
43
+ EnvStrategy as ConfEnvStrategy,
44
+ )
45
+ from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
46
+ get_io_file_manager as get_conf_io_file_manager,
47
+ )
48
+
49
+ is_default_strategy = isinstance(
50
+ get_io_file_manager().strategy, IODefaultStrategy
51
+ )
52
+
53
+ if is_default_strategy:
54
+ return
55
+
56
+ class CustomConfEnvStrategy(ConfEnvStrategy):
57
+ def file_exists(self, path: str) -> bool:
58
+ return get_io_file_manager().file_exists(path)
59
+
60
+ def read(
61
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
62
+ ) -> Optional[str]:
63
+ return get_io_file_manager().read(file_path, mode, encoding)
64
+
65
+ def getcwd(self) -> str:
66
+ return get_io_file_manager().getcwd()
67
+
68
+ get_conf_io_file_manager().set_strategy(CustomConfEnvStrategy())
69
+
70
+ except ImportError:
71
+ LOGGER.debug(
72
+ "snowpark-checkpoints-configuration is not installed. Cannot get a checkpoint metadata instance."
73
+ )
32
74
 
33
75
 
34
76
  # noinspection DuplicatedCode
@@ -39,6 +81,7 @@ def _get_metadata():
39
81
  )
40
82
 
41
83
  path = _get_checkpoint_contract_file_path()
84
+ _set_conf_io_strategy()
42
85
  LOGGER.debug("Loading checkpoint metadata from '%s'", path)
43
86
  metadata = CheckpointMetadata(path)
44
87
  return True, metadata
@@ -30,16 +30,34 @@ from sys import platform
30
30
  from typing import Any, Callable, Optional, TypeVar
31
31
  from uuid import getnode
32
32
 
33
- from snowflake.connector import (
34
- SNOWFLAKE_CONNECTOR_VERSION,
35
- time_util,
36
- )
37
- from snowflake.connector.constants import DIRS as SNOWFLAKE_DIRS
38
- from snowflake.connector.network import SnowflakeRestful
33
+ from snowflake.connector.description import PLATFORM as CONNECTOR_PLATFORM
39
34
  from snowflake.connector.telemetry import TelemetryClient
40
35
  from snowflake.snowpark import VERSION as SNOWPARK_VERSION
41
36
  from snowflake.snowpark import dataframe as snowpark_dataframe
42
37
  from snowflake.snowpark.session import Session
38
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import (
39
+ get_io_file_manager,
40
+ )
41
+
42
+
43
+ try:
44
+ """
45
+ The following imports are used to log telemetry events in the Snowflake Connector.
46
+ """
47
+ from snowflake.connector import (
48
+ SNOWFLAKE_CONNECTOR_VERSION,
49
+ time_util,
50
+ )
51
+ from snowflake.connector.constants import DIRS as SNOWFLAKE_DIRS
52
+ from snowflake.connector.network import SnowflakeRestful
53
+ except Exception:
54
+ """
55
+ Set default import values for the Snowflake Connector when using snowpark-checkpoints in stored procedures.
56
+ """
57
+ SNOWFLAKE_CONNECTOR_VERSION = ""
58
+ time_util = None
59
+ SNOWFLAKE_DIRS = ""
60
+ SnowflakeRestful = None
43
61
 
44
62
 
45
63
  try:
@@ -92,7 +110,7 @@ class TelemetryManager(TelemetryClient):
92
110
  path: path to write telemetry.
93
111
 
94
112
  """
95
- os.makedirs(path, exist_ok=True)
113
+ get_io_file_manager().mkdir(str(path), exist_ok=True)
96
114
  self.sc_folder_path = path
97
115
 
98
116
  def sc_log_error(
@@ -200,7 +218,7 @@ class TelemetryManager(TelemetryClient):
200
218
 
201
219
  """
202
220
  try:
203
- os.makedirs(self.sc_folder_path, exist_ok=True)
221
+ get_io_file_manager().mkdir(str(self.sc_folder_path), exist_ok=True)
204
222
  for event in batch:
205
223
  message = event.get("message")
206
224
  if message is not None:
@@ -210,8 +228,7 @@ class TelemetryManager(TelemetryClient):
210
228
  f'_telemetry_{message.get("type")}.json'
211
229
  )
212
230
  json_content = self._sc_validate_folder_space(event)
213
- with open(file_path, "w") as json_file:
214
- json_file.write(json_content)
231
+ get_io_file_manager().write(str(file_path), json_content)
215
232
  except Exception:
216
233
  pass
217
234
 
@@ -238,10 +255,10 @@ class TelemetryManager(TelemetryClient):
238
255
  if not self.sc_is_enabled or self.sc_is_testing or not self._rest:
239
256
  return
240
257
  batch = []
241
- for file in self.sc_folder_path.glob("*.json"):
242
- with open(file) as json_file:
243
- data_dict = json.load(json_file)
244
- batch.append(data_dict)
258
+ for file in get_io_file_manager().ls(f"{self.sc_folder_path}/*.json"):
259
+ json_content = get_io_file_manager().read(file)
260
+ data_dict = json.loads(json_content)
261
+ batch.append(data_dict)
245
262
  if batch == []:
246
263
  return
247
264
  body = {"logs": batch}
@@ -253,14 +270,17 @@ class TelemetryManager(TelemetryClient):
253
270
  timeout=5,
254
271
  )
255
272
  if ret.get("success"):
256
- for file in self.sc_folder_path.glob("*.json"):
273
+ for file_path in get_io_file_manager().ls(f"{self.sc_folder_path}/*.json"):
274
+ file = get_io_file_manager().telemetry_path_files(file_path)
257
275
  file.unlink()
258
276
 
259
277
  def _sc_is_telemetry_testing(self) -> bool:
260
278
  is_testing = os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_TESTING") == "true"
261
279
  if is_testing:
262
280
  local_telemetry_path = (
263
- Path(os.getcwd()) / "snowpark-checkpoints-output" / "telemetry"
281
+ Path(get_io_file_manager().getcwd())
282
+ / "snowpark-checkpoints-output"
283
+ / "telemetry"
264
284
  )
265
285
  self.set_sc_output_path(local_telemetry_path)
266
286
  self.sc_is_enabled = True
@@ -359,7 +379,7 @@ def _get_metadata() -> dict:
359
379
  }
360
380
 
361
381
 
362
- def _get_version() -> str:
382
+ def _get_version() -> Optional[str]:
363
383
  """Get the version of the package.
364
384
 
365
385
  Returns:
@@ -370,11 +390,10 @@ def _get_version() -> str:
370
390
  directory_levels_up = 1
371
391
  project_root = Path(__file__).resolve().parents[directory_levels_up]
372
392
  version_file_path = project_root / VERSION_FILE_NAME
373
- with open(version_file_path) as file:
374
- content = file.read()
375
- version_match = re.search(VERSION_VARIABLE_PATTERN, content, re.MULTILINE)
376
- if version_match:
377
- return version_match.group(1)
393
+ content = get_io_file_manager().read(str(version_file_path))
394
+ version_match = re.search(VERSION_VARIABLE_PATTERN, content, re.MULTILINE)
395
+ if version_match:
396
+ return version_match.group(1)
378
397
  return None
379
398
  except Exception:
380
399
  return None
@@ -390,7 +409,10 @@ def _get_folder_size(folder_path: Path) -> int:
390
409
  int: The size of the folder in bytes.
391
410
 
392
411
  """
393
- return sum(f.stat().st_size for f in folder_path.glob("*.json") if f.is_file())
412
+ sum_size = 0
413
+ for f in get_io_file_manager().ls(f"{folder_path}/*.json"):
414
+ sum_size += get_io_file_manager().telemetry_path_files(f).stat().st_size
415
+ return sum_size
394
416
 
395
417
 
396
418
  def _free_up_space(folder_path: Path, max_size: int) -> None:
@@ -401,9 +423,13 @@ def _free_up_space(folder_path: Path, max_size: int) -> None:
401
423
  max_size (int): The maximum allowed size of the folder in bytes.
402
424
 
403
425
  """
404
- files = sorted(folder_path.glob("*.json"), key=lambda f: f.stat().st_mtime)
426
+ files = sorted(
427
+ get_io_file_manager().ls(f"{folder_path}/*.json"),
428
+ key=lambda f: f.stat().st_mtime,
429
+ )
405
430
  current_size = _get_folder_size(folder_path)
406
- for file in files:
431
+ for file_path in files:
432
+ file = get_io_file_manager().telemetry_path_files(file_path)
407
433
  if current_size <= max_size:
408
434
  break
409
435
  current_size -= file.stat().st_size
@@ -482,12 +508,22 @@ def get_load_json(json_schema: str) -> dict:
482
508
 
483
509
  """
484
510
  try:
485
- with open(json_schema, encoding="utf-8") as file:
486
- return json.load(file)
511
+ file_content = get_io_file_manager().read(json_schema, encoding="utf-8")
512
+ return json.loads(file_content)
487
513
  except (OSError, json.JSONDecodeError) as e:
488
514
  raise ValueError(f"Error reading JSON schema file: {e}") from None
489
515
 
490
516
 
517
+ def _is_in_stored_procedure() -> bool:
518
+ """Check if the code is running in a stored procedure.
519
+
520
+ Returns:
521
+ bool: True if the code is running in a stored procedure, False otherwise.
522
+
523
+ """
524
+ return CONNECTOR_PLATFORM == "XP"
525
+
526
+
491
527
  def extract_parameters(
492
528
  func: Callable, args: tuple, kwargs: dict, params_list: Optional[list[str]]
493
529
  ) -> dict:
@@ -835,7 +871,10 @@ def report_telemetry(
835
871
  except Exception as err:
836
872
  func_exception = err
837
873
 
838
- if os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_ENABLED") == "false":
874
+ if (
875
+ os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_ENABLED") == "false"
876
+ or _is_in_stored_procedure()
877
+ ):
839
878
  return result
840
879
  telemetry_event = None
841
880
  data = None
@@ -28,6 +28,7 @@ from pandera import DataFrameSchema
28
28
 
29
29
  from snowflake.snowpark import DataFrame as SnowparkDataFrame
30
30
  from snowflake.snowpark_checkpoints.errors import SchemaValidationError
31
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
31
32
  from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
32
33
  from snowflake.snowpark_checkpoints.snowpark_sampler import (
33
34
  SamplingAdapter,
@@ -154,13 +155,15 @@ def _generate_schema(
154
155
  LOGGER.info(
155
156
  "Generating Pandera DataFrameSchema for checkpoint: '%s'", checkpoint_name
156
157
  )
157
- current_directory_path = output_path if output_path else os.getcwd()
158
+ current_directory_path = (
159
+ output_path if output_path else get_io_file_manager().getcwd()
160
+ )
158
161
 
159
162
  output_directory_path = os.path.join(
160
163
  current_directory_path, SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME
161
164
  )
162
165
 
163
- if not os.path.exists(output_directory_path):
166
+ if not get_io_file_manager().folder_exists(output_directory_path):
164
167
  raise ValueError(
165
168
  """Output directory snowpark-checkpoints-output does not exist.
166
169
  Please run the Snowpark checkpoint collector first."""
@@ -171,14 +174,14 @@ Please run the Snowpark checkpoint collector first."""
171
174
  CHECKPOINT_JSON_OUTPUT_FILE_FORMAT_NAME.format(checkpoint_name),
172
175
  )
173
176
 
174
- if not os.path.exists(checkpoint_schema_file_path):
177
+ if not get_io_file_manager().file_exists(checkpoint_schema_file_path):
175
178
  raise ValueError(
176
179
  f"Checkpoint {checkpoint_name} JSON file not found. Please run the Snowpark checkpoint collector first."
177
180
  )
178
181
 
179
182
  LOGGER.info("Reading schema from file: '%s'", checkpoint_schema_file_path)
180
- with open(checkpoint_schema_file_path) as schema_file:
181
- checkpoint_schema_config = json.load(schema_file)
183
+ schema_file = get_io_file_manager().read(checkpoint_schema_file_path)
184
+ checkpoint_schema_config = json.loads(schema_file)
182
185
 
183
186
  if DATAFRAME_PANDERA_SCHEMA_KEY not in checkpoint_schema_config:
184
187
  raise ValueError(
@@ -354,7 +357,7 @@ def _get_relative_path(file_path: str) -> str:
354
357
  str: The relative path of the file.
355
358
 
356
359
  """
357
- current_directory = os.getcwd()
360
+ current_directory = get_io_file_manager().getcwd()
358
361
  return os.path.relpath(file_path, current_directory)
359
362
 
360
363
 
@@ -18,6 +18,7 @@ import os
18
18
 
19
19
  from typing import Optional
20
20
 
21
+ from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
21
22
  from snowflake.snowpark_checkpoints.singleton import Singleton
22
23
  from snowflake.snowpark_checkpoints.utils.constants import (
23
24
  SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
@@ -67,7 +68,9 @@ class ValidationResultsMetadata(metaclass=Singleton):
67
68
  Exception: If there is an error reading the validation results file.
68
69
 
69
70
  """
70
- self.validation_results_directory = path if path else os.getcwd()
71
+ self.validation_results_directory = (
72
+ path if path else get_io_file_manager().getcwd()
73
+ )
71
74
  self.validation_results_directory = os.path.join(
72
75
  self.validation_results_directory,
73
76
  SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
@@ -89,20 +92,21 @@ class ValidationResultsMetadata(metaclass=Singleton):
89
92
 
90
93
  self.validation_results = ValidationResults(results=[])
91
94
 
92
- if os.path.exists(self.validation_results_file):
95
+ if get_io_file_manager().file_exists(self.validation_results_file):
93
96
  LOGGER.info(
94
97
  "Loading validation results from: '%s'", self.validation_results_file
95
98
  )
96
- with open(self.validation_results_file) as file:
97
- try:
98
- validation_result_json = file.read()
99
- self.validation_results = ValidationResults.model_validate_json(
100
- validation_result_json
101
- )
102
- except Exception as e:
103
- raise Exception(
104
- f"Error reading validation results file: {self.validation_results_file} \n {e}"
105
- ) from None
99
+ try:
100
+ validation_result_json = get_io_file_manager().read(
101
+ self.validation_results_file
102
+ )
103
+ self.validation_results = ValidationResults.model_validate_json(
104
+ validation_result_json
105
+ )
106
+ except Exception as e:
107
+ raise Exception(
108
+ f"Error reading validation results file: {self.validation_results_file} \n {e}"
109
+ ) from None
106
110
  else:
107
111
  LOGGER.info(
108
112
  "Validation results file not found: '%s'",
@@ -115,7 +119,7 @@ class ValidationResultsMetadata(metaclass=Singleton):
115
119
  This method empties the validation results list.
116
120
 
117
121
  """
118
- if not os.path.exists(self.validation_results_file):
122
+ if not get_io_file_manager().file_exists(self.validation_results_file):
119
123
  LOGGER.info("Cleaning validation results...")
120
124
  self.validation_results.results = []
121
125
 
@@ -123,7 +127,6 @@ class ValidationResultsMetadata(metaclass=Singleton):
123
127
  """Add a validation result to the pipeline result list.
124
128
 
125
129
  Args:
126
- checkpoint_name (str): The name of the checkpoint.
127
130
  validation_result (dict): The validation result to be added.
128
131
 
129
132
  """
@@ -140,16 +143,17 @@ class ValidationResultsMetadata(metaclass=Singleton):
140
143
  OSError: If the directory cannot be created or the file cannot be written.
141
144
 
142
145
  """
143
- if not os.path.exists(self.validation_results_directory):
146
+ if not get_io_file_manager().folder_exists(self.validation_results_directory):
144
147
  LOGGER.debug(
145
148
  "Validation results directory '%s' does not exist. Creating it...",
146
149
  self.validation_results_directory,
147
150
  )
148
- os.makedirs(self.validation_results_directory)
151
+ get_io_file_manager().mkdir(self.validation_results_directory)
149
152
 
150
- with open(self.validation_results_file, "w") as output_file:
151
- output_file.write(self.validation_results.model_dump_json())
152
- LOGGER.info(
153
- "Validation results successfully saved to: '%s'",
154
- self.validation_results_file,
155
- )
153
+ get_io_file_manager().write(
154
+ self.validation_results_file, self.validation_results.model_dump_json()
155
+ )
156
+ LOGGER.info(
157
+ "Validation results successfully saved to: '%s'",
158
+ self.validation_results_file,
159
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snowpark-checkpoints-validators
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Migration tools for Snowpark
5
5
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
6
6
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -27,9 +27,11 @@ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
27
27
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
28
  Requires-Python: <3.12,>=3.9
29
29
  Requires-Dist: pandera[io]==0.20.4
30
+ Requires-Dist: pydantic>=2.0
30
31
  Requires-Dist: snowflake-connector-python[pandas]
31
32
  Requires-Dist: snowflake-snowpark-python>=1.23.0
32
33
  Provides-Extra: development
34
+ Requires-Dist: certifi==2025.1.31; extra == 'development'
33
35
  Requires-Dist: coverage>=7.6.7; extra == 'development'
34
36
  Requires-Dist: deepdiff==8.1.1; extra == 'development'
35
37
  Requires-Dist: deepdiff>=8.0.0; extra == 'development'
@@ -1,22 +1,26 @@
1
1
  snowflake/snowpark_checkpoints/__init__.py,sha256=p7fzH3f8foD5nhNJHZ00JT3ODTXJGGkWTd3xRKx-8aQ,1435
2
- snowflake/snowpark_checkpoints/__version__.py,sha256=jEnm4p_P4FqdYsTq3hnGQnhLZ4KwL0_Ew8fDF8BRL98,632
2
+ snowflake/snowpark_checkpoints/__version__.py,sha256=kbbDnlkY7JOLNHvfWYkCO_mOBOV9GniMGdxYoQpLhyg,632
3
3
  snowflake/snowpark_checkpoints/checkpoint.py,sha256=i-iDRYbGvQHy9ipW7UxHVhJhQ9BXNSO-bsCcHyg3oLA,22056
4
4
  snowflake/snowpark_checkpoints/errors.py,sha256=9KjzRf8bjDZTTNL4LeySJAwuucDOyz0Ka7EFBKWFpyg,1821
5
5
  snowflake/snowpark_checkpoints/job_context.py,sha256=RMK0g0HrbDVrOAvai4PgsGvsAn_GIo9aFmh-tWlyieY,4183
6
6
  snowflake/snowpark_checkpoints/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
7
7
  snowflake/snowpark_checkpoints/snowpark_sampler.py,sha256=Qxv-8nRGuf-ab3GoSUt8_MNL0ppjoBIMOFIMkqmwN5I,4668
8
8
  snowflake/snowpark_checkpoints/spark_migration.py,sha256=s2HqomYx76Hqn71g9TleBeHI3t1nirgfPvkggqQQdts,10253
9
- snowflake/snowpark_checkpoints/validation_result_metadata.py,sha256=fm2lKxjYlzlL6qsiv2icR9k5o7YNd2OwvFhiqGYrTpo,5745
9
+ snowflake/snowpark_checkpoints/validation_result_metadata.py,sha256=5C8f1g-Grs2ydpXiZBLGt5n9cvEHBaw2-CDeb2vnhpg,5847
10
10
  snowflake/snowpark_checkpoints/validation_results.py,sha256=J8OcpNty6hQD8RbAy8xmA0UMbPWfXSmQnHYspWWSisk,1502
11
+ snowflake/snowpark_checkpoints/io_utils/__init__.py,sha256=fmSEYcBGNASBanNvMVW-uv6hcoYre6kEH35K-RliuiA,954
12
+ snowflake/snowpark_checkpoints/io_utils/io_default_strategy.py,sha256=VMfdqj4uDgTEinmpC3D0zXncIB9FxWJod1rI-Yt3YVA,1869
13
+ snowflake/snowpark_checkpoints/io_utils/io_env_strategy.py,sha256=ltG_rxm0CkJFXpskOf__ByZw-C6B9LtycqlyB9EmaJI,3569
14
+ snowflake/snowpark_checkpoints/io_utils/io_file_manager.py,sha256=YHrxRBzTlhIUrSFrsoWkRY_Qa-TXgDWglr00T98Tc5g,2485
11
15
  snowflake/snowpark_checkpoints/utils/__init__.py,sha256=I4srmZ8G1q9DU6Suo1S91aVfNvETyisKH95uvLAvEJ0,609
12
16
  snowflake/snowpark_checkpoints/utils/constants.py,sha256=pgFttLDQ6fTa6obSdvivWBYClS21ap41YVDNGAS4sxY,4146
13
- snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=LvOdIhvE450AV0wLVK5P_hANvcNzAv8pLNe7Ksr598U,2802
17
+ snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=xOYaG6MfsUCAHI0C_7qWF_m96xcLIZWwrgxY4UlpaZI,4325
14
18
  snowflake/snowpark_checkpoints/utils/logging_utils.py,sha256=yyi6X5DqKeTg0HRhvsH6ymYp2P0wbnyKIzI2RzrQS7k,2278
15
19
  snowflake/snowpark_checkpoints/utils/pandera_check_manager.py,sha256=tQIozLO-2kM8WZ-gGKfRwmXBx1cDPaIZB0qIcArp8xA,16100
16
20
  snowflake/snowpark_checkpoints/utils/supported_types.py,sha256=GrMX2tHdSFnK7LlPbZx20UufD6Br6TNVRkkBwIxdPy0,1433
17
- snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=_WOVo19BxcF6cpQDplID6BEOvgJfHTGK1JZI1-OI4uc,31370
18
- snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=LFdEzVgirkymXD5LlzuE_lv43yAa3OMIXEnloRAXkGc,14204
19
- snowpark_checkpoints_validators-0.2.1.dist-info/METADATA,sha256=nhKZaDnpjcwwsH4PTAxqtFCqJEZ_UY-p0J_S5863Tvs,11470
20
- snowpark_checkpoints_validators-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
21
- snowpark_checkpoints_validators-0.2.1.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
22
- snowpark_checkpoints_validators-0.2.1.dist-info/RECORD,,
21
+ snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=GfuyIaI3QG4a4_qWwyJHvWRM0GENunNexuEJ6IgscF4,32684
22
+ snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=oQ1c4n-uAA2kFIpWIRPWhbCW8e-wwOIL8qDqLvr5Fok,14398
23
+ snowpark_checkpoints_validators-0.3.0.dist-info/METADATA,sha256=RbOlEHK5kumiBPP2S7-7k7zxzzLYag7Yb6TtQeOYbV0,11557
24
+ snowpark_checkpoints_validators-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
+ snowpark_checkpoints_validators-0.3.0.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
26
+ snowpark_checkpoints_validators-0.3.0.dist-info/RECORD,,