snowpark-checkpoints-collectors 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,4 +13,4 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- __version__ = "0.2.1"
16
+ __version__ = "0.3.0"
@@ -21,6 +21,9 @@ from typing import Optional
21
21
  from snowflake.snowpark_checkpoints_collector.collection_result.model import (
22
22
  CollectionPointResult,
23
23
  )
24
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
25
+ get_io_file_manager,
26
+ )
24
27
  from snowflake.snowpark_checkpoints_collector.singleton import Singleton
25
28
  from snowflake.snowpark_checkpoints_collector.utils import file_utils
26
29
 
@@ -70,5 +73,4 @@ class CollectionPointResultManager(metaclass=Singleton):
70
73
  def _save_result(self) -> None:
71
74
  result_collection_json = self.to_json()
72
75
  LOGGER.info("Saving collection results to '%s'", self.output_file_path)
73
- with open(self.output_file_path, "w") as f:
74
- f.write(result_collection_json)
76
+ get_io_file_manager().write(self.output_file_path, result_collection_json)
@@ -0,0 +1,26 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ __all__ = ["EnvStrategy", "IOFileManager", "IODefaultStrategy"]
17
+
18
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_env_strategy import (
19
+ EnvStrategy,
20
+ )
21
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_default_strategy import (
22
+ IODefaultStrategy,
23
+ )
24
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
25
+ IOFileManager,
26
+ )
@@ -0,0 +1,61 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import glob
17
+ import os
18
+ import shutil
19
+
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ from snowflake.snowpark_checkpoints_collector.io_utils import EnvStrategy
24
+
25
+
26
+ class IODefaultStrategy(EnvStrategy):
27
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
28
+ os.makedirs(path, exist_ok=exist_ok)
29
+
30
+ def folder_exists(self, path: str) -> bool:
31
+ return os.path.isdir(path)
32
+
33
+ def file_exists(self, path: str) -> bool:
34
+ return os.path.isfile(path)
35
+
36
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
37
+ mode = "w" if overwrite else "x"
38
+ with open(file_path, mode) as file:
39
+ file.write(file_content)
40
+
41
+ def read(
42
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
43
+ ) -> str:
44
+ with open(file_path, mode=mode, encoding=encoding) as file:
45
+ return file.read()
46
+
47
+ def read_bytes(self, file_path: str) -> bytes:
48
+ with open(file_path, mode="rb") as f:
49
+ return f.read()
50
+
51
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
52
+ return glob.glob(path, recursive=recursive)
53
+
54
+ def getcwd(self) -> str:
55
+ return os.getcwd()
56
+
57
+ def remove_dir(self, path: str) -> None:
58
+ shutil.rmtree(path)
59
+
60
+ def telemetry_path_files(self, path: str) -> Path:
61
+ return Path(path)
@@ -0,0 +1,142 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from abc import ABC, abstractmethod
17
+ from pathlib import Path
18
+ from typing import Optional
19
+
20
+
21
+ class EnvStrategy(ABC):
22
+
23
+ """An abstract base class that defines methods for file and directory operations.
24
+
25
+ Subclasses should implement these methods to provide environment-specific behavior.
26
+ """
27
+
28
+ @abstractmethod
29
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
30
+ """Create a directory.
31
+
32
+ Args:
33
+ path: The name of the directory to create.
34
+ exist_ok: If False, an error is raised if the directory already exists.
35
+
36
+ """
37
+
38
+ @abstractmethod
39
+ def folder_exists(self, path: str) -> bool:
40
+ """Check if a folder exists.
41
+
42
+ Args:
43
+ path: The path to the folder.
44
+
45
+ Returns:
46
+ bool: True if the folder exists, False otherwise.
47
+
48
+ """
49
+
50
+ @abstractmethod
51
+ def file_exists(self, path: str) -> bool:
52
+ """Check if a file exists.
53
+
54
+ Args:
55
+ path: The path to the file.
56
+
57
+ Returns:
58
+ bool: True if the file exists, False otherwise.
59
+
60
+ """
61
+
62
+ @abstractmethod
63
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
64
+ """Write content to a file.
65
+
66
+ Args:
67
+ file_path: The name of the file to write to.
68
+ file_content: The content to write to the file.
69
+ overwrite: If True, overwrite the file if it exists.
70
+
71
+ """
72
+
73
+ @abstractmethod
74
+ def read(
75
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
76
+ ) -> str:
77
+ """Read content from a file.
78
+
79
+ Args:
80
+ file_path: The path to the file to read from.
81
+ mode: The mode in which to open the file.
82
+ encoding: The encoding to use for reading the file.
83
+
84
+ Returns:
85
+ str: The content of the file.
86
+
87
+ """
88
+
89
+ @abstractmethod
90
+ def read_bytes(self, file_path: str) -> bytes:
91
+ """Read binary content from a file.
92
+
93
+ Args:
94
+ file_path: The path to the file to read from.
95
+
96
+ Returns:
97
+ bytes: The binary content of the file.
98
+
99
+ """
100
+
101
+ @abstractmethod
102
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
103
+ """List the contents of a directory.
104
+
105
+ Args:
106
+ path: The path to the directory.
107
+ recursive: If True, list the contents recursively.
108
+
109
+ Returns:
110
+ list[str]: A list of the contents of the directory.
111
+
112
+ """
113
+
114
+ @abstractmethod
115
+ def getcwd(self) -> str:
116
+ """Get the current working directory.
117
+
118
+ Returns:
119
+ str: The current working directory.
120
+
121
+ """
122
+
123
+ @abstractmethod
124
+ def remove_dir(self, path: str) -> None:
125
+ """Remove a directory and all its contents.
126
+
127
+ Args:
128
+ path: The path to the directory to remove.
129
+
130
+ """
131
+
132
+ @abstractmethod
133
+ def telemetry_path_files(self, path: str) -> Path:
134
+ """Get the path to the telemetry files.
135
+
136
+ Args:
137
+ path: The path to the telemetry directory.
138
+
139
+ Returns:
140
+ Path: The path object representing the telemetry files.
141
+
142
+ """
@@ -0,0 +1,79 @@
1
+ # Copyright 2025 Snowflake Inc.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pathlib import Path
17
+ from typing import Optional
18
+
19
+ from snowflake.snowpark_checkpoints_collector.io_utils import (
20
+ EnvStrategy,
21
+ IODefaultStrategy,
22
+ )
23
+ from snowflake.snowpark_checkpoints_collector.singleton import Singleton
24
+
25
+
26
+ class IOFileManager(metaclass=Singleton):
27
+ def __init__(self, strategy: Optional[EnvStrategy] = None):
28
+ self.strategy = strategy or IODefaultStrategy()
29
+
30
+ def mkdir(self, path: str, exist_ok: bool = False) -> None:
31
+ return self.strategy.mkdir(path, exist_ok)
32
+
33
+ def folder_exists(self, path: str) -> bool:
34
+ return self.strategy.folder_exists(path)
35
+
36
+ def file_exists(self, path: str) -> bool:
37
+ return self.strategy.file_exists(path)
38
+
39
+ def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
40
+ return self.strategy.write(file_path, file_content, overwrite)
41
+
42
+ def read(
43
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
44
+ ) -> str:
45
+ return self.strategy.read(file_path, mode, encoding)
46
+
47
+ def read_bytes(self, file_path: str) -> bytes:
48
+ return self.strategy.read_bytes(file_path)
49
+
50
+ def ls(self, path: str, recursive: bool = False) -> list[str]:
51
+ return self.strategy.ls(path, recursive)
52
+
53
+ def getcwd(self) -> str:
54
+ return self.strategy.getcwd()
55
+
56
+ def remove_dir(self, path: str) -> None:
57
+ return self.strategy.remove_dir(path)
58
+
59
+ def telemetry_path_files(self, path: str) -> Path:
60
+ return self.strategy.telemetry_path_files(path)
61
+
62
+ def set_strategy(self, strategy: EnvStrategy):
63
+ """Set the strategy for file and directory operations.
64
+
65
+ Args:
66
+ strategy (EnvStrategy): The strategy to use for file and directory operations.
67
+
68
+ """
69
+ self.strategy = strategy
70
+
71
+
72
+ def get_io_file_manager():
73
+ """Get the singleton instance of IOFileManager.
74
+
75
+ Returns:
76
+ IOFileManager: The singleton instance of IOFileManager.
77
+
78
+ """
79
+ return IOFileManager()
@@ -13,7 +13,7 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- import glob
16
+ import io
17
17
  import logging
18
18
  import os.path
19
19
  import time
@@ -25,6 +25,9 @@ from snowflake.snowpark import Session
25
25
  from snowflake.snowpark_checkpoints_collector.collection_common import (
26
26
  DOT_PARQUET_EXTENSION,
27
27
  )
28
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
29
+ get_io_file_manager,
30
+ )
28
31
 
29
32
 
30
33
  STAGE_NAME = "CHECKPOINT_STAGE"
@@ -130,11 +133,13 @@ class SnowConnection:
130
133
  )
131
134
 
132
135
  def filter_files(name: str):
133
- return os.path.isfile(name) and (filter_func(name) if filter_func else True)
136
+ return get_io_file_manager().file_exists(name) and (
137
+ filter_func(name) if filter_func else True
138
+ )
134
139
 
135
140
  target_dir = os.path.join(input_path, "**", "*")
136
141
  LOGGER.debug("Searching for files in '%s'", input_path)
137
- files_collection = glob.glob(target_dir, recursive=True)
142
+ files_collection = get_io_file_manager().ls(target_dir, recursive=True)
138
143
 
139
144
  files = [file for file in files_collection if filter_files(file)]
140
145
  files_count = len(files)
@@ -152,17 +157,14 @@ class SnowConnection:
152
157
  if not os.path.isabs(file)
153
158
  else str(Path(file).resolve())
154
159
  )
155
- # Snowflake required URI format for input in the put.
156
- normalize_file_path = Path(file_full_path).as_uri()
157
160
  new_file_path = file_full_path.replace(input_path, folder_name)
158
161
  # as Posix to convert Windows dir to posix
159
162
  new_file_path = Path(new_file_path).as_posix()
160
163
  stage_file_path = STAGE_PATH_FORMAT.format(stage_name, new_file_path)
161
- put_statement = PUT_FILE_IN_STAGE_STATEMENT_FORMAT.format(
162
- normalize_file_path, stage_file_path
163
- )
164
+ parquet_file = get_io_file_manager().read_bytes(file_full_path)
165
+ binary_parquet = io.BytesIO(parquet_file)
164
166
  LOGGER.info("Loading file '%s' to %s", file_full_path, stage_file_path)
165
- self.session.sql(put_statement).collect()
167
+ self.session.file.put_stream(binary_parquet, stage_file_path)
166
168
 
167
169
  def create_table_from_parquet(
168
170
  self, table_name: str, stage_directory_path: str
@@ -12,12 +12,9 @@
12
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
-
16
- import glob
17
15
  import json
18
16
  import logging
19
17
  import os
20
- import shutil
21
18
 
22
19
  from typing import Optional
23
20
 
@@ -54,6 +51,9 @@ from snowflake.snowpark_checkpoints_collector.column_collection import (
54
51
  from snowflake.snowpark_checkpoints_collector.column_pandera_checks import (
55
52
  PanderaColumnChecksManager,
56
53
  )
54
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
55
+ get_io_file_manager,
56
+ )
57
57
  from snowflake.snowpark_checkpoints_collector.snow_connection_model import (
58
58
  SnowConnection,
59
59
  )
@@ -321,8 +321,7 @@ def _generate_json_checkpoint_file(
321
321
  output_directory_path = file_utils.get_output_directory_path(output_path)
322
322
  checkpoint_file_path = os.path.join(output_directory_path, checkpoint_file_name)
323
323
  LOGGER.info("Writing DataFrame JSON schema file to '%s'", checkpoint_file_path)
324
- with open(checkpoint_file_path, "w") as f:
325
- f.write(dataframe_schema_contract)
324
+ get_io_file_manager().write(checkpoint_file_path, dataframe_schema_contract)
326
325
 
327
326
 
328
327
  @report_telemetry(params_list=["df"])
@@ -366,17 +365,17 @@ def generate_parquet_for_spark_df(spark_df: SparkDataFrame, output_path: str) ->
366
365
  ]
367
366
  converted_df = spark_df.select(new_cols)
368
367
 
369
- if os.path.exists(output_path):
368
+ if get_io_file_manager().folder_exists(output_path):
370
369
  LOGGER.warning(
371
370
  "Output directory '%s' already exists. Deleting it...", output_path
372
371
  )
373
- shutil.rmtree(output_path)
372
+ get_io_file_manager().remove_dir(output_path)
374
373
 
375
374
  LOGGER.info("Writing DataFrame to parquet files at '%s'", output_path)
376
375
  converted_df.write.parquet(output_path, mode="overwrite")
377
376
 
378
377
  target_dir = os.path.join(output_path, "**", f"*{DOT_PARQUET_EXTENSION}")
379
- parquet_files = glob.glob(target_dir, recursive=True)
378
+ parquet_files = get_io_file_manager().ls(target_dir, recursive=True)
380
379
  parquet_files_count = len(parquet_files)
381
380
  if parquet_files_count == 0:
382
381
  raise Exception("No parquet files were generated.")
@@ -22,13 +22,57 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
22
22
  SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR,
23
23
  CheckpointMode,
24
24
  )
25
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
26
+ get_io_file_manager,
27
+ )
25
28
 
26
29
 
27
30
  LOGGER = logging.getLogger(__name__)
28
31
 
29
32
  # noinspection DuplicatedCode
30
33
  def _get_checkpoint_contract_file_path() -> str:
31
- return os.environ.get(SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, os.getcwd())
34
+ return os.environ.get(
35
+ SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, get_io_file_manager().getcwd()
36
+ )
37
+
38
+
39
+ def _set_conf_io_strategy() -> None:
40
+ try:
41
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_default_strategy import (
42
+ IODefaultStrategy,
43
+ )
44
+ from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
45
+ EnvStrategy as ConfEnvStrategy,
46
+ )
47
+ from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
48
+ get_io_file_manager as get_conf_io_file_manager,
49
+ )
50
+
51
+ is_default_strategy = isinstance(
52
+ get_io_file_manager().strategy, IODefaultStrategy
53
+ )
54
+
55
+ if is_default_strategy:
56
+ return
57
+
58
+ class CustomConfEnvStrategy(ConfEnvStrategy):
59
+ def file_exists(self, path: str) -> bool:
60
+ return get_io_file_manager().file_exists(path)
61
+
62
+ def read(
63
+ self, file_path: str, mode: str = "r", encoding: Optional[str] = None
64
+ ) -> Optional[str]:
65
+ return get_io_file_manager().read(file_path, mode, encoding)
66
+
67
+ def getcwd(self) -> str:
68
+ return get_io_file_manager().getcwd()
69
+
70
+ get_conf_io_file_manager().set_strategy(CustomConfEnvStrategy())
71
+
72
+ except ImportError:
73
+ LOGGER.debug(
74
+ "snowpark-checkpoints-configuration is not installed. Cannot get a checkpoint metadata instance."
75
+ )
32
76
 
33
77
 
34
78
  # noinspection DuplicatedCode
@@ -38,6 +82,7 @@ def _get_metadata():
38
82
  CheckpointMetadata,
39
83
  )
40
84
 
85
+ _set_conf_io_strategy()
41
86
  path = _get_checkpoint_contract_file_path()
42
87
  LOGGER.debug("Loading checkpoint metadata from '%s'", path)
43
88
  metadata = CheckpointMetadata(path)
@@ -25,6 +25,9 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
25
25
  UNKNOWN_LINE_OF_CODE,
26
26
  UNKNOWN_SOURCE_FILE,
27
27
  )
28
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
29
+ get_io_file_manager,
30
+ )
28
31
 
29
32
 
30
33
  def get_output_file_path(out_path: Optional[str] = None) -> str:
@@ -63,11 +66,13 @@ def get_output_directory_path(output_path: Optional[str] = None) -> str:
63
66
  str: returns the output directory path.
64
67
 
65
68
  """
66
- current_working_directory_path = output_path if output_path else os.getcwd()
69
+ current_working_directory_path = (
70
+ output_path if output_path else get_io_file_manager().getcwd()
71
+ )
67
72
  checkpoints_output_directory_path = os.path.join(
68
73
  current_working_directory_path, SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME
69
74
  )
70
- os.makedirs(checkpoints_output_directory_path, exist_ok=True)
75
+ get_io_file_manager().mkdir(checkpoints_output_directory_path, exist_ok=True)
71
76
  return checkpoints_output_directory_path
72
77
 
73
78
 
@@ -120,8 +125,8 @@ def _is_temporal_path(path: str) -> bool:
120
125
 
121
126
 
122
127
  def _get_ipynb_file_path_collection() -> list[str]:
123
- current_working_directory_path = os.getcwd()
124
- cwd_file_name_collection = os.listdir(current_working_directory_path)
128
+ current_working_directory_path = get_io_file_manager().getcwd()
129
+ cwd_file_name_collection = get_io_file_manager().ls(current_working_directory_path)
125
130
  ipynb_file_path_collection = []
126
131
  for file_name in cwd_file_name_collection:
127
132
  is_ipynb_file = file_name.endswith(DOT_IPYNB_EXTENSION)
@@ -19,16 +19,34 @@ from sys import platform
19
19
  from typing import Any, Callable, Optional, TypeVar
20
20
  from uuid import getnode
21
21
 
22
- from snowflake.connector import (
23
- SNOWFLAKE_CONNECTOR_VERSION,
24
- time_util,
25
- )
26
- from snowflake.connector.constants import DIRS as SNOWFLAKE_DIRS
27
- from snowflake.connector.network import SnowflakeRestful
22
+ from snowflake.connector.description import PLATFORM as CONNECTOR_PLATFORM
28
23
  from snowflake.connector.telemetry import TelemetryClient
29
24
  from snowflake.snowpark import VERSION as SNOWPARK_VERSION
30
25
  from snowflake.snowpark import dataframe as snowpark_dataframe
31
26
  from snowflake.snowpark.session import Session
27
+ from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
28
+ get_io_file_manager,
29
+ )
30
+
31
+
32
+ try:
33
+ """
34
+ The following imports are used to log telemetry events in the Snowflake Connector.
35
+ """
36
+ from snowflake.connector import (
37
+ SNOWFLAKE_CONNECTOR_VERSION,
38
+ time_util,
39
+ )
40
+ from snowflake.connector.constants import DIRS as SNOWFLAKE_DIRS
41
+ from snowflake.connector.network import SnowflakeRestful
42
+ except Exception:
43
+ """
44
+ Set default import values for the Snowflake Connector when using snowpark-checkpoints in stored procedures.
45
+ """
46
+ SNOWFLAKE_CONNECTOR_VERSION = ""
47
+ time_util = None
48
+ SNOWFLAKE_DIRS = ""
49
+ SnowflakeRestful = None
32
50
 
33
51
 
34
52
  try:
@@ -81,7 +99,7 @@ class TelemetryManager(TelemetryClient):
81
99
  path: path to write telemetry.
82
100
 
83
101
  """
84
- os.makedirs(path, exist_ok=True)
102
+ get_io_file_manager().mkdir(str(path), exist_ok=True)
85
103
  self.sc_folder_path = path
86
104
 
87
105
  def sc_log_error(
@@ -189,7 +207,7 @@ class TelemetryManager(TelemetryClient):
189
207
 
190
208
  """
191
209
  try:
192
- os.makedirs(self.sc_folder_path, exist_ok=True)
210
+ get_io_file_manager().mkdir(str(self.sc_folder_path), exist_ok=True)
193
211
  for event in batch:
194
212
  message = event.get("message")
195
213
  if message is not None:
@@ -199,8 +217,7 @@ class TelemetryManager(TelemetryClient):
199
217
  f'_telemetry_{message.get("type")}.json'
200
218
  )
201
219
  json_content = self._sc_validate_folder_space(event)
202
- with open(file_path, "w") as json_file:
203
- json_file.write(json_content)
220
+ get_io_file_manager().write(str(file_path), json_content)
204
221
  except Exception:
205
222
  pass
206
223
 
@@ -227,10 +244,10 @@ class TelemetryManager(TelemetryClient):
227
244
  if not self.sc_is_enabled or self.sc_is_testing or not self._rest:
228
245
  return
229
246
  batch = []
230
- for file in self.sc_folder_path.glob("*.json"):
231
- with open(file) as json_file:
232
- data_dict = json.load(json_file)
233
- batch.append(data_dict)
247
+ for file in get_io_file_manager().ls(f"{self.sc_folder_path}/*.json"):
248
+ json_content = get_io_file_manager().read(file)
249
+ data_dict = json.loads(json_content)
250
+ batch.append(data_dict)
234
251
  if batch == []:
235
252
  return
236
253
  body = {"logs": batch}
@@ -242,14 +259,17 @@ class TelemetryManager(TelemetryClient):
242
259
  timeout=5,
243
260
  )
244
261
  if ret.get("success"):
245
- for file in self.sc_folder_path.glob("*.json"):
262
+ for file_path in get_io_file_manager().ls(f"{self.sc_folder_path}/*.json"):
263
+ file = get_io_file_manager().telemetry_path_files(file_path)
246
264
  file.unlink()
247
265
 
248
266
  def _sc_is_telemetry_testing(self) -> bool:
249
267
  is_testing = os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_TESTING") == "true"
250
268
  if is_testing:
251
269
  local_telemetry_path = (
252
- Path(os.getcwd()) / "snowpark-checkpoints-output" / "telemetry"
270
+ Path(get_io_file_manager().getcwd())
271
+ / "snowpark-checkpoints-output"
272
+ / "telemetry"
253
273
  )
254
274
  self.set_sc_output_path(local_telemetry_path)
255
275
  self.sc_is_enabled = True
@@ -348,7 +368,7 @@ def _get_metadata() -> dict:
348
368
  }
349
369
 
350
370
 
351
- def _get_version() -> str:
371
+ def _get_version() -> Optional[str]:
352
372
  """Get the version of the package.
353
373
 
354
374
  Returns:
@@ -359,11 +379,10 @@ def _get_version() -> str:
359
379
  directory_levels_up = 1
360
380
  project_root = Path(__file__).resolve().parents[directory_levels_up]
361
381
  version_file_path = project_root / VERSION_FILE_NAME
362
- with open(version_file_path) as file:
363
- content = file.read()
364
- version_match = re.search(VERSION_VARIABLE_PATTERN, content, re.MULTILINE)
365
- if version_match:
366
- return version_match.group(1)
382
+ content = get_io_file_manager().read(str(version_file_path))
383
+ version_match = re.search(VERSION_VARIABLE_PATTERN, content, re.MULTILINE)
384
+ if version_match:
385
+ return version_match.group(1)
367
386
  return None
368
387
  except Exception:
369
388
  return None
@@ -379,7 +398,10 @@ def _get_folder_size(folder_path: Path) -> int:
379
398
  int: The size of the folder in bytes.
380
399
 
381
400
  """
382
- return sum(f.stat().st_size for f in folder_path.glob("*.json") if f.is_file())
401
+ sum_size = 0
402
+ for f in get_io_file_manager().ls(f"{folder_path}/*.json"):
403
+ sum_size += get_io_file_manager().telemetry_path_files(f).stat().st_size
404
+ return sum_size
383
405
 
384
406
 
385
407
  def _free_up_space(folder_path: Path, max_size: int) -> None:
@@ -390,9 +412,13 @@ def _free_up_space(folder_path: Path, max_size: int) -> None:
390
412
  max_size (int): The maximum allowed size of the folder in bytes.
391
413
 
392
414
  """
393
- files = sorted(folder_path.glob("*.json"), key=lambda f: f.stat().st_mtime)
415
+ files = sorted(
416
+ get_io_file_manager().ls(f"{folder_path}/*.json"),
417
+ key=lambda f: f.stat().st_mtime,
418
+ )
394
419
  current_size = _get_folder_size(folder_path)
395
- for file in files:
420
+ for file_path in files:
421
+ file = get_io_file_manager().telemetry_path_files(file_path)
396
422
  if current_size <= max_size:
397
423
  break
398
424
  current_size -= file.stat().st_size
@@ -471,12 +497,22 @@ def get_load_json(json_schema: str) -> dict:
471
497
 
472
498
  """
473
499
  try:
474
- with open(json_schema, encoding="utf-8") as file:
475
- return json.load(file)
500
+ file_content = get_io_file_manager().read(json_schema, encoding="utf-8")
501
+ return json.loads(file_content)
476
502
  except (OSError, json.JSONDecodeError) as e:
477
503
  raise ValueError(f"Error reading JSON schema file: {e}") from None
478
504
 
479
505
 
506
+ def _is_in_stored_procedure() -> bool:
507
+ """Check if the code is running in a stored procedure.
508
+
509
+ Returns:
510
+ bool: True if the code is running in a stored procedure, False otherwise.
511
+
512
+ """
513
+ return CONNECTOR_PLATFORM == "XP"
514
+
515
+
480
516
  def extract_parameters(
481
517
  func: Callable, args: tuple, kwargs: dict, params_list: Optional[list[str]]
482
518
  ) -> dict:
@@ -824,7 +860,10 @@ def report_telemetry(
824
860
  except Exception as err:
825
861
  func_exception = err
826
862
 
827
- if os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_ENABLED") == "false":
863
+ if (
864
+ os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_ENABLED") == "false"
865
+ or _is_in_stored_procedure()
866
+ ):
828
867
  return result
829
868
  telemetry_event = None
830
869
  data = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snowpark-checkpoints-collectors
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Snowpark column and table statistics collection
5
5
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
6
6
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -30,6 +30,7 @@ Requires-Dist: pandera[io]==0.20.4
30
30
  Requires-Dist: snowflake-connector-python
31
31
  Requires-Dist: snowflake-snowpark-python>=1.23.0
32
32
  Provides-Extra: development
33
+ Requires-Dist: certifi==2025.1.31; extra == 'development'
33
34
  Requires-Dist: coverage>=7.6.7; extra == 'development'
34
35
  Requires-Dist: deepdiff>=8.0.0; extra == 'development'
35
36
  Requires-Dist: hatchling==1.25.0; extra == 'development'
@@ -1,11 +1,11 @@
1
1
  snowflake/snowpark_checkpoints_collector/__init__.py,sha256=GIESlH2W6g_qdcnyRqw9yjsvEkt0aniFvGixKlF4K7A,1096
2
- snowflake/snowpark_checkpoints_collector/__version__.py,sha256=jEnm4p_P4FqdYsTq3hnGQnhLZ4KwL0_Ew8fDF8BRL98,632
2
+ snowflake/snowpark_checkpoints_collector/__version__.py,sha256=kbbDnlkY7JOLNHvfWYkCO_mOBOV9GniMGdxYoQpLhyg,632
3
3
  snowflake/snowpark_checkpoints_collector/collection_common.py,sha256=ff5vYffrTRjoJXZQvVQBaOlegAUj_vXBbl1IZidz8Qo,4510
4
4
  snowflake/snowpark_checkpoints_collector/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
5
- snowflake/snowpark_checkpoints_collector/summary_stats_collector.py,sha256=Jd881L0Ju3I-aGhj0dkZZ_ZjW7x-NXKqcMD4sCRCaDw,14619
5
+ snowflake/snowpark_checkpoints_collector/summary_stats_collector.py,sha256=SD5MRF7zSDKXpxekMWdg5gO7ZcZr6Y548vkkKpG_jZs,14745
6
6
  snowflake/snowpark_checkpoints_collector/collection_result/model/__init__.py,sha256=jZzx29WzrjH7C_6ZsBGoe4PxbW_oM4uIjySS1axIM34,1000
7
7
  snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result.py,sha256=8xD9zGnFJ7Rz9RUXIys7JnV3kQD4mk8QwNOTxAihSjQ,2908
8
- snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py,sha256=6XbjHiehEm_RN_9y2MRlr0MaSgk3cWTczwZEYqUHCpM,2565
8
+ snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py,sha256=EY6WIIXRbvkTYC4bQn7jFALHh7D2PirVoiLZ5Kq8dNs,2659
9
9
  snowflake/snowpark_checkpoints_collector/column_collection/__init__.py,sha256=hpTh1V7hqBSHxNUqISwfxdz-NLD-7oZEMLXDUuRsoOU,783
10
10
  snowflake/snowpark_checkpoints_collector/column_collection/column_collector_manager.py,sha256=Vav_vbiipHFIAdHxeQG4ZK1BAmWTi_18hBnVeIeXFRs,9670
11
11
  snowflake/snowpark_checkpoints_collector/column_collection/model/__init__.py,sha256=d0WNMeayDyUKYFLLaVAMIC5Qt-DoWoWgOjj2ygJaHWA,2919
@@ -26,14 +26,18 @@ snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_colum
26
26
  snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_ntz_column_collector.py,sha256=glUUnCLgTbGiPLpF2pSZ11KCgKSpHDRt5uhi1ZT9bxA,2578
27
27
  snowflake/snowpark_checkpoints_collector/column_pandera_checks/__init__.py,sha256=JNZPOYx8rUTONGz_d7xyfAvEC2_umHmGkJLoNSATLs4,793
28
28
  snowflake/snowpark_checkpoints_collector/column_pandera_checks/pandera_column_checks_manager.py,sha256=X1Mm37DKt-WZ5AegvoUA3itU1nBUxvhBxvjO85QqcGE,7893
29
+ snowflake/snowpark_checkpoints_collector/io_utils/__init__.py,sha256=RhADOBizQJq4CoewWYleuZZErthjzRMHNagObTW-IsI,984
30
+ snowflake/snowpark_checkpoints_collector/io_utils/io_default_strategy.py,sha256=RG5uL6OM5R55AmyKhrNGw78nlIvSLy9iGw0Rd7WdOl8,1967
31
+ snowflake/snowpark_checkpoints_collector/io_utils/io_env_strategy.py,sha256=kJMbg2VOKNXXdkGCt_tMMLGEZ2aUl1_nie1qYvx5M-c,3770
32
+ snowflake/snowpark_checkpoints_collector/io_utils/io_file_manager.py,sha256=M17EtANswD5gcgGnmT13OImO_W1uH4K3ewu2CXL9aes,2597
29
33
  snowflake/snowpark_checkpoints_collector/snow_connection_model/__init__.py,sha256=kLjZId-aGCljK7lF6yeEw-syEqeTOJDxdXfpv9YxvZA,755
30
- snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py,sha256=odKGTzc0xov8WOgJSR6WmVs0IT-f6O4YoaLqH6CbbFo,7263
34
+ snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py,sha256=r3IPnmDMb8151PTgE4YojOhWnxWGPLyBWlgFvvhOfRY,7314
31
35
  snowflake/snowpark_checkpoints_collector/utils/checkpoint_name_utils.py,sha256=Xc4k3JU6A96-79VFRR8NrNAUPeO3V1DEAhngg-hLlU4,1787
32
- snowflake/snowpark_checkpoints_collector/utils/extra_config.py,sha256=t8WakSiHA3sgnXxz0WXE7q2MG7czWlnSYB5XR9swIhs,3643
33
- snowflake/snowpark_checkpoints_collector/utils/file_utils.py,sha256=deetkhQZOB0GUxQJvUHw4Ridp_rNYiCqmK9li3uwBL0,4324
36
+ snowflake/snowpark_checkpoints_collector/utils/extra_config.py,sha256=3kVf6WVA-EuyMpTO3ycTlXMSCHtytGtT6wkV4U2Hyjw,5195
37
+ snowflake/snowpark_checkpoints_collector/utils/file_utils.py,sha256=C1gZmQHvLMgHMVc5kTTpcCaUPw5PtpajY_Uu18mMy6c,4515
34
38
  snowflake/snowpark_checkpoints_collector/utils/logging_utils.py,sha256=yyi6X5DqKeTg0HRhvsH6ymYp2P0wbnyKIzI2RzrQS7k,2278
35
- snowflake/snowpark_checkpoints_collector/utils/telemetry.py,sha256=7S0yFE3Zq96SEGmVuVbpYc_wtXIQUpL--6KfGoxwJcA,30837
36
- snowpark_checkpoints_collectors-0.2.1.dist-info/METADATA,sha256=xu9uudc0KMeTcai8EF89QVaJZRN00E3kH79aRzfRkw0,6003
37
- snowpark_checkpoints_collectors-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
38
- snowpark_checkpoints_collectors-0.2.1.dist-info/licenses/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
39
- snowpark_checkpoints_collectors-0.2.1.dist-info/RECORD,,
39
+ snowflake/snowpark_checkpoints_collector/utils/telemetry.py,sha256=ueN9vM8j5YNax7jMcnEj_UrgGkoeMv_hJHVKjN7hiJE,32161
40
+ snowpark_checkpoints_collectors-0.3.0.dist-info/METADATA,sha256=4nXrRjc1glZUTrb9J8brIHPzyrE43GRKNu7lrqfGMZU,6061
41
+ snowpark_checkpoints_collectors-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
42
+ snowpark_checkpoints_collectors-0.3.0.dist-info/licenses/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
43
+ snowpark_checkpoints_collectors-0.3.0.dist-info/RECORD,,