snowpark-checkpoints-validators 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_checkpoints/__version__.py +1 -1
- snowflake/snowpark_checkpoints/io_utils/__init__.py +26 -0
- snowflake/snowpark_checkpoints/io_utils/io_default_strategy.py +57 -0
- snowflake/snowpark_checkpoints/io_utils/io_env_strategy.py +133 -0
- snowflake/snowpark_checkpoints/io_utils/io_file_manager.py +76 -0
- snowflake/snowpark_checkpoints/utils/extra_config.py +44 -1
- snowflake/snowpark_checkpoints/utils/telemetry.py +67 -28
- snowflake/snowpark_checkpoints/utils/utils_checks.py +9 -6
- snowflake/snowpark_checkpoints/validation_result_metadata.py +26 -22
- {snowpark_checkpoints_validators-0.2.1.dist-info → snowpark_checkpoints_validators-0.3.0.dist-info}/METADATA +3 -1
- {snowpark_checkpoints_validators-0.2.1.dist-info → snowpark_checkpoints_validators-0.3.0.dist-info}/RECORD +13 -9
- {snowpark_checkpoints_validators-0.2.1.dist-info → snowpark_checkpoints_validators-0.3.0.dist-info}/WHEEL +0 -0
- {snowpark_checkpoints_validators-0.2.1.dist-info → snowpark_checkpoints_validators-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
__all__ = ["EnvStrategy", "IOFileManager", "IODefaultStrategy"]
|
17
|
+
|
18
|
+
from snowflake.snowpark_checkpoints.io_utils.io_env_strategy import (
|
19
|
+
EnvStrategy,
|
20
|
+
)
|
21
|
+
from snowflake.snowpark_checkpoints.io_utils.io_default_strategy import (
|
22
|
+
IODefaultStrategy,
|
23
|
+
)
|
24
|
+
from snowflake.snowpark_checkpoints.io_utils.io_file_manager import (
|
25
|
+
IOFileManager,
|
26
|
+
)
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
import glob
|
17
|
+
import os
|
18
|
+
|
19
|
+
from pathlib import Path
|
20
|
+
from typing import Optional
|
21
|
+
|
22
|
+
from snowflake.snowpark_checkpoints.io_utils import EnvStrategy
|
23
|
+
|
24
|
+
|
25
|
+
class IODefaultStrategy(EnvStrategy):
|
26
|
+
def mkdir(self, path: str, exist_ok: bool = False) -> None:
|
27
|
+
os.makedirs(path, exist_ok=exist_ok)
|
28
|
+
|
29
|
+
def folder_exists(self, path: str) -> bool:
|
30
|
+
return os.path.isdir(path)
|
31
|
+
|
32
|
+
def file_exists(self, path: str) -> bool:
|
33
|
+
return os.path.isfile(path)
|
34
|
+
|
35
|
+
def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
|
36
|
+
mode = "w" if overwrite else "x"
|
37
|
+
with open(file_path, mode) as file:
|
38
|
+
file.write(file_content)
|
39
|
+
|
40
|
+
def read(
|
41
|
+
self, file_path: str, mode: str = "r", encoding: Optional[str] = None
|
42
|
+
) -> str:
|
43
|
+
with open(file_path, mode=mode, encoding=encoding) as file:
|
44
|
+
return file.read()
|
45
|
+
|
46
|
+
def read_bytes(self, file_path: str) -> bytes:
|
47
|
+
with open(file_path, mode="rb") as f:
|
48
|
+
return f.read()
|
49
|
+
|
50
|
+
def ls(self, path: str, recursive: bool = False) -> list[str]:
|
51
|
+
return glob.glob(path, recursive=recursive)
|
52
|
+
|
53
|
+
def getcwd(self) -> str:
|
54
|
+
return os.getcwd()
|
55
|
+
|
56
|
+
def telemetry_path_files(self, path: str) -> Path:
|
57
|
+
return Path(path)
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
from abc import ABC, abstractmethod
|
17
|
+
from pathlib import Path
|
18
|
+
from typing import Optional
|
19
|
+
|
20
|
+
|
21
|
+
class EnvStrategy(ABC):
|
22
|
+
|
23
|
+
"""An abstract base class that defines methods for file and directory operations.
|
24
|
+
|
25
|
+
Subclasses should implement these methods to provide environment-specific behavior.
|
26
|
+
"""
|
27
|
+
|
28
|
+
@abstractmethod
|
29
|
+
def mkdir(self, path: str, exist_ok: bool = False) -> None:
|
30
|
+
"""Create a directory.
|
31
|
+
|
32
|
+
Args:
|
33
|
+
path: The name of the directory to create.
|
34
|
+
exist_ok: If False, an error is raised if the directory already exists.
|
35
|
+
|
36
|
+
"""
|
37
|
+
|
38
|
+
@abstractmethod
|
39
|
+
def folder_exists(self, path: str) -> bool:
|
40
|
+
"""Check if a folder exists.
|
41
|
+
|
42
|
+
Args:
|
43
|
+
path: The path to the folder.
|
44
|
+
|
45
|
+
Returns:
|
46
|
+
bool: True if the folder exists, False otherwise.
|
47
|
+
|
48
|
+
"""
|
49
|
+
|
50
|
+
@abstractmethod
|
51
|
+
def file_exists(self, path: str) -> bool:
|
52
|
+
"""Check if a file exists.
|
53
|
+
|
54
|
+
Args:
|
55
|
+
path: The path to the file.
|
56
|
+
|
57
|
+
Returns:
|
58
|
+
bool: True if the file exists, False otherwise.
|
59
|
+
|
60
|
+
"""
|
61
|
+
|
62
|
+
@abstractmethod
|
63
|
+
def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
|
64
|
+
"""Write content to a file.
|
65
|
+
|
66
|
+
Args:
|
67
|
+
file_path: The name of the file to write to.
|
68
|
+
file_content: The content to write to the file.
|
69
|
+
overwrite: If True, overwrite the file if it exists.
|
70
|
+
|
71
|
+
"""
|
72
|
+
|
73
|
+
@abstractmethod
|
74
|
+
def read(
|
75
|
+
self, file_path: str, mode: str = "r", encoding: Optional[str] = None
|
76
|
+
) -> str:
|
77
|
+
"""Read content from a file.
|
78
|
+
|
79
|
+
Args:
|
80
|
+
file_path: The path to the file to read from.
|
81
|
+
mode: The mode in which to open the file.
|
82
|
+
encoding: The encoding to use for reading the file.
|
83
|
+
|
84
|
+
Returns:
|
85
|
+
str: The content of the file.
|
86
|
+
|
87
|
+
"""
|
88
|
+
|
89
|
+
@abstractmethod
|
90
|
+
def read_bytes(self, file_path: str) -> bytes:
|
91
|
+
"""Read binary content from a file.
|
92
|
+
|
93
|
+
Args:
|
94
|
+
file_path: The path to the file to read from.
|
95
|
+
|
96
|
+
Returns:
|
97
|
+
bytes: The binary content of the file.
|
98
|
+
|
99
|
+
"""
|
100
|
+
|
101
|
+
@abstractmethod
|
102
|
+
def ls(self, path: str, recursive: bool = False) -> list[str]:
|
103
|
+
"""List the contents of a directory.
|
104
|
+
|
105
|
+
Args:
|
106
|
+
path: The path to the directory.
|
107
|
+
recursive: If True, list the contents recursively.
|
108
|
+
|
109
|
+
Returns:
|
110
|
+
list[str]: A list of the contents of the directory.
|
111
|
+
|
112
|
+
"""
|
113
|
+
|
114
|
+
@abstractmethod
|
115
|
+
def getcwd(self) -> str:
|
116
|
+
"""Get the current working directory.
|
117
|
+
|
118
|
+
Returns:
|
119
|
+
str: The current working directory.
|
120
|
+
|
121
|
+
"""
|
122
|
+
|
123
|
+
@abstractmethod
|
124
|
+
def telemetry_path_files(self, path: str) -> Path:
|
125
|
+
"""Get the path to the telemetry files.
|
126
|
+
|
127
|
+
Args:
|
128
|
+
path: The path to the telemetry directory.
|
129
|
+
|
130
|
+
Returns:
|
131
|
+
Path: The path object representing the telemetry files.
|
132
|
+
|
133
|
+
"""
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
from pathlib import Path
|
17
|
+
from typing import Optional
|
18
|
+
|
19
|
+
from snowflake.snowpark_checkpoints.io_utils import (
|
20
|
+
EnvStrategy,
|
21
|
+
IODefaultStrategy,
|
22
|
+
)
|
23
|
+
from snowflake.snowpark_checkpoints.singleton import Singleton
|
24
|
+
|
25
|
+
|
26
|
+
class IOFileManager(metaclass=Singleton):
|
27
|
+
def __init__(self, strategy: Optional[EnvStrategy] = None):
|
28
|
+
self.strategy = strategy or IODefaultStrategy()
|
29
|
+
|
30
|
+
def mkdir(self, path: str, exist_ok: bool = False) -> None:
|
31
|
+
return self.strategy.mkdir(path, exist_ok)
|
32
|
+
|
33
|
+
def folder_exists(self, path: str) -> bool:
|
34
|
+
return self.strategy.folder_exists(path)
|
35
|
+
|
36
|
+
def file_exists(self, path: str) -> bool:
|
37
|
+
return self.strategy.file_exists(path)
|
38
|
+
|
39
|
+
def write(self, file_path: str, file_content: str, overwrite: bool = True) -> None:
|
40
|
+
return self.strategy.write(file_path, file_content, overwrite)
|
41
|
+
|
42
|
+
def read(
|
43
|
+
self, file_path: str, mode: str = "r", encoding: Optional[str] = None
|
44
|
+
) -> str:
|
45
|
+
return self.strategy.read(file_path, mode, encoding)
|
46
|
+
|
47
|
+
def read_bytes(self, file_path: str) -> bytes:
|
48
|
+
return self.strategy.read_bytes(file_path)
|
49
|
+
|
50
|
+
def ls(self, path: str, recursive: bool = False) -> list[str]:
|
51
|
+
return self.strategy.ls(path, recursive)
|
52
|
+
|
53
|
+
def getcwd(self) -> str:
|
54
|
+
return self.strategy.getcwd()
|
55
|
+
|
56
|
+
def telemetry_path_files(self, path: str) -> Path:
|
57
|
+
return self.strategy.telemetry_path_files(path)
|
58
|
+
|
59
|
+
def set_strategy(self, strategy: EnvStrategy):
|
60
|
+
"""Set the strategy for file and directory operations.
|
61
|
+
|
62
|
+
Args:
|
63
|
+
strategy (EnvStrategy): The strategy to use for file and directory operations.
|
64
|
+
|
65
|
+
"""
|
66
|
+
self.strategy = strategy
|
67
|
+
|
68
|
+
|
69
|
+
def get_io_file_manager():
|
70
|
+
"""Get the singleton instance of IOFileManager.
|
71
|
+
|
72
|
+
Returns:
|
73
|
+
IOFileManager: The singleton instance of IOFileManager.
|
74
|
+
|
75
|
+
"""
|
76
|
+
return IOFileManager()
|
@@ -18,6 +18,7 @@ import os
|
|
18
18
|
|
19
19
|
from typing import Optional
|
20
20
|
|
21
|
+
from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
|
21
22
|
from snowflake.snowpark_checkpoints.utils.constants import (
|
22
23
|
SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR,
|
23
24
|
)
|
@@ -28,7 +29,48 @@ LOGGER = logging.getLogger(__name__)
|
|
28
29
|
|
29
30
|
# noinspection DuplicatedCode
|
30
31
|
def _get_checkpoint_contract_file_path() -> str:
|
31
|
-
return os.environ.get(
|
32
|
+
return os.environ.get(
|
33
|
+
SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, get_io_file_manager().getcwd()
|
34
|
+
)
|
35
|
+
|
36
|
+
|
37
|
+
def _set_conf_io_strategy() -> None:
|
38
|
+
try:
|
39
|
+
from snowflake.snowpark_checkpoints.io_utils.io_default_strategy import (
|
40
|
+
IODefaultStrategy,
|
41
|
+
)
|
42
|
+
from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
|
43
|
+
EnvStrategy as ConfEnvStrategy,
|
44
|
+
)
|
45
|
+
from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
|
46
|
+
get_io_file_manager as get_conf_io_file_manager,
|
47
|
+
)
|
48
|
+
|
49
|
+
is_default_strategy = isinstance(
|
50
|
+
get_io_file_manager().strategy, IODefaultStrategy
|
51
|
+
)
|
52
|
+
|
53
|
+
if is_default_strategy:
|
54
|
+
return
|
55
|
+
|
56
|
+
class CustomConfEnvStrategy(ConfEnvStrategy):
|
57
|
+
def file_exists(self, path: str) -> bool:
|
58
|
+
return get_io_file_manager().file_exists(path)
|
59
|
+
|
60
|
+
def read(
|
61
|
+
self, file_path: str, mode: str = "r", encoding: Optional[str] = None
|
62
|
+
) -> Optional[str]:
|
63
|
+
return get_io_file_manager().read(file_path, mode, encoding)
|
64
|
+
|
65
|
+
def getcwd(self) -> str:
|
66
|
+
return get_io_file_manager().getcwd()
|
67
|
+
|
68
|
+
get_conf_io_file_manager().set_strategy(CustomConfEnvStrategy())
|
69
|
+
|
70
|
+
except ImportError:
|
71
|
+
LOGGER.debug(
|
72
|
+
"snowpark-checkpoints-configuration is not installed. Cannot get a checkpoint metadata instance."
|
73
|
+
)
|
32
74
|
|
33
75
|
|
34
76
|
# noinspection DuplicatedCode
|
@@ -39,6 +81,7 @@ def _get_metadata():
|
|
39
81
|
)
|
40
82
|
|
41
83
|
path = _get_checkpoint_contract_file_path()
|
84
|
+
_set_conf_io_strategy()
|
42
85
|
LOGGER.debug("Loading checkpoint metadata from '%s'", path)
|
43
86
|
metadata = CheckpointMetadata(path)
|
44
87
|
return True, metadata
|
@@ -30,16 +30,34 @@ from sys import platform
|
|
30
30
|
from typing import Any, Callable, Optional, TypeVar
|
31
31
|
from uuid import getnode
|
32
32
|
|
33
|
-
from snowflake.connector import
|
34
|
-
SNOWFLAKE_CONNECTOR_VERSION,
|
35
|
-
time_util,
|
36
|
-
)
|
37
|
-
from snowflake.connector.constants import DIRS as SNOWFLAKE_DIRS
|
38
|
-
from snowflake.connector.network import SnowflakeRestful
|
33
|
+
from snowflake.connector.description import PLATFORM as CONNECTOR_PLATFORM
|
39
34
|
from snowflake.connector.telemetry import TelemetryClient
|
40
35
|
from snowflake.snowpark import VERSION as SNOWPARK_VERSION
|
41
36
|
from snowflake.snowpark import dataframe as snowpark_dataframe
|
42
37
|
from snowflake.snowpark.session import Session
|
38
|
+
from snowflake.snowpark_checkpoints.io_utils.io_file_manager import (
|
39
|
+
get_io_file_manager,
|
40
|
+
)
|
41
|
+
|
42
|
+
|
43
|
+
try:
|
44
|
+
"""
|
45
|
+
The following imports are used to log telemetry events in the Snowflake Connector.
|
46
|
+
"""
|
47
|
+
from snowflake.connector import (
|
48
|
+
SNOWFLAKE_CONNECTOR_VERSION,
|
49
|
+
time_util,
|
50
|
+
)
|
51
|
+
from snowflake.connector.constants import DIRS as SNOWFLAKE_DIRS
|
52
|
+
from snowflake.connector.network import SnowflakeRestful
|
53
|
+
except Exception:
|
54
|
+
"""
|
55
|
+
Set default import values for the Snowflake Connector when using snowpark-checkpoints in stored procedures.
|
56
|
+
"""
|
57
|
+
SNOWFLAKE_CONNECTOR_VERSION = ""
|
58
|
+
time_util = None
|
59
|
+
SNOWFLAKE_DIRS = ""
|
60
|
+
SnowflakeRestful = None
|
43
61
|
|
44
62
|
|
45
63
|
try:
|
@@ -92,7 +110,7 @@ class TelemetryManager(TelemetryClient):
|
|
92
110
|
path: path to write telemetry.
|
93
111
|
|
94
112
|
"""
|
95
|
-
|
113
|
+
get_io_file_manager().mkdir(str(path), exist_ok=True)
|
96
114
|
self.sc_folder_path = path
|
97
115
|
|
98
116
|
def sc_log_error(
|
@@ -200,7 +218,7 @@ class TelemetryManager(TelemetryClient):
|
|
200
218
|
|
201
219
|
"""
|
202
220
|
try:
|
203
|
-
|
221
|
+
get_io_file_manager().mkdir(str(self.sc_folder_path), exist_ok=True)
|
204
222
|
for event in batch:
|
205
223
|
message = event.get("message")
|
206
224
|
if message is not None:
|
@@ -210,8 +228,7 @@ class TelemetryManager(TelemetryClient):
|
|
210
228
|
f'_telemetry_{message.get("type")}.json'
|
211
229
|
)
|
212
230
|
json_content = self._sc_validate_folder_space(event)
|
213
|
-
|
214
|
-
json_file.write(json_content)
|
231
|
+
get_io_file_manager().write(str(file_path), json_content)
|
215
232
|
except Exception:
|
216
233
|
pass
|
217
234
|
|
@@ -238,10 +255,10 @@ class TelemetryManager(TelemetryClient):
|
|
238
255
|
if not self.sc_is_enabled or self.sc_is_testing or not self._rest:
|
239
256
|
return
|
240
257
|
batch = []
|
241
|
-
for file in self.sc_folder_path
|
242
|
-
|
243
|
-
|
244
|
-
|
258
|
+
for file in get_io_file_manager().ls(f"{self.sc_folder_path}/*.json"):
|
259
|
+
json_content = get_io_file_manager().read(file)
|
260
|
+
data_dict = json.loads(json_content)
|
261
|
+
batch.append(data_dict)
|
245
262
|
if batch == []:
|
246
263
|
return
|
247
264
|
body = {"logs": batch}
|
@@ -253,14 +270,17 @@ class TelemetryManager(TelemetryClient):
|
|
253
270
|
timeout=5,
|
254
271
|
)
|
255
272
|
if ret.get("success"):
|
256
|
-
for
|
273
|
+
for file_path in get_io_file_manager().ls(f"{self.sc_folder_path}/*.json"):
|
274
|
+
file = get_io_file_manager().telemetry_path_files(file_path)
|
257
275
|
file.unlink()
|
258
276
|
|
259
277
|
def _sc_is_telemetry_testing(self) -> bool:
|
260
278
|
is_testing = os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_TESTING") == "true"
|
261
279
|
if is_testing:
|
262
280
|
local_telemetry_path = (
|
263
|
-
Path(
|
281
|
+
Path(get_io_file_manager().getcwd())
|
282
|
+
/ "snowpark-checkpoints-output"
|
283
|
+
/ "telemetry"
|
264
284
|
)
|
265
285
|
self.set_sc_output_path(local_telemetry_path)
|
266
286
|
self.sc_is_enabled = True
|
@@ -359,7 +379,7 @@ def _get_metadata() -> dict:
|
|
359
379
|
}
|
360
380
|
|
361
381
|
|
362
|
-
def _get_version() -> str:
|
382
|
+
def _get_version() -> Optional[str]:
|
363
383
|
"""Get the version of the package.
|
364
384
|
|
365
385
|
Returns:
|
@@ -370,11 +390,10 @@ def _get_version() -> str:
|
|
370
390
|
directory_levels_up = 1
|
371
391
|
project_root = Path(__file__).resolve().parents[directory_levels_up]
|
372
392
|
version_file_path = project_root / VERSION_FILE_NAME
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
return version_match.group(1)
|
393
|
+
content = get_io_file_manager().read(str(version_file_path))
|
394
|
+
version_match = re.search(VERSION_VARIABLE_PATTERN, content, re.MULTILINE)
|
395
|
+
if version_match:
|
396
|
+
return version_match.group(1)
|
378
397
|
return None
|
379
398
|
except Exception:
|
380
399
|
return None
|
@@ -390,7 +409,10 @@ def _get_folder_size(folder_path: Path) -> int:
|
|
390
409
|
int: The size of the folder in bytes.
|
391
410
|
|
392
411
|
"""
|
393
|
-
|
412
|
+
sum_size = 0
|
413
|
+
for f in get_io_file_manager().ls(f"{folder_path}/*.json"):
|
414
|
+
sum_size += get_io_file_manager().telemetry_path_files(f).stat().st_size
|
415
|
+
return sum_size
|
394
416
|
|
395
417
|
|
396
418
|
def _free_up_space(folder_path: Path, max_size: int) -> None:
|
@@ -401,9 +423,13 @@ def _free_up_space(folder_path: Path, max_size: int) -> None:
|
|
401
423
|
max_size (int): The maximum allowed size of the folder in bytes.
|
402
424
|
|
403
425
|
"""
|
404
|
-
files = sorted(
|
426
|
+
files = sorted(
|
427
|
+
get_io_file_manager().ls(f"{folder_path}/*.json"),
|
428
|
+
key=lambda f: f.stat().st_mtime,
|
429
|
+
)
|
405
430
|
current_size = _get_folder_size(folder_path)
|
406
|
-
for
|
431
|
+
for file_path in files:
|
432
|
+
file = get_io_file_manager().telemetry_path_files(file_path)
|
407
433
|
if current_size <= max_size:
|
408
434
|
break
|
409
435
|
current_size -= file.stat().st_size
|
@@ -482,12 +508,22 @@ def get_load_json(json_schema: str) -> dict:
|
|
482
508
|
|
483
509
|
"""
|
484
510
|
try:
|
485
|
-
|
486
|
-
|
511
|
+
file_content = get_io_file_manager().read(json_schema, encoding="utf-8")
|
512
|
+
return json.loads(file_content)
|
487
513
|
except (OSError, json.JSONDecodeError) as e:
|
488
514
|
raise ValueError(f"Error reading JSON schema file: {e}") from None
|
489
515
|
|
490
516
|
|
517
|
+
def _is_in_stored_procedure() -> bool:
|
518
|
+
"""Check if the code is running in a stored procedure.
|
519
|
+
|
520
|
+
Returns:
|
521
|
+
bool: True if the code is running in a stored procedure, False otherwise.
|
522
|
+
|
523
|
+
"""
|
524
|
+
return CONNECTOR_PLATFORM == "XP"
|
525
|
+
|
526
|
+
|
491
527
|
def extract_parameters(
|
492
528
|
func: Callable, args: tuple, kwargs: dict, params_list: Optional[list[str]]
|
493
529
|
) -> dict:
|
@@ -835,7 +871,10 @@ def report_telemetry(
|
|
835
871
|
except Exception as err:
|
836
872
|
func_exception = err
|
837
873
|
|
838
|
-
if
|
874
|
+
if (
|
875
|
+
os.getenv("SNOWPARK_CHECKPOINTS_TELEMETRY_ENABLED") == "false"
|
876
|
+
or _is_in_stored_procedure()
|
877
|
+
):
|
839
878
|
return result
|
840
879
|
telemetry_event = None
|
841
880
|
data = None
|
@@ -28,6 +28,7 @@ from pandera import DataFrameSchema
|
|
28
28
|
|
29
29
|
from snowflake.snowpark import DataFrame as SnowparkDataFrame
|
30
30
|
from snowflake.snowpark_checkpoints.errors import SchemaValidationError
|
31
|
+
from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
|
31
32
|
from snowflake.snowpark_checkpoints.job_context import SnowparkJobContext
|
32
33
|
from snowflake.snowpark_checkpoints.snowpark_sampler import (
|
33
34
|
SamplingAdapter,
|
@@ -154,13 +155,15 @@ def _generate_schema(
|
|
154
155
|
LOGGER.info(
|
155
156
|
"Generating Pandera DataFrameSchema for checkpoint: '%s'", checkpoint_name
|
156
157
|
)
|
157
|
-
current_directory_path =
|
158
|
+
current_directory_path = (
|
159
|
+
output_path if output_path else get_io_file_manager().getcwd()
|
160
|
+
)
|
158
161
|
|
159
162
|
output_directory_path = os.path.join(
|
160
163
|
current_directory_path, SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME
|
161
164
|
)
|
162
165
|
|
163
|
-
if not
|
166
|
+
if not get_io_file_manager().folder_exists(output_directory_path):
|
164
167
|
raise ValueError(
|
165
168
|
"""Output directory snowpark-checkpoints-output does not exist.
|
166
169
|
Please run the Snowpark checkpoint collector first."""
|
@@ -171,14 +174,14 @@ Please run the Snowpark checkpoint collector first."""
|
|
171
174
|
CHECKPOINT_JSON_OUTPUT_FILE_FORMAT_NAME.format(checkpoint_name),
|
172
175
|
)
|
173
176
|
|
174
|
-
if not
|
177
|
+
if not get_io_file_manager().file_exists(checkpoint_schema_file_path):
|
175
178
|
raise ValueError(
|
176
179
|
f"Checkpoint {checkpoint_name} JSON file not found. Please run the Snowpark checkpoint collector first."
|
177
180
|
)
|
178
181
|
|
179
182
|
LOGGER.info("Reading schema from file: '%s'", checkpoint_schema_file_path)
|
180
|
-
|
181
|
-
|
183
|
+
schema_file = get_io_file_manager().read(checkpoint_schema_file_path)
|
184
|
+
checkpoint_schema_config = json.loads(schema_file)
|
182
185
|
|
183
186
|
if DATAFRAME_PANDERA_SCHEMA_KEY not in checkpoint_schema_config:
|
184
187
|
raise ValueError(
|
@@ -354,7 +357,7 @@ def _get_relative_path(file_path: str) -> str:
|
|
354
357
|
str: The relative path of the file.
|
355
358
|
|
356
359
|
"""
|
357
|
-
current_directory =
|
360
|
+
current_directory = get_io_file_manager().getcwd()
|
358
361
|
return os.path.relpath(file_path, current_directory)
|
359
362
|
|
360
363
|
|
@@ -18,6 +18,7 @@ import os
|
|
18
18
|
|
19
19
|
from typing import Optional
|
20
20
|
|
21
|
+
from snowflake.snowpark_checkpoints.io_utils.io_file_manager import get_io_file_manager
|
21
22
|
from snowflake.snowpark_checkpoints.singleton import Singleton
|
22
23
|
from snowflake.snowpark_checkpoints.utils.constants import (
|
23
24
|
SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
|
@@ -67,7 +68,9 @@ class ValidationResultsMetadata(metaclass=Singleton):
|
|
67
68
|
Exception: If there is an error reading the validation results file.
|
68
69
|
|
69
70
|
"""
|
70
|
-
self.validation_results_directory =
|
71
|
+
self.validation_results_directory = (
|
72
|
+
path if path else get_io_file_manager().getcwd()
|
73
|
+
)
|
71
74
|
self.validation_results_directory = os.path.join(
|
72
75
|
self.validation_results_directory,
|
73
76
|
SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
|
@@ -89,20 +92,21 @@ class ValidationResultsMetadata(metaclass=Singleton):
|
|
89
92
|
|
90
93
|
self.validation_results = ValidationResults(results=[])
|
91
94
|
|
92
|
-
if
|
95
|
+
if get_io_file_manager().file_exists(self.validation_results_file):
|
93
96
|
LOGGER.info(
|
94
97
|
"Loading validation results from: '%s'", self.validation_results_file
|
95
98
|
)
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
99
|
+
try:
|
100
|
+
validation_result_json = get_io_file_manager().read(
|
101
|
+
self.validation_results_file
|
102
|
+
)
|
103
|
+
self.validation_results = ValidationResults.model_validate_json(
|
104
|
+
validation_result_json
|
105
|
+
)
|
106
|
+
except Exception as e:
|
107
|
+
raise Exception(
|
108
|
+
f"Error reading validation results file: {self.validation_results_file} \n {e}"
|
109
|
+
) from None
|
106
110
|
else:
|
107
111
|
LOGGER.info(
|
108
112
|
"Validation results file not found: '%s'",
|
@@ -115,7 +119,7 @@ class ValidationResultsMetadata(metaclass=Singleton):
|
|
115
119
|
This method empties the validation results list.
|
116
120
|
|
117
121
|
"""
|
118
|
-
if not
|
122
|
+
if not get_io_file_manager().file_exists(self.validation_results_file):
|
119
123
|
LOGGER.info("Cleaning validation results...")
|
120
124
|
self.validation_results.results = []
|
121
125
|
|
@@ -123,7 +127,6 @@ class ValidationResultsMetadata(metaclass=Singleton):
|
|
123
127
|
"""Add a validation result to the pipeline result list.
|
124
128
|
|
125
129
|
Args:
|
126
|
-
checkpoint_name (str): The name of the checkpoint.
|
127
130
|
validation_result (dict): The validation result to be added.
|
128
131
|
|
129
132
|
"""
|
@@ -140,16 +143,17 @@ class ValidationResultsMetadata(metaclass=Singleton):
|
|
140
143
|
OSError: If the directory cannot be created or the file cannot be written.
|
141
144
|
|
142
145
|
"""
|
143
|
-
if not
|
146
|
+
if not get_io_file_manager().folder_exists(self.validation_results_directory):
|
144
147
|
LOGGER.debug(
|
145
148
|
"Validation results directory '%s' does not exist. Creating it...",
|
146
149
|
self.validation_results_directory,
|
147
150
|
)
|
148
|
-
|
151
|
+
get_io_file_manager().mkdir(self.validation_results_directory)
|
149
152
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
153
|
+
get_io_file_manager().write(
|
154
|
+
self.validation_results_file, self.validation_results.model_dump_json()
|
155
|
+
)
|
156
|
+
LOGGER.info(
|
157
|
+
"Validation results successfully saved to: '%s'",
|
158
|
+
self.validation_results_file,
|
159
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: snowpark-checkpoints-validators
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Migration tools for Snowpark
|
5
5
|
Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
|
6
6
|
Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
|
@@ -27,9 +27,11 @@ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
27
27
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
28
28
|
Requires-Python: <3.12,>=3.9
|
29
29
|
Requires-Dist: pandera[io]==0.20.4
|
30
|
+
Requires-Dist: pydantic>=2.0
|
30
31
|
Requires-Dist: snowflake-connector-python[pandas]
|
31
32
|
Requires-Dist: snowflake-snowpark-python>=1.23.0
|
32
33
|
Provides-Extra: development
|
34
|
+
Requires-Dist: certifi==2025.1.31; extra == 'development'
|
33
35
|
Requires-Dist: coverage>=7.6.7; extra == 'development'
|
34
36
|
Requires-Dist: deepdiff==8.1.1; extra == 'development'
|
35
37
|
Requires-Dist: deepdiff>=8.0.0; extra == 'development'
|
@@ -1,22 +1,26 @@
|
|
1
1
|
snowflake/snowpark_checkpoints/__init__.py,sha256=p7fzH3f8foD5nhNJHZ00JT3ODTXJGGkWTd3xRKx-8aQ,1435
|
2
|
-
snowflake/snowpark_checkpoints/__version__.py,sha256=
|
2
|
+
snowflake/snowpark_checkpoints/__version__.py,sha256=kbbDnlkY7JOLNHvfWYkCO_mOBOV9GniMGdxYoQpLhyg,632
|
3
3
|
snowflake/snowpark_checkpoints/checkpoint.py,sha256=i-iDRYbGvQHy9ipW7UxHVhJhQ9BXNSO-bsCcHyg3oLA,22056
|
4
4
|
snowflake/snowpark_checkpoints/errors.py,sha256=9KjzRf8bjDZTTNL4LeySJAwuucDOyz0Ka7EFBKWFpyg,1821
|
5
5
|
snowflake/snowpark_checkpoints/job_context.py,sha256=RMK0g0HrbDVrOAvai4PgsGvsAn_GIo9aFmh-tWlyieY,4183
|
6
6
|
snowflake/snowpark_checkpoints/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
|
7
7
|
snowflake/snowpark_checkpoints/snowpark_sampler.py,sha256=Qxv-8nRGuf-ab3GoSUt8_MNL0ppjoBIMOFIMkqmwN5I,4668
|
8
8
|
snowflake/snowpark_checkpoints/spark_migration.py,sha256=s2HqomYx76Hqn71g9TleBeHI3t1nirgfPvkggqQQdts,10253
|
9
|
-
snowflake/snowpark_checkpoints/validation_result_metadata.py,sha256=
|
9
|
+
snowflake/snowpark_checkpoints/validation_result_metadata.py,sha256=5C8f1g-Grs2ydpXiZBLGt5n9cvEHBaw2-CDeb2vnhpg,5847
|
10
10
|
snowflake/snowpark_checkpoints/validation_results.py,sha256=J8OcpNty6hQD8RbAy8xmA0UMbPWfXSmQnHYspWWSisk,1502
|
11
|
+
snowflake/snowpark_checkpoints/io_utils/__init__.py,sha256=fmSEYcBGNASBanNvMVW-uv6hcoYre6kEH35K-RliuiA,954
|
12
|
+
snowflake/snowpark_checkpoints/io_utils/io_default_strategy.py,sha256=VMfdqj4uDgTEinmpC3D0zXncIB9FxWJod1rI-Yt3YVA,1869
|
13
|
+
snowflake/snowpark_checkpoints/io_utils/io_env_strategy.py,sha256=ltG_rxm0CkJFXpskOf__ByZw-C6B9LtycqlyB9EmaJI,3569
|
14
|
+
snowflake/snowpark_checkpoints/io_utils/io_file_manager.py,sha256=YHrxRBzTlhIUrSFrsoWkRY_Qa-TXgDWglr00T98Tc5g,2485
|
11
15
|
snowflake/snowpark_checkpoints/utils/__init__.py,sha256=I4srmZ8G1q9DU6Suo1S91aVfNvETyisKH95uvLAvEJ0,609
|
12
16
|
snowflake/snowpark_checkpoints/utils/constants.py,sha256=pgFttLDQ6fTa6obSdvivWBYClS21ap41YVDNGAS4sxY,4146
|
13
|
-
snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=
|
17
|
+
snowflake/snowpark_checkpoints/utils/extra_config.py,sha256=xOYaG6MfsUCAHI0C_7qWF_m96xcLIZWwrgxY4UlpaZI,4325
|
14
18
|
snowflake/snowpark_checkpoints/utils/logging_utils.py,sha256=yyi6X5DqKeTg0HRhvsH6ymYp2P0wbnyKIzI2RzrQS7k,2278
|
15
19
|
snowflake/snowpark_checkpoints/utils/pandera_check_manager.py,sha256=tQIozLO-2kM8WZ-gGKfRwmXBx1cDPaIZB0qIcArp8xA,16100
|
16
20
|
snowflake/snowpark_checkpoints/utils/supported_types.py,sha256=GrMX2tHdSFnK7LlPbZx20UufD6Br6TNVRkkBwIxdPy0,1433
|
17
|
-
snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=
|
18
|
-
snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=
|
19
|
-
snowpark_checkpoints_validators-0.
|
20
|
-
snowpark_checkpoints_validators-0.
|
21
|
-
snowpark_checkpoints_validators-0.
|
22
|
-
snowpark_checkpoints_validators-0.
|
21
|
+
snowflake/snowpark_checkpoints/utils/telemetry.py,sha256=GfuyIaI3QG4a4_qWwyJHvWRM0GENunNexuEJ6IgscF4,32684
|
22
|
+
snowflake/snowpark_checkpoints/utils/utils_checks.py,sha256=oQ1c4n-uAA2kFIpWIRPWhbCW8e-wwOIL8qDqLvr5Fok,14398
|
23
|
+
snowpark_checkpoints_validators-0.3.0.dist-info/METADATA,sha256=RbOlEHK5kumiBPP2S7-7k7zxzzLYag7Yb6TtQeOYbV0,11557
|
24
|
+
snowpark_checkpoints_validators-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
25
|
+
snowpark_checkpoints_validators-0.3.0.dist-info/licenses/LICENSE,sha256=pmjhbh6uVhV5MBXOlou_UZgFP7CYVQITkCCdvfcS5lY,11340
|
26
|
+
snowpark_checkpoints_validators-0.3.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|