snowpark-checkpoints-collectors 0.2.0rc1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/snowpark_checkpoints_collector/__init__.py +30 -0
- snowflake/snowpark_checkpoints_collector/__version__.py +16 -0
- snowflake/snowpark_checkpoints_collector/collection_common.py +160 -0
- snowflake/snowpark_checkpoints_collector/collection_result/model/__init__.py +24 -0
- snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result.py +91 -0
- snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py +76 -0
- snowflake/snowpark_checkpoints_collector/column_collection/__init__.py +22 -0
- snowflake/snowpark_checkpoints_collector/column_collection/column_collector_manager.py +276 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/__init__.py +75 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/array_column_collector.py +113 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/binary_column_collector.py +87 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/boolean_column_collector.py +71 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/column_collector_base.py +95 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/date_column_collector.py +74 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/day_time_interval_column_collector.py +67 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/decimal_column_collector.py +92 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/empty_column_collector.py +88 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/map_column_collector.py +120 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/null_column_collector.py +49 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/numeric_column_collector.py +108 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/string_column_collector.py +70 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/struct_column_collector.py +102 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_column_collector.py +75 -0
- snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_ntz_column_collector.py +75 -0
- snowflake/snowpark_checkpoints_collector/column_pandera_checks/__init__.py +20 -0
- snowflake/snowpark_checkpoints_collector/column_pandera_checks/pandera_column_checks_manager.py +241 -0
- snowflake/snowpark_checkpoints_collector/io_utils/__init__.py +26 -0
- snowflake/snowpark_checkpoints_collector/io_utils/io_default_strategy.py +61 -0
- snowflake/snowpark_checkpoints_collector/io_utils/io_env_strategy.py +142 -0
- snowflake/snowpark_checkpoints_collector/io_utils/io_file_manager.py +79 -0
- snowflake/snowpark_checkpoints_collector/singleton.py +23 -0
- snowflake/snowpark_checkpoints_collector/snow_connection_model/__init__.py +20 -0
- snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py +203 -0
- snowflake/snowpark_checkpoints_collector/summary_stats_collector.py +409 -0
- snowflake/snowpark_checkpoints_collector/utils/checkpoint_name_utils.py +53 -0
- snowflake/snowpark_checkpoints_collector/utils/extra_config.py +164 -0
- snowflake/snowpark_checkpoints_collector/utils/file_utils.py +137 -0
- snowflake/snowpark_checkpoints_collector/utils/logging_utils.py +67 -0
- snowflake/snowpark_checkpoints_collector/utils/telemetry.py +928 -0
- snowpark_checkpoints_collectors-0.3.0.dist-info/METADATA +159 -0
- snowpark_checkpoints_collectors-0.3.0.dist-info/RECORD +43 -0
- {snowpark_checkpoints_collectors-0.2.0rc1.dist-info → snowpark_checkpoints_collectors-0.3.0.dist-info}/licenses/LICENSE +0 -25
- snowpark_checkpoints_collectors-0.2.0rc1.dist-info/METADATA +0 -347
- snowpark_checkpoints_collectors-0.2.0rc1.dist-info/RECORD +0 -4
- {snowpark_checkpoints_collectors-0.2.0rc1.dist-info → snowpark_checkpoints_collectors-0.3.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,164 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
import logging
|
17
|
+
import os
|
18
|
+
|
19
|
+
from typing import Optional
|
20
|
+
|
21
|
+
from snowflake.snowpark_checkpoints_collector.collection_common import (
|
22
|
+
SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR,
|
23
|
+
CheckpointMode,
|
24
|
+
)
|
25
|
+
from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
|
26
|
+
get_io_file_manager,
|
27
|
+
)
|
28
|
+
|
29
|
+
|
30
|
+
LOGGER = logging.getLogger(__name__)
|
31
|
+
|
32
|
+
# noinspection DuplicatedCode
|
33
|
+
def _get_checkpoint_contract_file_path() -> str:
    """Return the location used to look up the checkpoint contract file.

    Returns:
        str: the value of the environment variable named by
            SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR when it is set,
            otherwise the working directory reported by the IO file manager.

    """
    # The fallback is computed up front, whether or not the variable is set.
    fallback_path = get_io_file_manager().getcwd()
    return os.environ.get(
        SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, fallback_path
    )
|
37
|
+
|
38
|
+
|
39
|
+
def _set_conf_io_strategy() -> None:
    """Propagate a non-default collector IO strategy to the configuration package.

    When the collector's IO file manager uses a strategy that is not an
    ``IODefaultStrategy``, the configuration package's IO file manager is given
    a strategy whose ``file_exists``, ``read`` and ``getcwd`` delegate to the
    collector's IO file manager. Nothing is changed when the default strategy
    is in use. If any of the required modules cannot be imported, a debug
    message is logged and the function returns normally.

    """
    try:
        from snowflake.snowpark_checkpoints_collector.io_utils.io_default_strategy import (
            IODefaultStrategy,
        )
        from snowflake.snowpark_checkpoints_configuration.io_utils.io_file_manager import (
            EnvStrategy as ConfEnvStrategy,
            get_io_file_manager as get_conf_io_file_manager,
        )

        # Nothing to mirror while the collector still uses its default strategy.
        if isinstance(get_io_file_manager().strategy, IODefaultStrategy):
            return

        class CustomConfEnvStrategy(ConfEnvStrategy):
            """Configuration-side strategy that defers to the collector's IO file manager."""

            def file_exists(self, path: str) -> bool:
                collector_io = get_io_file_manager()
                return collector_io.file_exists(path)

            def read(
                self, file_path: str, mode: str = "r", encoding: Optional[str] = None
            ) -> Optional[str]:
                collector_io = get_io_file_manager()
                return collector_io.read(file_path, mode, encoding)

            def getcwd(self) -> str:
                collector_io = get_io_file_manager()
                return collector_io.getcwd()

        conf_io_file_manager = get_conf_io_file_manager()
        conf_io_file_manager.set_strategy(CustomConfEnvStrategy())

    except ImportError:
        LOGGER.debug(
            "snowpark-checkpoints-configuration is not installed. Cannot get a checkpoint metadata instance."
        )
|
76
|
+
|
77
|
+
|
78
|
+
# noinspection DuplicatedCode
|
79
|
+
def _get_metadata():
    """Load the checkpoint metadata when the configuration package is importable.

    Returns:
        tuple: ``(True, metadata)`` with a ``CheckpointMetadata`` built from the
            checkpoint contract file path, or ``(False, None)`` when an
            ImportError is raised while loading it.

    """
    try:
        from snowflake.snowpark_checkpoints_configuration.checkpoint_metadata import (
            CheckpointMetadata,
        )

        # The IO strategy is synchronised before the contract path is resolved.
        _set_conf_io_strategy()
        contract_path = _get_checkpoint_contract_file_path()
        LOGGER.debug("Loading checkpoint metadata from '%s'", contract_path)
        return True, CheckpointMetadata(contract_path)

    except ImportError:
        LOGGER.debug(
            "snowpark-checkpoints-configuration is not installed. Cannot get a checkpoint metadata instance."
        )
        return False, None
|
96
|
+
|
97
|
+
|
98
|
+
def is_checkpoint_enabled(checkpoint_name: str) -> bool:
    """Tell whether the given checkpoint is enabled.

    Args:
        checkpoint_name (str): The name of the checkpoint.

    Returns:
        bool: the ``enabled`` value of the checkpoint's configuration when
            checkpoint metadata is available; True when it is not.

    """
    metadata_available, metadata = _get_metadata()
    if not metadata_available:
        # Without metadata every checkpoint is treated as enabled.
        return True

    checkpoint_config = metadata.get_checkpoint(checkpoint_name)
    return checkpoint_config.enabled
|
113
|
+
|
114
|
+
|
115
|
+
def get_checkpoint_sample(
    checkpoint_name: str, sample: Optional[float] = None
) -> float:
    """Resolve the sample value for a checkpoint.

    Precedence: the ``sample`` argument, then the sample stored in the
    checkpoint configuration, then the default value 1.0.

    Args:
        checkpoint_name (str): The name of the checkpoint.
        sample (float, optional): The value passed to the function.

    Returns:
        float: the sample to use for that specific checkpoint.

    """
    resolved_sample = 1.0

    # The configuration is consulted even when an explicit sample was given.
    metadata_available, metadata = _get_metadata()
    if metadata_available:
        checkpoint_config = metadata.get_checkpoint(checkpoint_name)
        if checkpoint_config.sample is not None:
            resolved_sample = checkpoint_config.sample

    if sample is None:
        return resolved_sample
    return sample
|
139
|
+
|
140
|
+
|
141
|
+
def get_checkpoint_mode(
    checkpoint_name: str, mode: Optional[CheckpointMode] = None
) -> CheckpointMode:
    """Resolve the mode for a checkpoint.

    Precedence: the ``mode`` argument, then the mode stored in the checkpoint
    configuration, then the default ``CheckpointMode.SCHEMA``.

    Args:
        checkpoint_name (str): The name of the checkpoint.
        mode (CheckpointMode, optional): The value passed to the function.

    Returns:
        CheckpointMode: the mode to use for that specific checkpoint.

    """
    resolved_mode = CheckpointMode.SCHEMA

    # The configuration is consulted even when an explicit mode was given.
    metadata_available, metadata = _get_metadata()
    if metadata_available:
        checkpoint_config = metadata.get_checkpoint(checkpoint_name)
        if checkpoint_config.mode is not None:
            resolved_mode = checkpoint_config.mode

    if mode is None:
        return resolved_mode
    return mode
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
import inspect
|
16
|
+
import os
|
17
|
+
import tempfile
|
18
|
+
|
19
|
+
from typing import Optional
|
20
|
+
|
21
|
+
from snowflake.snowpark_checkpoints_collector.collection_common import (
|
22
|
+
COLLECTION_RESULT_FILE_NAME,
|
23
|
+
DOT_IPYNB_EXTENSION,
|
24
|
+
SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
|
25
|
+
UNKNOWN_LINE_OF_CODE,
|
26
|
+
UNKNOWN_SOURCE_FILE,
|
27
|
+
)
|
28
|
+
from snowflake.snowpark_checkpoints_collector.io_utils.io_file_manager import (
|
29
|
+
get_io_file_manager,
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
def get_output_file_path(out_path: Optional[str] = None) -> str:
    """Build the path of the collection result file.

    Args:
        out_path (Optional[str], optional): base path handed to
            ``get_output_directory_path``. Defaults to None.

    Returns:
        str: the output directory path joined with COLLECTION_RESULT_FILE_NAME.

    """
    return os.path.join(
        get_output_directory_path(out_path), COLLECTION_RESULT_FILE_NAME
    )
|
46
|
+
|
47
|
+
|
48
|
+
def get_relative_file_path(path: str) -> str:
    """Express a file path relative to the current directory.

    Args:
        path (str): a file path.

    Returns:
        str: the given path rewritten relative to the current directory.

    """
    return os.path.relpath(path, start=os.curdir)
|
60
|
+
|
61
|
+
|
62
|
+
def get_output_directory_path(output_path: Optional[str] = None) -> str:
    """Return the checkpoints output directory, creating it when missing.

    Args:
        output_path (Optional[str], optional): base directory. When it is None
            or empty, the working directory reported by the IO file manager is
            used instead. Defaults to None.

    Returns:
        str: the base directory joined with
            SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME.

    """
    base_directory = output_path or get_io_file_manager().getcwd()
    output_directory = os.path.join(
        base_directory, SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME
    )
    # exist_ok=True is passed so an already existing directory is accepted.
    get_io_file_manager().mkdir(output_directory, exist_ok=True)
    return output_directory
|
77
|
+
|
78
|
+
|
79
|
+
def get_collection_point_source_file_path() -> str:
    """Get the path of the source file that contains the collection point.

    The file name is read from the frame two levels above this function. When
    that file lives under the temporary directory, the working directory is
    searched for notebook files: a single match is returned, anything else
    yields UNKNOWN_SOURCE_FILE.

    Returns:
        str: the source file path, or UNKNOWN_SOURCE_FILE when it cannot be
            determined or any exception is raised.

    """
    try:
        source_file_path = inspect.stack()[2].filename
        if not _is_temporal_path(source_file_path):
            return source_file_path

        notebook_paths = _get_ipynb_file_path_collection()
        if len(notebook_paths) != 1:
            # Zero or several notebooks: the origin is ambiguous.
            return UNKNOWN_SOURCE_FILE
        return notebook_paths[0]

    except Exception:
        return UNKNOWN_SOURCE_FILE
|
100
|
+
|
101
|
+
|
102
|
+
def get_collection_point_line_of_code() -> int:
    """Find the line of code where the collection point is located.

    The line number is read from the frame two levels above this function.

    Returns:
        int: the line number in the source file, or UNKNOWN_LINE_OF_CODE when
            the source file lives under the temporary directory or any
            exception is raised.

    """
    try:
        # Capture the stack once: both the file name and the line number come
        # from the same frame, so a second capture would be redundant work.
        collection_point_frame = inspect.stack()[2]
        if _is_temporal_path(collection_point_frame.filename):
            return UNKNOWN_LINE_OF_CODE
        return collection_point_frame.lineno

    except Exception:
        return UNKNOWN_LINE_OF_CODE
|
119
|
+
|
120
|
+
|
121
|
+
def _is_temporal_path(path: str) -> bool:
    """Tell whether a path is the temporary directory or is located inside it.

    Args:
        path (str): the path to check.

    Returns:
        bool: True when the path equals ``tempfile.gettempdir()`` or lies
            below it, False otherwise.

    """
    temporal_directory_path = os.path.normcase(
        os.path.normpath(tempfile.gettempdir())
    )
    candidate_path = os.path.normcase(os.path.normpath(path))
    if candidate_path == temporal_directory_path:
        return True
    # Compare on a directory boundary: a bare prefix test would also accept
    # sibling directories such as "/tmp_other" when the temp dir is "/tmp".
    directory_prefix = os.path.join(temporal_directory_path, "")
    return candidate_path.startswith(directory_prefix)
|
125
|
+
|
126
|
+
|
127
|
+
def _get_ipynb_file_path_collection() -> list[str]:
    """List the notebook files found in the working directory.

    Returns:
        list[str]: for every entry listed by the IO file manager whose name
            ends with DOT_IPYNB_EXTENSION, the working directory joined with
            that name.

    """
    working_directory = get_io_file_manager().getcwd()
    entry_names = get_io_file_manager().ls(working_directory)
    return [
        os.path.join(working_directory, entry_name)
        for entry_name in entry_names
        if entry_name.endswith(DOT_IPYNB_EXTENSION)
    ]
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
import logging
|
17
|
+
|
18
|
+
from functools import wraps
|
19
|
+
from typing import Callable, Optional, TypeVar
|
20
|
+
|
21
|
+
from typing_extensions import ParamSpec
|
22
|
+
|
23
|
+
|
24
|
+
P = ParamSpec("P")
|
25
|
+
R = TypeVar("R")
|
26
|
+
|
27
|
+
|
28
|
+
def log(
|
29
|
+
_func: Optional[Callable[P, R]] = None,
|
30
|
+
*,
|
31
|
+
logger: Optional[logging.Logger] = None,
|
32
|
+
log_args: bool = True,
|
33
|
+
) -> Callable[[Callable[P, R]], Callable[P, R]]:
|
34
|
+
"""Log the function call and any exceptions that occur.
|
35
|
+
|
36
|
+
Args:
|
37
|
+
_func: The function to log.
|
38
|
+
logger: The logger to use for logging. If not provided, a logger will be created using the
|
39
|
+
function's module name.
|
40
|
+
log_args: Whether to log the arguments passed to the function.
|
41
|
+
|
42
|
+
Returns:
|
43
|
+
A decorator that logs the function call and any exceptions that occur.
|
44
|
+
|
45
|
+
"""
|
46
|
+
|
47
|
+
def decorator(func: Callable[P, R]) -> Callable[P, R]:
|
48
|
+
@wraps(func)
|
49
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
|
50
|
+
_logger = logging.getLogger(func.__module__) if logger is None else logger
|
51
|
+
if log_args:
|
52
|
+
args_repr = [repr(a) for a in args]
|
53
|
+
kwargs_repr = [f"{k}={v!r}" for k, v in kwargs.items()]
|
54
|
+
formatted_args = ", ".join([*args_repr, *kwargs_repr])
|
55
|
+
_logger.debug("%s called with args %s", func.__name__, formatted_args)
|
56
|
+
try:
|
57
|
+
return func(*args, **kwargs)
|
58
|
+
except Exception:
|
59
|
+
_logger.exception("An error occurred in %s", func.__name__)
|
60
|
+
raise
|
61
|
+
|
62
|
+
return wrapper
|
63
|
+
|
64
|
+
# Handle the case where the decorator is used without parentheses
|
65
|
+
if _func is None:
|
66
|
+
return decorator
|
67
|
+
return decorator(_func)
|