snowpark-checkpoints-collectors 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,10 +13,18 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ import logging
+
+
+ # Add a NullHandler to prevent logging messages from being output to
+ # sys.stderr if no logging configuration is provided.
+ logging.getLogger(__name__).addHandler(logging.NullHandler())
+
+ # ruff: noqa: E402
+
  __all__ = ["collect_dataframe_checkpoint", "CheckpointMode"]

+ from snowflake.snowpark_checkpoints_collector.collection_common import CheckpointMode
  from snowflake.snowpark_checkpoints_collector.summary_stats_collector import (
  collect_dataframe_checkpoint,
  )
-
- from snowflake.snowpark_checkpoints_collector.collection_common import CheckpointMode
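The hunk above attaches a `NullHandler` to the package logger, so the new log records added throughout 0.2.0 stay silent unless the host application configures logging itself. A minimal sketch of how a consumer could surface them (the format string and level are illustrative choices, not prescribed by the package):

```python
import logging

# Route the collector's records to the root handler; without explicit
# configuration the package-level NullHandler discards them.
logging.basicConfig(format="%(asctime)s %(name)s %(levelname)s: %(message)s")
logging.getLogger("snowflake.snowpark_checkpoints_collector").setLevel(logging.DEBUG)
```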
@@ -13,4 +13,4 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- __version__ = "0.1.3"
+ __version__ = "0.2.0"
@@ -12,7 +12,9 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
  import json
+ import logging

  from typing import Optional

@@ -24,6 +26,7 @@ from snowflake.snowpark_checkpoints_collector.utils import file_utils


  RESULTS_KEY = "results"
+ LOGGER = logging.getLogger(__name__)


  class CollectionPointResultManager(metaclass=Singleton):
@@ -49,6 +52,7 @@ class CollectionPointResultManager(metaclass=Singleton):

  """
  result_json = result.get_collection_result_data()
+ LOGGER.debug("Adding a new collection result: %s", result_json)
  self.result_collection.append(result_json)
  self._save_result()

@@ -65,5 +69,6 @@ class CollectionPointResultManager(metaclass=Singleton):

  def _save_result(self) -> None:
  result_collection_json = self.to_json()
+ LOGGER.info("Saving collection results to '%s'", self.output_file_path)
  with open(self.output_file_path, "w") as f:
  f.write(result_collection_json)
@@ -12,6 +12,9 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
+ import logging
+
  from pyspark.sql import DataFrame as SparkDataFrame
  from pyspark.sql.types import StructField

@@ -53,6 +56,9 @@ from snowflake.snowpark_checkpoints_collector.column_collection.model import (
  )


+ LOGGER = logging.getLogger(__name__)
+
+
  def collector_register(cls):
  """Decorate a class with the collection type mechanism.

@@ -63,6 +69,7 @@ def collector_register(cls):
  The class to decorate.

  """
+ LOGGER.debug("Starting to register collectors from class %s", cls.__name__)
  cls._collectors = {}
  for method_name in dir(cls):
  method = getattr(cls, method_name)
@@ -70,6 +77,11 @@ def collector_register(cls):
  col_type_collection = method._column_type
  for col_type in col_type_collection:
  cls._collectors[col_type] = method_name
+ LOGGER.debug(
+ "Registered collector '%s' for column type '%s'",
+ method_name,
+ col_type,
+ )
  return cls


@@ -114,10 +126,21 @@ class ColumnCollectorManager:
  """
  clm_type = struct_field.dataType.typeName()
  if clm_type not in self._collectors:
+ LOGGER.debug(
+ "No collectors found for column '%s' of type '%s'. Skipping collection for this column.",
+ clm_name,
+ clm_type,
+ )
  return {}

  func_name = self._collectors[clm_type]
  func = getattr(self, func_name)
+ LOGGER.debug(
+ "Collecting custom data for column '%s' of type '%s' using collector method '%s'",
+ clm_name,
+ clm_type,
+ func_name,
+ )
  data = func(clm_name, struct_field, values)
  return data

@@ -12,6 +12,9 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
+ import logging
+
  import pandas as pd

  from pandera import Check, Column
@@ -39,6 +42,9 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
  )


+ LOGGER = logging.getLogger(__name__)
+
+
  def collector_register(cls):
  """Decorate a class with the checks mechanism.

@@ -49,6 +55,7 @@ def collector_register(cls):
  The class to decorate.

  """
+ LOGGER.debug("Starting to register checks from class %s", cls.__name__)
  cls._collectors = {}
  for method_name in dir(cls):
  method = getattr(cls, method_name)
@@ -56,6 +63,9 @@ def collector_register(cls):
  col_type_collection = method._column_type
  for col_type in col_type_collection:
  cls._collectors[col_type] = method_name
+ LOGGER.debug(
+ "Registered check '%s' for column type '%s'", method_name, col_type
+ )
  return cls


@@ -101,10 +111,18 @@ class PanderaColumnChecksManager:

  """
  if clm_type not in self._collectors:
+ LOGGER.debug(
+ "No Pandera checks found for column '%s' of type '%s'. Skipping checks for this column.",
+ clm_name,
+ clm_type,
+ )
  return

  func_name = self._collectors[clm_type]
  func = getattr(self, func_name)
+ LOGGER.debug(
+ "Adding Pandera checks to column '%s' of type '%s'", clm_name, clm_type
+ )
  func(clm_name, pyspark_df, pandera_column)

  @column_register(BOOLEAN_COLUMN_TYPE)
@@ -12,7 +12,9 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
  import glob
+ import logging
  import os.path
  import time

@@ -30,6 +32,7 @@ CREATE_STAGE_STATEMENT_FORMAT = "CREATE TEMP STAGE IF NOT EXISTS {}"
  REMOVE_STAGE_FOLDER_STATEMENT_FORMAT = "REMOVE {}"
  STAGE_PATH_FORMAT = "'@{}/{}'"
  PUT_FILE_IN_STAGE_STATEMENT_FORMAT = "PUT '{}' {} AUTO_COMPRESS=FALSE"
+ LOGGER = logging.getLogger(__name__)


  class SnowConnection:
@@ -41,14 +44,16 @@ class SnowConnection:

  """

- def __init__(self, session: Session = None) -> None:
+ def __init__(self, session: Optional[Session] = None) -> None:
  """Init SnowConnection.

  Args:
  session (Snowpark.Session): the Snowpark session.

  """
- self.session = session if session is not None else Session.builder.getOrCreate()
+ self.session = (
+ session if session is not None else self._create_snowpark_session()
+ )
  self.stage_id = int(time.time())

  def create_snowflake_table_from_local_parquet(
@@ -84,8 +89,8 @@ class SnowConnection:
  stage_name, stage_path, input_path, is_parquet_file
  )
  self.create_table_from_parquet(table_name, stage_directory_path)
-
  finally:
+ LOGGER.info("Removing stage folder %s", stage_directory_path)
  self.session.sql(
  REMOVE_STAGE_FOLDER_STATEMENT_FORMAT.format(stage_directory_path)
  ).collect()
@@ -98,6 +103,7 @@ class SnowConnection:

  """
  create_stage_statement = CREATE_STAGE_STATEMENT_FORMAT.format(stage_name)
+ LOGGER.info("Creating temporal stage '%s'", stage_name)
  self.session.sql(create_stage_statement).collect()

  def load_files_to_stage(
@@ -105,7 +111,7 @@ class SnowConnection:
  stage_name: str,
  folder_name: str,
  input_path: str,
- filter_func: Callable = None,
+ filter_func: Optional[Callable] = None,
  ) -> None:
  """Load files to a stage in Snowflake.

@@ -116,6 +122,7 @@ class SnowConnection:
  filter_func (Callable): the filter function to apply to the files.

  """
+ LOGGER.info("Starting to load files to '%s'", stage_name)
  input_path = (
  os.path.abspath(input_path)
  if not os.path.isabs(input_path)
@@ -126,16 +133,20 @@ class SnowConnection:
  return os.path.isfile(name) and (filter_func(name) if filter_func else True)

  target_dir = os.path.join(input_path, "**", "*")
+ LOGGER.debug("Searching for files in '%s'", input_path)
  files_collection = glob.glob(target_dir, recursive=True)

  files = [file for file in files_collection if filter_files(file)]
+ files_count = len(files)

- if len(files) == 0:
+ if files_count == 0:
  raise Exception(f"No files were found in the input directory: {input_path}")

+ LOGGER.debug("Found %s files in '%s'", files_count, input_path)
+
  for file in files:
  # if file is relative path, convert to absolute path
- # if absolute path, then try to resolve as some Win32 paths are not in LPN.
+ # if absolute path, then try to resolve as some Win32 paths are not in LPN.
  file_full_path = (
  str(os.path.abspath(file))
  if not os.path.isabs(file)
@@ -150,6 +161,7 @@ class SnowConnection:
  put_statement = PUT_FILE_IN_STAGE_STATEMENT_FORMAT.format(
  normalize_file_path, stage_file_path
  )
+ LOGGER.info("Loading file '%s' to %s", file_full_path, stage_file_path)
  self.session.sql(put_statement).collect()

  def create_table_from_parquet(
@@ -165,8 +177,25 @@ class SnowConnection:
  Exception: No parquet files were found in the stage

  """
- files = self.session.sql(f"LIST {stage_directory_path}").collect()
- if len(files) == 0:
- raise Exception("No parquet files were found in the stage.")
+ LOGGER.info("Starting to create table '%s' from parquet files", table_name)
+ parquet_files = self.session.sql(
+ f"LIST {stage_directory_path} PATTERN='.*{DOT_PARQUET_EXTENSION}'"
+ ).collect()
+ parquet_files_count = len(parquet_files)
+ if parquet_files_count == 0:
+ raise Exception(
+ f"No parquet files were found in the stage: {stage_directory_path}"
+ )
+
+ LOGGER.info(
+ "Reading %s parquet files from %s",
+ parquet_files_count,
+ stage_directory_path,
+ )
  dataframe = self.session.read.parquet(path=stage_directory_path)
+ LOGGER.info("Creating table '%s' from parquet files", table_name)
  dataframe.write.save_as_table(table_name=table_name, mode="overwrite")
+
+ def _create_snowpark_session(self) -> Session:
+ LOGGER.info("Creating a Snowpark session using the default connection")
+ return Session.builder.getOrCreate()
@@ -12,8 +12,10 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
  import glob
  import json
+ import logging
  import os
  import shutil

@@ -64,9 +66,14 @@ from snowflake.snowpark_checkpoints_collector.utils.extra_config import (
  get_checkpoint_sample,
  is_checkpoint_enabled,
  )
+ from snowflake.snowpark_checkpoints_collector.utils.logging_utils import log
  from snowflake.snowpark_checkpoints_collector.utils.telemetry import report_telemetry


+ LOGGER = logging.getLogger(__name__)
+
+
+ @log
  def collect_dataframe_checkpoint(
  df: SparkDataFrame,
  checkpoint_name: str,
@@ -91,80 +98,90 @@ def collect_dataframe_checkpoint(
  Exception: Invalid checkpoint name. Checkpoint names must only contain alphanumeric characters and underscores.

  """
- try:
- normalized_checkpoint_name = checkpoint_name_utils.normalize_checkpoint_name(
- checkpoint_name
+ normalized_checkpoint_name = checkpoint_name_utils.normalize_checkpoint_name(
+ checkpoint_name
+ )
+ if normalized_checkpoint_name != checkpoint_name:
+ LOGGER.info(
+ "Checkpoint name '%s' was normalized to '%s'",
+ checkpoint_name,
+ normalized_checkpoint_name,
+ )
+ is_valid_checkpoint_name = checkpoint_name_utils.is_valid_checkpoint_name(
+ normalized_checkpoint_name
+ )
+ if not is_valid_checkpoint_name:
+ raise Exception(
+ f"Invalid checkpoint name: {normalized_checkpoint_name}. "
+ f"Checkpoint names must only contain alphanumeric characters and underscores."
  )
- is_valid_checkpoint_name = checkpoint_name_utils.is_valid_checkpoint_name(
- normalized_checkpoint_name
+ if not is_checkpoint_enabled(normalized_checkpoint_name):
+ LOGGER.info(
+ "Checkpoint '%s' is disabled. Skipping collection.",
+ normalized_checkpoint_name,
  )
- if not is_valid_checkpoint_name:
+ return
+
+ LOGGER.info("Starting to collect checkpoint '%s'", normalized_checkpoint_name)
+ LOGGER.debug("DataFrame size: %s rows", df.count())
+ LOGGER.debug("DataFrame schema: %s", df.schema)
+
+ collection_point_file_path = file_utils.get_collection_point_source_file_path()
+ collection_point_line_of_code = file_utils.get_collection_point_line_of_code()
+ collection_point_result = CollectionPointResult(
+ collection_point_file_path,
+ collection_point_line_of_code,
+ normalized_checkpoint_name,
+ )
+
+ try:
+ if _is_empty_dataframe_without_schema(df):
  raise Exception(
- f"Invalid checkpoint name: {checkpoint_name}. Checkpoint names must only contain alphanumeric "
- f"characters and underscores."
+ "It is not possible to collect an empty DataFrame without schema"
  )

- if is_checkpoint_enabled(normalized_checkpoint_name):
+ _mode = get_checkpoint_mode(normalized_checkpoint_name, mode)

- collection_point_file_path = (
- file_utils.get_collection_point_source_file_path()
- )
- collection_point_line_of_code = (
- file_utils.get_collection_point_line_of_code()
+ if _mode == CheckpointMode.SCHEMA:
+ column_type_dict = _get_spark_column_types(df)
+ _sample = get_checkpoint_sample(normalized_checkpoint_name, sample)
+ LOGGER.info(
+ "Collecting checkpoint in %s mode using sample value %s",
+ CheckpointMode.SCHEMA.name,
+ _sample,
  )
- collection_point_result = CollectionPointResult(
- collection_point_file_path,
- collection_point_line_of_code,
+ _collect_dataframe_checkpoint_mode_schema(
  normalized_checkpoint_name,
+ df,
+ _sample,
+ column_type_dict,
+ output_path,
  )
+ elif _mode == CheckpointMode.DATAFRAME:
+ LOGGER.info(
+ "Collecting checkpoint in %s mode", CheckpointMode.DATAFRAME.name
+ )
+ snow_connection = SnowConnection()
+ _collect_dataframe_checkpoint_mode_dataframe(
+ normalized_checkpoint_name, df, snow_connection, output_path
+ )
+ else:
+ raise Exception(f"Invalid mode value: {_mode}")

- try:
-
- _sample = get_checkpoint_sample(normalized_checkpoint_name, sample)
-
- if _is_empty_dataframe_without_schema(df):
- raise Exception(
- "It is not possible to collect an empty DataFrame without schema"
- )
-
- _mode = get_checkpoint_mode(normalized_checkpoint_name, mode)
-
- if _mode == CheckpointMode.SCHEMA:
- column_type_dict = _get_spark_column_types(df)
- _collect_dataframe_checkpoint_mode_schema(
- normalized_checkpoint_name,
- df,
- _sample,
- column_type_dict,
- output_path,
- )
-
- elif _mode == CheckpointMode.DATAFRAME:
- snow_connection = SnowConnection()
- _collect_dataframe_checkpoint_mode_dataframe(
- normalized_checkpoint_name, df, snow_connection, output_path
- )
-
- else:
- raise Exception("Invalid mode value.")
-
- collection_point_result.result = CollectionResult.PASS
-
- except Exception as err:
- collection_point_result.result = CollectionResult.FAIL
- error_message = str(err)
- raise Exception(error_message) from err
-
- finally:
- collection_point_result_manager = CollectionPointResultManager(
- output_path
- )
- collection_point_result_manager.add_result(collection_point_result)
+ collection_point_result.result = CollectionResult.PASS
+ LOGGER.info(
+ "Checkpoint '%s' collected successfully", normalized_checkpoint_name
+ )

  except Exception as err:
+ collection_point_result.result = CollectionResult.FAIL
  error_message = str(err)
  raise Exception(error_message) from err

+ finally:
+ collection_point_result_manager = CollectionPointResultManager(output_path)
+ collection_point_result_manager.add_result(collection_point_result)
+

  @report_telemetry(params_list=["column_type_dict"])
  def _collect_dataframe_checkpoint_mode_schema(
@@ -176,12 +193,19 @@ def _collect_dataframe_checkpoint_mode_schema(
  ) -> None:
  sampled_df = df.sample(sample)
  if sampled_df.isEmpty():
+ LOGGER.warning("Sampled DataFrame is empty. Collecting full DataFrame.")
  sampled_df = df
+
  pandas_df = _to_pandas(sampled_df)
  is_empty_df_with_object_column = _is_empty_dataframe_with_object_column(df)
- pandera_infer_schema = (
- pa.infer_schema(pandas_df) if not is_empty_df_with_object_column else {}
- )
+ if is_empty_df_with_object_column:
+ LOGGER.debug(
+ "DataFrame is empty with object column. Skipping Pandera schema inference."
+ )
+ pandera_infer_schema = {}
+ else:
+ LOGGER.debug("Inferring Pandera schema from DataFrame")
+ pandera_infer_schema = pa.infer_schema(pandas_df)

  column_name_collection = df.schema.names
  columns_to_remove_from_pandera_schema_collection = []
@@ -192,19 +216,20 @@
  for column_name in column_name_collection:
  struct_field_column = column_type_dict[column_name]
  column_type = struct_field_column.dataType.typeName()
+ LOGGER.info("Collecting column '%s' of type '%s'", column_name, column_type)
  pyspark_column = df.select(col(column_name))

- is_empty_column = (
- pyspark_column.dropna().isEmpty() and column_type is not NULL_COLUMN_TYPE
- )
  is_column_to_remove_from_pandera_schema = (
  _is_column_to_remove_from_pandera_schema(column_type)
  )
-
  if is_column_to_remove_from_pandera_schema:
  columns_to_remove_from_pandera_schema_collection.append(column_name)

+ is_empty_column = (
+ pyspark_column.dropna().isEmpty() and column_type is not NULL_COLUMN_TYPE
+ )
  if is_empty_column:
+ LOGGER.debug("Column '%s' is empty.", column_name)
  custom_data = column_collector_manager.collect_empty_custom_data(
  column_name, struct_field_column, pyspark_column
  )
@@ -280,6 +305,7 @@ def _get_pandera_infer_schema_as_dict(

  pandera_infer_schema_dict = json.loads(pandera_infer_schema.to_json())
  for column in columns_to_remove_collection:
+ LOGGER.debug("Removing column '%s' from Pandera schema", column)
  del pandera_infer_schema_dict[COLUMNS_KEY][column]

  return pandera_infer_schema_dict
@@ -293,6 +319,7 @@ def _generate_json_checkpoint_file(
  )
  output_directory_path = file_utils.get_output_directory_path(output_path)
  checkpoint_file_path = os.path.join(output_directory_path, checkpoint_file_name)
+ LOGGER.info("Writing DataFrame JSON schema file to '%s'", checkpoint_file_path)
  with open(checkpoint_file_path, "w") as f:
  f.write(dataframe_schema_contract)

@@ -339,14 +366,24 @@ def generate_parquet_for_spark_df(spark_df: SparkDataFrame, output_path: str) ->
  converted_df = spark_df.select(new_cols)

  if os.path.exists(output_path):
+ LOGGER.warning(
+ "Output directory '%s' already exists. Deleting it...", output_path
+ )
  shutil.rmtree(output_path)

+ LOGGER.info("Writing DataFrame to parquet files at '%s'", output_path)
  converted_df.write.parquet(output_path, mode="overwrite")

  target_dir = os.path.join(output_path, "**", f"*{DOT_PARQUET_EXTENSION}")
- files = glob.glob(target_dir, recursive=True)
- if len(files) == 0:
+ parquet_files = glob.glob(target_dir, recursive=True)
+ parquet_files_count = len(parquet_files)
+ if parquet_files_count == 0:
  raise Exception("No parquet files were generated.")
+ LOGGER.info(
+ "%s parquet files were written in '%s'",
+ parquet_files_count,
+ output_path,
+ )


  def _create_snowflake_table_from_parquet(
@@ -356,11 +393,17 @@ def _create_snowflake_table_from_parquet(


  def _to_pandas(sampled_df: SparkDataFrame) -> pandas.DataFrame:
+ LOGGER.debug("Converting Spark DataFrame to Pandas DataFrame")
  pandas_df = sampled_df.toPandas()
  for field in sampled_df.schema.fields:
  has_nan = pandas_df[field.name].isna().any()
  is_integer = field.dataType.typeName() in INTEGER_TYPE_COLLECTION
  if has_nan and is_integer:
+ LOGGER.debug(
+ "Converting column '%s' to '%s' type",
+ field.name,
+ PANDAS_LONG_TYPE,
+ )
  pandas_df[field.name] = pandas_df[field.name].astype(PANDAS_LONG_TYPE)

  return pandas_df
@@ -12,6 +12,8 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
+ import logging
  import os

  from typing import Optional
@@ -22,6 +24,8 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
  )


+ LOGGER = logging.getLogger(__name__)
+
  # noinspection DuplicatedCode
  def _get_checkpoint_contract_file_path() -> str:
  return os.environ.get(SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR, os.getcwd())
@@ -35,10 +39,14 @@ def _get_metadata():
  )

  path = _get_checkpoint_contract_file_path()
+ LOGGER.debug("Loading checkpoint metadata from '%s'", path)
  metadata = CheckpointMetadata(path)
  return True, metadata

  except ImportError:
+ LOGGER.debug(
+ "snowpark-checkpoints-configuration is not installed. Cannot get a checkpoint metadata instance."
+ )
  return False, None


@@ -56,8 +64,7 @@ def is_checkpoint_enabled(checkpoint_name: str) -> bool:
  if enabled:
  config = metadata.get_checkpoint(checkpoint_name)
  return config.enabled
- else:
- return True
+ return True


  def get_checkpoint_sample(
@@ -0,0 +1,67 @@
+ # Copyright 2025 Snowflake Inc.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+
+ # http://www.apache.org/licenses/LICENSE-2.0
+
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+
+ from functools import wraps
+ from typing import Callable, Optional, TypeVar
+
+ from typing_extensions import ParamSpec
+
+
+ P = ParamSpec("P")
+ R = TypeVar("R")
+
+
+ def log(
+ _func: Optional[Callable[P, R]] = None,
+ *,
+ logger: Optional[logging.Logger] = None,
+ log_args: bool = True,
+ ) -> Callable[[Callable[P, R]], Callable[P, R]]:
+ """Log the function call and any exceptions that occur.
+
+ Args:
+ _func: The function to log.
+ logger: The logger to use for logging. If not provided, a logger will be created using the
+ function's module name.
+ log_args: Whether to log the arguments passed to the function.
+
+ Returns:
+ A decorator that logs the function call and any exceptions that occur.
+
+ """
+
+ def decorator(func: Callable[P, R]) -> Callable[P, R]:
+ @wraps(func)
+ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
+ _logger = logging.getLogger(func.__module__) if logger is None else logger
+ if log_args:
+ args_repr = [repr(a) for a in args]
+ kwargs_repr = [f"{k}={v!r}" for k, v in kwargs.items()]
+ formatted_args = ", ".join([*args_repr, *kwargs_repr])
+ _logger.debug("%s called with args %s", func.__name__, formatted_args)
+ try:
+ return func(*args, **kwargs)
+ except Exception:
+ _logger.exception("An error occurred in %s", func.__name__)
+ raise
+
+ return wrapper
+
+ # Handle the case where the decorator is used without parentheses
+ if _func is None:
+ return decorator
+ return decorator(_func)
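The new `log` decorator above supports both the bare form and the parameterized form. A small illustrative sketch of how code could apply it; the `normalize` and `run_collection` functions and the `my_app.collectors` logger name are hypothetical, not part of the package:

```python
import logging

from snowflake.snowpark_checkpoints_collector.utils.logging_utils import log


@log  # bare form: records go to a logger named after the function's module
def normalize(name: str) -> str:
    return name.strip().lower()


@log(logger=logging.getLogger("my_app.collectors"), log_args=False)  # parameterized form
def run_collection(checkpoint_name: str) -> None:
    # Arguments are not logged here because log_args=False.
    ...
```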
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: snowpark-checkpoints-collectors
- Version: 0.1.3
+ Version: 0.2.0
  Summary: Snowpark column and table statistics collection
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -27,19 +27,21 @@ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: <3.12,>=3.9
  Requires-Dist: pandera[io]==0.20.4
- Requires-Dist: pyspark
  Requires-Dist: snowflake-connector-python
- Requires-Dist: snowflake-snowpark-python==1.26.0
+ Requires-Dist: snowflake-snowpark-python>=1.23.0
  Provides-Extra: development
  Requires-Dist: coverage>=7.6.7; extra == 'development'
  Requires-Dist: deepdiff>=8.0.0; extra == 'development'
  Requires-Dist: hatchling==1.25.0; extra == 'development'
  Requires-Dist: pre-commit>=4.0.1; extra == 'development'
  Requires-Dist: pyarrow>=18.0.0; extra == 'development'
+ Requires-Dist: pyspark>=3.5.0; extra == 'development'
  Requires-Dist: pytest-cov>=6.0.0; extra == 'development'
  Requires-Dist: pytest>=8.3.3; extra == 'development'
  Requires-Dist: setuptools>=70.0.0; extra == 'development'
  Requires-Dist: twine==5.1.1; extra == 'development'
+ Provides-Extra: pyspark
+ Requires-Dist: pyspark>=3.5.0; extra == 'pyspark'
  Description-Content-Type: text/markdown

  # snowpark-checkpoints-collectors
@@ -50,6 +52,18 @@ Description-Content-Type: text/markdown
  ---

  **snowpark-checkpoints-collector** package offers a function for extracting information from PySpark dataframes. We can then use that data to validate against the converted Snowpark dataframes to ensure that behavioral equivalence has been achieved.
+
+ ---
+ ## Install the library
+ ```bash
+ pip install snowpark-checkpoints-collectors
+ ```
+ This package requires PySpark to be installed in the same environment. If you do not have it, you can install PySpark alongside Snowpark Checkpoints by running the following command:
+ ```bash
+ pip install "snowpark-checkpoints-collectors[pyspark]"
+ ```
+ ---
+
  ## Features

  - Schema inference collected data mode (Schema): This is the default mode, which leverages Pandera schema inference to obtain the metadata and checks that will be evaluated for the specified dataframe. This mode also collects custom data from columns of the DataFrame based on the PySpark type.
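The README hunk above describes the package's single entry point for extracting information from a PySpark DataFrame. A hedged usage sketch of that entry point (the DataFrame, sample fraction, and output path are illustrative; parameter names are taken from the diffed signature and should be checked against the released API):

```python
from pyspark.sql import SparkSession

from snowflake.snowpark_checkpoints_collector import CheckpointMode, collect_dataframe_checkpoint

# Hypothetical PySpark DataFrame used only for illustration.
spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])

# Collect a schema-mode checkpoint for the DataFrame.
collect_dataframe_checkpoint(
    df,
    checkpoint_name="demo_initial_df",
    sample=0.5,
    mode=CheckpointMode.SCHEMA,
    output_path="./checkpoints",
)
```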
@@ -1,13 +1,13 @@
- snowflake/snowpark_checkpoints_collector/__init__.py,sha256=yf_DmREHUwtC8y_boY8iaQC3qaKi1miEb5kytllrAaw,874
- snowflake/snowpark_checkpoints_collector/__version__.py,sha256=OfdAqrd8gnFI-pK7o_olRVrRKIWfQhQOoo_wR3u1s5s,632
+ snowflake/snowpark_checkpoints_collector/__init__.py,sha256=GIESlH2W6g_qdcnyRqw9yjsvEkt0aniFvGixKlF4K7A,1096
+ snowflake/snowpark_checkpoints_collector/__version__.py,sha256=ajnGza8ucK69-PA8wEbHmWZxDwd3bsTm74yMKiIWNHY,632
  snowflake/snowpark_checkpoints_collector/collection_common.py,sha256=ff5vYffrTRjoJXZQvVQBaOlegAUj_vXBbl1IZidz8Qo,4510
  snowflake/snowpark_checkpoints_collector/singleton.py,sha256=7AgIHQBXVRvPBBCkmBplzkdrrm-xVWf_N8svzA2vF8E,836
- snowflake/snowpark_checkpoints_collector/summary_stats_collector.py,sha256=cvG1C9rLyF4w3Fybr3o_cno6mEHbXsbU17D_y2RrNck,12823
+ snowflake/snowpark_checkpoints_collector/summary_stats_collector.py,sha256=_U-gfBjk2QU_dDyJPGKekfzuP1Stkx-FyTuZiecvt6M,14572
  snowflake/snowpark_checkpoints_collector/collection_result/model/__init__.py,sha256=jZzx29WzrjH7C_6ZsBGoe4PxbW_oM4uIjySS1axIM34,1000
  snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result.py,sha256=8xD9zGnFJ7Rz9RUXIys7JnV3kQD4mk8QwNOTxAihSjQ,2908
- snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py,sha256=4rFBPUdjjf-SuqEaz0_lxBv8szEWI6N1x48P6zDbqVw,2360
+ snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py,sha256=6XbjHiehEm_RN_9y2MRlr0MaSgk3cWTczwZEYqUHCpM,2565
  snowflake/snowpark_checkpoints_collector/column_collection/__init__.py,sha256=hpTh1V7hqBSHxNUqISwfxdz-NLD-7oZEMLXDUuRsoOU,783
- snowflake/snowpark_checkpoints_collector/column_collection/column_collector_manager.py,sha256=_8CjfN0Q6g0g_hkvx6zBMat0RNAqQ89xfkid0MPLsRE,8961
+ snowflake/snowpark_checkpoints_collector/column_collection/column_collector_manager.py,sha256=Vav_vbiipHFIAdHxeQG4ZK1BAmWTi_18hBnVeIeXFRs,9670
  snowflake/snowpark_checkpoints_collector/column_collection/model/__init__.py,sha256=d0WNMeayDyUKYFLLaVAMIC5Qt-DoWoWgOjj2ygJaHWA,2919
  snowflake/snowpark_checkpoints_collector/column_collection/model/array_column_collector.py,sha256=10ITldLcri_3LoQaqrZJMUwvpcgs5gQy3-BFKQB77EA,4268
  snowflake/snowpark_checkpoints_collector/column_collection/model/binary_column_collector.py,sha256=TuvKnwCIyoc3B9DfSeckGk6-bLLrDVDZdW8NDFkitMI,3255
@@ -25,14 +25,15 @@ snowflake/snowpark_checkpoints_collector/column_collection/model/struct_column_c
  snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_column_collector.py,sha256=FAWxRUX25ep2XhhagsBVuNmB3QUMA1xMfNTVkaHilbY,2572
  snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_ntz_column_collector.py,sha256=glUUnCLgTbGiPLpF2pSZ11KCgKSpHDRt5uhi1ZT9bxA,2578
  snowflake/snowpark_checkpoints_collector/column_pandera_checks/__init__.py,sha256=JNZPOYx8rUTONGz_d7xyfAvEC2_umHmGkJLoNSATLs4,793
- snowflake/snowpark_checkpoints_collector/column_pandera_checks/pandera_column_checks_manager.py,sha256=uugv4Pyq0wpYvJRFyQmJR1SvnXjlqBNHTLIDiTLTLhA,7311
+ snowflake/snowpark_checkpoints_collector/column_pandera_checks/pandera_column_checks_manager.py,sha256=X1Mm37DKt-WZ5AegvoUA3itU1nBUxvhBxvjO85QqcGE,7893
  snowflake/snowpark_checkpoints_collector/snow_connection_model/__init__.py,sha256=kLjZId-aGCljK7lF6yeEw-syEqeTOJDxdXfpv9YxvZA,755
- snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py,sha256=QH3kPQ5rHS9CV7f-djw0mhM7KT99cFNYXpjU6ADJHuo,6047
+ snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py,sha256=odKGTzc0xov8WOgJSR6WmVs0IT-f6O4YoaLqH6CbbFo,7263
  snowflake/snowpark_checkpoints_collector/utils/checkpoint_name_utils.py,sha256=WExQaZ4oL4otDCtM8kyGbf0Gn_v1a-tzM5j1p0wVDVg,1767
- snowflake/snowpark_checkpoints_collector/utils/extra_config.py,sha256=xkXFH1PIS0Mtzpu-LrcOKBjzCbptp2zWqgGN9X1P_A0,3393
+ snowflake/snowpark_checkpoints_collector/utils/extra_config.py,sha256=t8WakSiHA3sgnXxz0WXE7q2MG7czWlnSYB5XR9swIhs,3643
  snowflake/snowpark_checkpoints_collector/utils/file_utils.py,sha256=deetkhQZOB0GUxQJvUHw4Ridp_rNYiCqmK9li3uwBL0,4324
+ snowflake/snowpark_checkpoints_collector/utils/logging_utils.py,sha256=yyi6X5DqKeTg0HRhvsH6ymYp2P0wbnyKIzI2RzrQS7k,2278
  snowflake/snowpark_checkpoints_collector/utils/telemetry.py,sha256=7S0yFE3Zq96SEGmVuVbpYc_wtXIQUpL--6KfGoxwJcA,30837
- snowpark_checkpoints_collectors-0.1.3.dist-info/METADATA,sha256=gfG0BmaLZS39w6mhL2nQ5qP9XrAxTU4hBgst0iZTaCk,5559
- snowpark_checkpoints_collectors-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- snowpark_checkpoints_collectors-0.1.3.dist-info/licenses/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
- snowpark_checkpoints_collectors-0.1.3.dist-info/RECORD,,
+ snowpark_checkpoints_collectors-0.2.0.dist-info/METADATA,sha256=LPo0O5OEDHGXHKla-KDJioKIX8bqwBPbgP6BS8ufnQA,6003
+ snowpark_checkpoints_collectors-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ snowpark_checkpoints_collectors-0.2.0.dist-info/licenses/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
+ snowpark_checkpoints_collectors-0.2.0.dist-info/RECORD,,