snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +101 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +24 -18
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +51 -0
- snowflake/ml/jobs/_utils/payload_utils.py +352 -0
- snowflake/ml/jobs/_utils/spec_utils.py +298 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +91 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +13 -8
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +39 -5
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +6 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +6 -7
- snowflake/ml/model/_signatures/pandas_handler.py +3 -3
- snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
- snowflake/ml/model/_signatures/snowpark_handler.py +11 -5
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +17 -4
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +16 -178
- snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +167 -91
- snowflake/ml/modeling/xgboost/xgb_regressor.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +166 -88
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +63 -19
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +231 -226
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py
CHANGED

```diff
@@ -68,21 +68,45 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         return cast("BaseEstimator", model)

     @classmethod
-    def _get_supported_object_for_explainability(cls, estimator: "BaseEstimator") -> Any:
+    def _get_supported_object_for_explainability(
+        cls,
+        estimator: "BaseEstimator",
+        background_data: Optional[model_types.SupportedDataType],
+        enable_explainability: Optional[bool],
+    ) -> Any:
         from snowflake.ml.modeling import pipeline as snowml_pipeline

         # handle pipeline objects separately
         if isinstance(estimator, snowml_pipeline.Pipeline):  # type: ignore[attr-defined]
             return None

-        methods = ["to_xgboost", "to_lightgbm", "to_sklearn"]
-        for method_name in methods:
+        tree_methods = ["to_xgboost", "to_lightgbm", "to_sklearn"]
+        non_tree_methods = ["to_sklearn"]
+        for method_name in tree_methods:
+            if hasattr(estimator, method_name):
+                try:
+                    result = getattr(estimator, method_name)()
+                    return result
+                except exceptions.SnowflakeMLException:
+                    pass  # Do nothing and continue to the next method
+        for method_name in non_tree_methods:
             if hasattr(estimator, method_name):
                 try:
                     result = getattr(estimator, method_name)()
+                    if enable_explainability is None and background_data is None:
+                        return None  # cannot get explain without background data
+                    elif enable_explainability and background_data is None:
+                        raise ValueError(
+                            "Provide `sample_input_data` to generate explanations for sklearn Snowpark ML models."
+                        )
                     return result
                 except exceptions.SnowflakeMLException:
                     pass  # Do nothing and continue to the next method
+
+        if enable_explainability:
+            raise ValueError(
+                "Explain only supported for xgboost, lightgbm and sklearn (not pipeline) Snowpark ML models."
+            )
         return None

     @classmethod
@@ -127,34 +151,39 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
                 raise ValueError(f"Target method {method_name} does not exist in the model.")
         model_meta.signatures = temp_model_signature_dict

-        if enable_explainability or enable_explainability is None:
-            python_base_obj = cls._get_supported_object_for_explainability(model)
-            if python_base_obj is None:
-                if enable_explainability:
-                    raise ValueError(
-                        "Explain only supported for xgboost, lightgbm and sklearn (not pipeline) Snowpark ML models."
-                    )
+        python_base_obj = cls._get_supported_object_for_explainability(model, sample_input_data, enable_explainability)
+        explain_target_method = handlers_utils.get_explain_target_method(model_meta, cls.EXPLAIN_TARGET_METHODS)
+
+        if enable_explainability:
+            if explain_target_method is None:
+                raise ValueError(
+                    "The model must have one of the following methods to enable explainability: "
+                    + ", ".join(cls.EXPLAIN_TARGET_METHODS)
+                )
+        if enable_explainability is None:
+            if python_base_obj is None or explain_target_method is None:
                 # set None to False so we don't include shap in the environment
                 enable_explainability = False
             else:
-                model_task_and_output_type = model_task_utils.get_model_task_and_output_type(python_base_obj)
-                model_meta.task = handlers_utils.validate_model_task(model_meta.task, model_task_and_output_type.task)
-                explain_target_method = handlers_utils.get_explain_target_method(model_meta, cls.EXPLAIN_TARGET_METHODS)
-                model_meta = handlers_utils.add_explain_method_signature(
-                    model_meta=model_meta,
-                    explain_method="explain",
-                    target_method=explain_target_method,
-                    output_return_type=model_task_and_output_type.output_type,
-                )
                 enable_explainability = True
-
-                background_data = handlers_utils.get_explainability_supported_background(
-                    sample_input_data, model_meta, explain_target_method
+        if enable_explainability:
+            model_task_and_output_type = model_task_utils.resolve_model_task_and_output_type(
+                python_base_obj, model_meta.task
+            )
+            model_meta.task = model_task_and_output_type.task
+            model_meta = handlers_utils.add_explain_method_signature(
+                model_meta=model_meta,
+                explain_method="explain",
+                target_method=explain_target_method,
+                output_return_type=model_task_and_output_type.output_type,
+            )
+            background_data = handlers_utils.get_explainability_supported_background(
+                sample_input_data, model_meta, explain_target_method
+            )
+            if background_data is not None:
+                handlers_utils.save_background_data(
+                    model_blobs_dir_path, cls.EXPLAIN_ARTIFACTS_DIR, cls.BG_DATA_FILE_SUFFIX, name, background_data
                 )
-                if background_data is not None:
-                    handlers_utils.save_background_data(
-                        model_blobs_dir_path, cls.EXPLAIN_ARTIFACTS_DIR, cls.BG_DATA_FILE_SUFFIX, name, background_data
-                    )

         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
@@ -237,8 +266,17 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
             import shap

-            methods = ["to_xgboost", "to_lightgbm", "to_sklearn"]
-            for method_name in methods:
+            tree_methods = ["to_xgboost", "to_lightgbm"]
+            non_tree_methods = ["to_sklearn"]
+            for method_name in tree_methods:
+                try:
+                    base_model = getattr(raw_model, method_name)()
+                    explainer = shap.TreeExplainer(base_model)
+                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer.shap_values(X))
+                    return model_signature_utils.rename_pandas_df(df, signature.outputs)
+                except exceptions.SnowflakeMLException:
+                    pass  # Do nothing and continue to the next method
+            for method_name in non_tree_methods:
                 try:
                     base_model = getattr(raw_model, method_name)()
                     explainer = shap.Explainer(base_model, masker=background_data)
```
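The practical upshot of the new `background_data` checks: explanations for sklearn-backed Snowpark ML models are only produced when sample data is available, and forcing them on without it now raises. A minimal usage sketch from the registry side, assuming an existing `Registry` object `reg`, a fitted `snowflake.ml.modeling` estimator `clf`, and a frame `train_df` (these names are illustrative, not from the diff):

```python
# Illustrative sketch only: `reg`, `clf`, and `train_df` are assumed to exist.
mv = reg.log_model(
    clf,
    model_name="my_classifier",
    version_name="v1",
    sample_input_data=train_df,  # background data; required when explainability is forced on
    options={"enable_explainability": True},
)
```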
snowflake/ml/model/_packager/model_handlers/tensorflow.py
CHANGED

```diff
@@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Callable, Dict, Optional, Type, cast, final

 import numpy as np
 import pandas as pd
+from packaging import version
 from typing_extensions import TypeGuard, Unpack

 from snowflake.ml._internal import type_utils
@@ -73,13 +74,42 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         if enable_explainability:
             raise NotImplementedError("Explainability is not supported for Tensorflow model.")

+        # When tensorflow is installed, keras is also installed.
+        import keras
         import tensorflow

         assert isinstance(model, tensorflow.Module)

         is_keras_model = type_utils.LazyType("tensorflow.keras.Model").isinstance(model) or type_utils.LazyType(
-            "tf_keras.Model"
+            "keras.Model"
         ).isinstance(model)
+        is_tf_keras_model = type_utils.LazyType("tf_keras.Model").isinstance(model)
+        is_keras_functional_or_sequential_model = (
+            getattr(model, "_is_graph_network", False)
+            or type_utils.LazyType("tensorflow.keras.engine.sequential.Sequential").isinstance(model)
+            or type_utils.LazyType("keras.engine.sequential.Sequential").isinstance(model)
+            or type_utils.LazyType("tf_keras.engine.sequential.Sequential").isinstance(model)
+        )
+
+        assert isinstance(model, tensorflow.Module)
+
+        keras_version = version.parse(keras.__version__)
+
+        # Tensorflow and keras model save format is different.
+        # Keras functional or sequential models are saved as keras format
+        # Keras v3 other models are saved using cloudpickle
+        # Keras v2 other models are saved using tensorflow saved model format
+        # Tensorflow models are saved using tensorflow saved model format
+
+        if is_keras_model or is_tf_keras_model:
+            if is_keras_functional_or_sequential_model:
+                save_format = "keras"
+            elif keras_version.major == 2 or is_tf_keras_model:
+                save_format = "keras_tf"
+            else:
+                save_format = "cloudpickle"
+        else:
+            save_format = "tf"

         if is_keras_model:
             default_target_methods = ["predict"]
@@ -93,6 +123,9 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
             default_target_methods=default_target_methods,
         )

+        if is_keras_model and len(target_methods) > 1:
+            raise ValueError("Keras model can only have one target method.")
+
         def get_prediction(
             target_method_name: str, sample_input_data: "model_types.SupportedLocalDataType"
         ) -> model_types.SupportedLocalDataType:
@@ -122,31 +155,43 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):

         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
-
-
-
-
-
-
-
-
+        save_path = os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR)
+        if save_format == "keras":
+            model.save(save_path, save_format="keras")
+        elif save_format == "keras_tf":
+            model.save(save_path, save_format="tf")
+        elif save_format == "cloudpickle":
+            import cloudpickle
+
+            with open(save_path, "wb") as f:
+                cloudpickle.dump(model, f)
         else:
-            tensorflow.saved_model.save(
+            tensorflow.saved_model.save(
+                model,
+                save_path,
+                options=tensorflow.saved_model.SaveOptions(experimental_custom_gradients=False),
+            )

         base_meta = model_blob_meta.ModelBlobMeta(
             name=name,
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
             path=cls.MODEL_BLOB_FILE_OR_DIR,
-            options=model_meta_schema.TensorflowModelBlobOptions(
+            options=model_meta_schema.TensorflowModelBlobOptions(save_format=save_format),
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION

+        dependencies = [
+            model_env.ModelDependency(requirement="tensorflow", pip_name="tensorflow"),
+        ]
+        if is_keras_model:
+            dependencies.append(model_env.ModelDependency(requirement="keras", pip_name="keras"))
+        elif is_tf_keras_model:
+            dependencies.append(model_env.ModelDependency(requirement="tf-keras", pip_name="tf-keras"))
+
         model_meta.env.include_if_absent(
-            [
-                model_env.ModelDependency(requirement="tensorflow", pip_name="tensorflow"),
-            ],
+            dependencies,
             check_local_version=True,
         )
         model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)
@@ -166,10 +211,18 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         model_blob_metadata = model_blobs_metadata[name]
         model_blob_filename = model_blob_metadata.path
         model_blob_options = cast(model_meta_schema.TensorflowModelBlobOptions, model_blob_metadata.options)
-
-
+        load_path = os.path.join(model_blob_path, model_blob_filename)
+        save_format = model_blob_options.get("save_format", "tf")
+        if save_format == "keras" or save_format == "keras_tf":
+            m = tensorflow.keras.models.load_model(load_path)
+        elif save_format == "cloudpickle":
+            import cloudpickle
+
+            with open(load_path, "rb") as f:
+                m = cloudpickle.load(f)
         else:
-            m = tensorflow.saved_model.load(
+            m = tensorflow.saved_model.load(load_path)
+
         return cast(tensorflow.Module, m)

     @classmethod
```
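The save-format comments above amount to a four-way dispatch. A standalone paraphrase of that logic, written here purely for readability (this function is my own summary, not code from the package):

```python
def choose_save_format(
    is_keras_model: bool,
    is_tf_keras_model: bool,
    is_functional_or_sequential: bool,
    keras_major_version: int,
) -> str:
    """Paraphrase of the handler's new save-format dispatch (illustrative only)."""
    if is_keras_model or is_tf_keras_model:
        if is_functional_or_sequential:
            return "keras"  # native Keras format
        if keras_major_version == 2 or is_tf_keras_model:
            return "keras_tf"  # saved via model.save(..., save_format="tf")
        return "cloudpickle"  # Keras v3 subclassed models are pickled
    return "tf"  # plain tensorflow.Module -> tensorflow.saved_model.save


# A subclassed Keras 3 model, for example, is cloudpickled rather than saved as a SavedModel:
assert choose_save_format(True, False, False, 3) == "cloudpickle"
```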
snowflake/ml/model/_packager/model_handlers/xgboost.py
CHANGED

```diff
@@ -117,8 +117,8 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.XGBModel"]]):
             sample_input_data=sample_input_data,
             get_prediction_fn=get_prediction,
         )
-        model_task_and_output = model_task_utils.get_model_task_and_output_type(model)
-        model_meta.task = handlers_utils.validate_model_task(model_meta.task, model_task_and_output.task)
+        model_task_and_output = model_task_utils.resolve_model_task_and_output_type(model, model_meta.task)
+        model_meta.task = model_task_and_output.task
         if enable_explainability:
             model_meta = handlers_utils.add_explain_method_signature(
                 model_meta=model_meta,
@@ -254,7 +254,7 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.XGBModel"]]):
             import shap

             explainer = shap.TreeExplainer(raw_model)
-            df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X))
+            df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer.shap_values(X))
             return model_signature_utils.rename_pandas_df(df, signature.outputs)

         if target_method == "explain":
```
snowflake/ml/model/_packager/model_meta/model_meta.py
CHANGED

```diff
@@ -215,6 +215,7 @@ class ModelMetadata:
         function_properties: A dict mapping function names to dict mapping function property key to value.
         metadata: User provided key-value metadata of the model. Defaults to None.
         creation_timestamp: Unix timestamp when the model metadata is created.
+        user_files: Dict mapping subdirectories to extra artifact file paths for files to include in the model.
         task: Model task like TABULAR_REGRESSION, tabular_classification, timeseries_forecasting etc.
     """

@@ -234,6 +235,7 @@ class ModelMetadata:
         runtimes: Optional[Dict[str, model_runtime.ModelRuntime]] = None,
         signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
         function_properties: Optional[Dict[str, Dict[str, Any]]] = None,
+        user_files: Optional[Dict[str, List[str]]] = None,
         metadata: Optional[Dict[str, str]] = None,
         creation_timestamp: Optional[str] = None,
         min_snowpark_ml_version: Optional[str] = None,
@@ -247,6 +249,7 @@ class ModelMetadata:
         if signatures:
             self.signatures = signatures
         self.function_properties = function_properties or {}
+        self.user_files = user_files
         self.metadata = metadata
         self.model_type = model_type
         self.env = env
```
snowflake/ml/model/_packager/model_meta/model_meta_schema.py
CHANGED

```diff
@@ -59,7 +59,11 @@ class XgboostModelBlobOptions(BaseModelBlobOptions):


 class TensorflowModelBlobOptions(BaseModelBlobOptions):
-
+    save_format: Required[str]
+
+
+class SentenceTransformersModelBlobOptions(BaseModelBlobOptions):
+    batch_size: Required[int]


 ModelBlobOptions = Union[
@@ -68,6 +72,7 @@ ModelBlobOptions = Union[
     MLFlowModelBlobOptions,
     XgboostModelBlobOptions,
     TensorflowModelBlobOptions,
+    SentenceTransformersModelBlobOptions,
 ]


```
snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py
CHANGED

```diff
@@ -1,2 +1,2 @@
-REQUIREMENTS = ['absl-py>=0.15,<2', 'aiohttp!=4.0.0a0, !=4.0.0a1', 'anyio>=3.5.0,<
-ALL_REQUIREMENTS=['absl-py>=0.15,<2', 'aiohttp!=4.0.0a0, !=4.0.0a1', 'anyio>=3.5.0,<
+REQUIREMENTS = ['absl-py>=0.15,<2', 'aiohttp!=4.0.0a0, !=4.0.0a1', 'anyio>=3.5.0,<5', 'cachetools>=3.1.1,<6', 'cloudpickle>=2.0.0', 'cryptography', 'fsspec>=2024.6.1,<2026', 'importlib_resources>=6.1.1, <7', 'numpy>=1.23,<2', 'packaging>=20.9,<25', 'pandas>=1.0.0,<3', 'pyarrow', 'pyjwt>=2.0.0, <3', 'pytimeparse>=1.1.8,<2', 'pyyaml>=6.0,<7', 'requests', 'retrying>=1.3.3,<2', 's3fs>=2024.6.1,<2026', 'scikit-learn>=1.4,<1.6', 'scipy>=1.9,<2', 'snowflake-connector-python>=3.5.0,<4', 'snowflake-snowpark-python>=1.17.0,<2,!=1.26.0', 'sqlparse>=0.4,<1', 'typing-extensions>=4.1.0,<5', 'xgboost>=1.7.3,<3']
+ALL_REQUIREMENTS=['absl-py>=0.15,<2', 'aiohttp!=4.0.0a0, !=4.0.0a1', 'anyio>=3.5.0,<5', 'cachetools>=3.1.1,<6', 'catboost>=1.2.0, <2', 'cloudpickle>=2.0.0', 'cryptography', 'fsspec>=2024.6.1,<2026', 'huggingface_hub<0.26', 'importlib_resources>=6.1.1, <7', 'lightgbm>=4.1.0, <5', 'mlflow>=2.16.0, <3', 'numpy>=1.23,<2', 'packaging>=20.9,<25', 'pandas>=1.0.0,<3', 'pyarrow', 'pyjwt>=2.0.0, <3', 'pytimeparse>=1.1.8,<2', 'pytorch>=2.0.1,<2.3.0', 'pyyaml>=6.0,<7', 'requests', 'retrying>=1.3.3,<2', 's3fs>=2024.6.1,<2026', 'scikit-learn>=1.4,<1.6', 'scipy>=1.9,<2', 'sentence-transformers>=2.2.2,<3', 'sentencepiece>=0.1.95,<1', 'shap>=0.46.0,<1', 'snowflake-connector-python>=3.5.0,<4', 'snowflake-snowpark-python>=1.17.0,<2,!=1.26.0', 'sqlparse>=0.4,<1', 'tensorflow>=2.12.0,<3', 'tokenizers>=0.10,<1', 'torchdata>=0.4,<1', 'transformers>=4.32.1,<5', 'typing-extensions>=4.1.0,<5', 'xgboost>=1.7.3,<3']
```
snowflake/ml/model/_packager/model_task/model_task_utils.py
CHANGED

```diff
@@ -149,8 +149,9 @@ def _get_model_task(model: Any) -> type_hints.Task:
     raise ValueError(f"Model type {type(model)} is not supported")


-def get_model_task_and_output_type(model: Any) -> ModelTaskAndOutputType:
-    task = _get_model_task(model)
+def resolve_model_task_and_output_type(model: Any, passed_model_task: type_hints.Task) -> ModelTaskAndOutputType:
+    inferred_task = _get_model_task(model)
+    task = handlers_utils.validate_model_task(passed_model_task, inferred_task)
     output_type = model_signature.DataType.DOUBLE
     if task == type_hints.Task.TABULAR_MULTI_CLASSIFICATION:
         output_type = model_signature.DataType.STRING
```
snowflake/ml/model/_signatures/base_handler.py
CHANGED

```diff
@@ -12,7 +12,6 @@ class BaseDataHandler(ABC, Generic[model_types._DataType]):
     FEATURE_PREFIX: Final[str] = "feature"
     INPUT_PREFIX: Final[str] = "input"
     OUTPUT_PREFIX: Final[str] = "output"
-    SIG_INFER_ROWS_COUNT_LIMIT: Final[int] = 10

     @staticmethod
     @abstractmethod
@@ -26,7 +25,7 @@ class BaseDataHandler(ABC, Generic[model_types._DataType]):

     @staticmethod
     @abstractmethod
-    def truncate(data: model_types._DataType) -> model_types._DataType:
+    def truncate(data: model_types._DataType, length: int) -> model_types._DataType:
         ...

     @staticmethod
```
snowflake/ml/model/_signatures/builtins_handler.py
CHANGED

```diff
@@ -35,8 +35,8 @@ class ListOfBuiltinHandler(base_handler.BaseDataHandler[model_types._SupportedBuiltinsList]):
         return len(data)

     @staticmethod
-    def truncate(data: model_types._SupportedBuiltinsList) -> model_types._SupportedBuiltinsList:
-        return data[: min(ListOfBuiltinHandler.count(data), ListOfBuiltinHandler.SIG_INFER_ROWS_COUNT_LIMIT)]
+    def truncate(data: model_types._SupportedBuiltinsList, length: int) -> model_types._SupportedBuiltinsList:
+        return data[: min(ListOfBuiltinHandler.count(data), length)]

     @staticmethod
     def validate(data: model_types._SupportedBuiltinsList) -> None:
```
snowflake/ml/model/_signatures/numpy_handler.py
CHANGED

```diff
@@ -23,8 +23,8 @@ class NumpyArrayHandler(base_handler.BaseDataHandler[model_types._SupportedNumpyArray]):
         return data.shape[0]

     @staticmethod
-    def truncate(data: model_types._SupportedNumpyArray) -> model_types._SupportedNumpyArray:
-        return data[: min(NumpyArrayHandler.count(data), NumpyArrayHandler.SIG_INFER_ROWS_COUNT_LIMIT)]
+    def truncate(data: model_types._SupportedNumpyArray, length: int) -> model_types._SupportedNumpyArray:
+        return data[: min(NumpyArrayHandler.count(data), length)]

     @staticmethod
     def validate(data: model_types._SupportedNumpyArray) -> None:
@@ -94,11 +94,10 @@ class SeqOfNumpyArrayHandler(base_handler.BaseDataHandler[Sequence[model_types._SupportedNumpyArray]]):
         return min(NumpyArrayHandler.count(data_col) for data_col in data)

     @staticmethod
-    def truncate(data: Sequence[model_types._SupportedNumpyArray]) -> Sequence[model_types._SupportedNumpyArray]:
-        return [
-            data_col[: min(SeqOfNumpyArrayHandler.count(data), SeqOfNumpyArrayHandler.SIG_INFER_ROWS_COUNT_LIMIT)]
-            for data_col in data
-        ]
+    def truncate(
+        data: Sequence[model_types._SupportedNumpyArray], length: int
+    ) -> Sequence[model_types._SupportedNumpyArray]:
+        return [data_col[: min(SeqOfNumpyArrayHandler.count(data), length)] for data_col in data]

     @staticmethod
     def validate(data: Sequence[model_types._SupportedNumpyArray]) -> None:
```
snowflake/ml/model/_signatures/pandas_handler.py
CHANGED

```diff
@@ -23,8 +23,8 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         return len(data.index)

     @staticmethod
-    def truncate(data: pd.DataFrame) -> pd.DataFrame:
-        return data.head(min(PandasDataFrameHandler.count(data), PandasDataFrameHandler.SIG_INFER_ROWS_COUNT_LIMIT))
+    def truncate(data: pd.DataFrame, length: int) -> pd.DataFrame:
+        return data.head(min(PandasDataFrameHandler.count(data), length))

     @staticmethod
     def validate(data: Union[pd.DataFrame, pd.Series]) -> None:
@@ -224,6 +224,6 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         df_col_dtypes = [df[col].dtype for col in df.columns]
         for df_col, df_col_dtype in zip(df_cols, df_col_dtypes):
             if df_col_dtype == np.dtype("O"):
-                if isinstance(df[df_col][0], np.ndarray):
+                if isinstance(df[df_col].iloc[0], np.ndarray):
                     df[df_col] = df[df_col].map(np.ndarray.tolist)
         return df
```
snowflake/ml/model/_signatures/pytorch_handler.py
CHANGED

```diff
@@ -33,11 +33,8 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Tensor"]]):
         return min(data_col.shape[0] for data_col in data)  # type: ignore[no-any-return]

     @staticmethod
-    def truncate(data: Sequence["torch.Tensor"]) -> Sequence["torch.Tensor"]:
-        return [
-            data_col[: min(SeqOfPyTorchTensorHandler.count(data), SeqOfPyTorchTensorHandler.SIG_INFER_ROWS_COUNT_LIMIT)]
-            for data_col in data
-        ]
+    def truncate(data: Sequence["torch.Tensor"], length: int) -> Sequence["torch.Tensor"]:
+        return [data_col[: min(SeqOfPyTorchTensorHandler.count(data), 10)] for data_col in data]

     @staticmethod
     def validate(data: Sequence["torch.Tensor"]) -> None:
```
snowflake/ml/model/_signatures/snowpark_handler.py
CHANGED

```diff
@@ -1,5 +1,5 @@
 import json
-from typing import Literal, Optional, Sequence, cast
+from typing import Any, Literal, Optional, Sequence, cast

 import numpy as np
 import pandas as pd
@@ -29,8 +29,8 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.DataFrame]):
         return data.count()

     @staticmethod
-    def truncate(data: snowflake.snowpark.DataFrame) -> snowflake.snowpark.DataFrame:
-        return cast(snowflake.snowpark.DataFrame, data.limit(
+    def truncate(data: snowflake.snowpark.DataFrame, length: int) -> snowflake.snowpark.DataFrame:
+        return cast(snowflake.snowpark.DataFrame, data.limit(length))

     @staticmethod
     def validate(data: snowflake.snowpark.DataFrame) -> None:
@@ -52,7 +52,7 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.DataFrame]):
         data: snowflake.snowpark.DataFrame, role: Literal["input", "output"]
     ) -> Sequence[core.BaseFeatureSpec]:
         return pandas_handler.PandasDataFrameHandler.infer_signature(
-            SnowparkDataFrameHandler.convert_to_df(data
+            SnowparkDataFrameHandler.convert_to_df(data), role=role
         )

     @staticmethod
@@ -73,14 +73,20 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.DataFrame]):
             assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
             dtype_map[feature.name] = feature.as_dtype()
         df_local = data.to_pandas()
+
         # This is because Array will become string (Even though the correct schema is set)
         # and object will become variant type and requires an additional loads
         # to get correct data otherwise it would be string.
+        def load_if_not_null(x: str) -> Optional[Any]:
+            if x is None:
+                return None
+            return json.loads(x)
+
         for field in data.schema.fields:
             if isinstance(field.datatype, spt.ArrayType):
                 df_local[identifier.get_unescaped_names(field.name)] = df_local[
                     identifier.get_unescaped_names(field.name)
-                ].map(json.loads)
+                ].map(load_if_not_null)
                 # Only when the feature is not from inference, we are confident to do the type casting.
                 # Otherwise, dtype_map will be empty.
                 # Errors are ignored to make sure None won't be converted and won't raise Error
```
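The new `load_if_not_null` wrapper exists because `json.loads` rejects `None`, which is what a SQL NULL in an ARRAY column becomes once the Snowpark frame is pulled into pandas:

```python
import json

json.loads("[1, 2, 3]")  # fine: returns [1, 2, 3]
json.loads(None)  # TypeError: the JSON object must be str, bytes or bytearray, not NoneType
```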
snowflake/ml/model/_signatures/tensorflow_handler.py
CHANGED

```diff
@@ -60,14 +60,9 @@ class SeqOfTensorflowTensorHandler(

     @staticmethod
     def truncate(
-        data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]
+        data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]], length: int
     ) -> Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]:
-        return [
-            data_col[
-                : min(SeqOfTensorflowTensorHandler.count(data), SeqOfTensorflowTensorHandler.SIG_INFER_ROWS_COUNT_LIMIT)
-            ]
-            for data_col in data
-        ]
+        return [data_col[: min(SeqOfTensorflowTensorHandler.count(data), length)] for data_col in data]

     @staticmethod
     def validate(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> None:
```
snowflake/ml/model/model_signature.py
CHANGED

```diff
@@ -59,11 +59,16 @@ _ALL_DATA_HANDLERS = _LOCAL_DATA_HANDLERS + [snowpark_handler.SnowparkDataFrameHandler]

 def _truncate_data(
     data: model_types.SupportedDataType,
+    length: Optional[int] = 100,
 ) -> model_types.SupportedDataType:
     for handler in _ALL_DATA_HANDLERS:
         if handler.can_handle(data):
+            # If length is None, return the original data
+            if length is None:
+                return data
+
             row_count = handler.count(data)
-            if row_count <=
+            if row_count <= length:
                 return data

             warnings.warn(
@@ -77,7 +82,7 @@ def _truncate_data(
                 category=UserWarning,
                 stacklevel=1,
             )
-            return handler.truncate(data)
+            return handler.truncate(data, length)
     raise snowml_exceptions.SnowflakeMLException(
         error_code=error_codes.NOT_IMPLEMENTED,
         original_exception=NotImplementedError(
@@ -687,6 +692,8 @@ def infer_signature(
     output_data: model_types.SupportedLocalDataType,
     input_feature_names: Optional[List[str]] = None,
     output_feature_names: Optional[List[str]] = None,
+    input_data_limit: Optional[int] = 100,
+    output_data_limit: Optional[int] = 100,
 ) -> core.ModelSignature:
     """
     Infer model signature from given input and output sample data.
@@ -710,12 +717,18 @@ def infer_signature(
         output_data: Sample output data for the model.
         input_feature_names: Names for input features. Defaults to None.
         output_feature_names: Names for output features. Defaults to None.
+        input_data_limit: Limit the number of rows to be used in signature inference in the input data. Defaults to 100.
+            If None, all rows are used. If the number of rows in the input data is less than the limit, all rows are
+            used.
+        output_data_limit: Limit the number of rows to be used in signature inference in the output data. Defaults to
+            100. If None, all rows are used. If the number of rows in the output data is less than the limit, all rows
+            are used.

     Returns:
         A model signature inferred from the given input and output sample data.
     """
-    inputs = _infer_signature(input_data, role="input")
+    inputs = _infer_signature(_truncate_data(input_data, input_data_limit), role="input")
     inputs = utils.rename_features(inputs, input_feature_names)
-    outputs = _infer_signature(output_data, role="output")
+    outputs = _infer_signature(_truncate_data(output_data, output_data_limit), role="output")
     outputs = utils.rename_features(outputs, output_feature_names)
     return core.ModelSignature(inputs, outputs)
```
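A usage sketch of the new limits (the parameters come straight from the signature above; the sample data is made up). Signature inference now inspects at most 100 rows per side by default, and passing `None` opts out of truncation:

```python
import pandas as pd

from snowflake.ml.model import model_signature

big_input = pd.DataFrame({"x": range(1_000_000)})
big_output = pd.DataFrame({"y": range(1_000_000)})

# Only the first 100 rows of each frame are inspected (the new default).
sig = model_signature.infer_signature(big_input, big_output)

# Pass None to scan all rows instead.
sig_full = model_signature.infer_signature(
    big_input, big_output, input_data_limit=None, output_data_limit=None
)
```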
snowflake/ml/model/type_hints.py
CHANGED
```diff
@@ -199,6 +199,7 @@ class HuggingFaceSaveOptions(BaseModelSaveOption):
 class SentenceTransformersSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
     cuda_version: NotRequired[str]
+    batch_size: NotRequired[int]


 ModelSaveOption = Union[
```
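Given the new `batch_size` field on `SentenceTransformersSaveOptions`, the encoding batch size can presumably be pinned through the save options when logging a sentence-transformers model. A sketch under that assumption (`reg`, `st_model`, and `sample_df` are illustrative, not from the diff):

```python
# Assumed usage based on the TypedDict above; not verbatim from the package docs.
mv = reg.log_model(
    st_model,  # a sentence_transformers.SentenceTransformer instance
    model_name="my_encoder",
    version_name="v1",
    sample_input_data=sample_df,
    options={"batch_size": 32},  # new SentenceTransformersSaveOptions field
)
```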
snowflake/ml/modeling/_internal/model_trainer_builder.py
CHANGED

```diff
@@ -1,11 +1,9 @@
-import os
 from typing import List, Optional, Union

 import pandas as pd
 from sklearn import model_selection

 from snowflake.ml._internal.exceptions import error_codes, exceptions
-from snowflake.ml.modeling._internal.constants import IN_ML_RUNTIME_ENV_VAR
 from snowflake.ml.modeling._internal.estimator_utils import (
     get_module_name,
     is_single_node,
@@ -13,9 +11,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
 from snowflake.ml.modeling._internal.local_implementations.pandas_trainer import (
     PandasModelTrainer,
 )
-from snowflake.ml.modeling._internal.ml_runtime_implementations.ml_runtime_trainer import (
-    MLRuntimeModelTrainer,
-)
 from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
 from snowflake.ml.modeling._internal.snowpark_implementations.distributed_hpo_trainer import (
     DistributedHPOTrainer,
@@ -107,9 +102,6 @@ class ModelTrainerBuilder:
             "autogenerated": autogenerated,
             "subproject": subproject,
         }
-        if os.environ.get(IN_ML_RUNTIME_ENV_VAR):
-            return MLRuntimeModelTrainer(**init_args)  # type: ignore[arg-type, return-value]
-
         trainer_klass = SnowparkModelTrainer

         assert dataset._session is not None  # Make MyPy happy
```