PyPI - snowflake-ml-python - Versions diffs - 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl - Mend

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (237) hide show

snowflake/ml/model/_model_composer/model_composer.py CHANGED Viewed

@@ -88,6 +88,7 @@ class ModelComposer:
         pip_requirements: Optional[List[str]] = None,
         target_platforms: Optional[List[model_types.TargetPlatform]] = None,
         python_version: Optional[str] = None,
+        user_files: Optional[Dict[str, List[str]]] = None,
         ext_modules: Optional[List[ModuleType]] = None,
         code_paths: Optional[List[str]] = None,
         task: model_types.Task = model_types.Task.UNKNOWN,
@@ -97,9 +98,12 @@ class ModelComposer:
             options = model_types.BaseModelSaveOption()
         if not snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
-            snowml_matched_versions = env_utils.get_matched_package_versions_in_snowflake_conda_channel(
-                req=requirements.Requirement(f"snowflake-ml-python=={snowml_env.VERSION}")
-            )
+            snowml_matched_versions = env_utils.get_matched_package_versions_in_information_schema(
+                self.session,
+                reqs=[requirements.Requirement(f"{env_utils.SNOWPARK_ML_PKG_NAME}=={snowml_env.VERSION}")],
+                python_version=python_version or snowml_env.PYTHON_VERSION,
+                statement_params=self._statement_params,
+            ).get(env_utils.SNOWPARK_ML_PKG_NAME, [])
             if len(snowml_matched_versions) < 1 and options.get("embed_local_ml_library", False) is False:
                 logging.info(
@@ -131,6 +135,7 @@ class ModelComposer:
             model_meta=self.packager.meta,
             model_rel_path=pathlib.PurePosixPath(ModelComposer.MODEL_DIR_REL_PATH),
             options=options,
+            user_files=user_files,
             data_sources=self._get_data_sources(model, sample_input_data),
             target_platforms=target_platforms,
         )

snowflake/ml/model/_model_composer/model_manifest/model_manifest.py CHANGED Viewed

@@ -2,7 +2,7 @@ import collections
 import logging
 import pathlib
 import warnings
-from typing import List, Optional, cast
+from typing import Dict, List, Optional, cast
 import yaml
@@ -11,9 +11,11 @@ from snowflake.ml.data import data_source
 from snowflake.ml.model import type_hints
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
 from snowflake.ml.model._model_composer.model_method import (
+    constants,
     function_generator,
     model_method,
 )
+from snowflake.ml.model._model_composer.model_user_file import model_user_file
 from snowflake.ml.model._packager.model_meta import (
     model_meta as model_meta_api,
     model_meta_schema,
@@ -30,9 +32,11 @@ class ModelManifest:
         workspace_path: A local path where model related files should be dumped to.
         runtimes: A list of ModelRuntime objects managing the runtimes and environment in the MODEL object.
         methods: A list of ModelMethod objects managing the method we registered to the MODEL object.
+        user_files: A list of ModelUserFile objects managing extra files uploaded to the workspace.
     """
     MANIFEST_FILE_REL_PATH = "MANIFEST.yml"
+    _ENABLE_USER_FILES = False
     _DEFAULT_RUNTIME_NAME = "python_runtime"
     def __init__(self, workspace_path: pathlib.Path) -> None:
@@ -42,6 +46,7 @@ class ModelManifest:
         self,
         model_meta: model_meta_api.ModelMetadata,
         model_rel_path: pathlib.PurePosixPath,
+        user_files: Optional[Dict[str, List[str]]] = None,
         options: Optional[type_hints.ModelSaveOption] = None,
         data_sources: Optional[List[data_source.DataSource]] = None,
         target_platforms: Optional[List[type_hints.TargetPlatform]] = None,
@@ -79,6 +84,7 @@ class ModelManifest:
         self.function_generator = function_generator.FunctionGenerator(model_dir_rel_path=model_rel_path)
         self.methods: List[model_method.ModelMethod] = []
         for target_method in model_meta.signatures.keys():
             method = model_method.ModelMethod(
                 model_meta=model_meta,
@@ -88,11 +94,21 @@ class ModelManifest:
                 is_partitioned_function=model_meta.function_properties.get(target_method, {}).get(
                     model_meta_schema.FunctionProperties.PARTITIONED.value, False
                 ),
+                wide_input=len(model_meta.signatures[target_method].inputs) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT,
                 options=model_method.get_model_method_options_from_options(options, target_method),
             )
             self.methods.append(method)
+        self.user_files: List[model_user_file.ModelUserFile] = []
+        if user_files is not None:
+            for subdirectory, paths in user_files.items():
+                for path in paths:
+                    self.user_files.append(
+                        model_user_file.ModelUserFile(pathlib.PurePosixPath(subdirectory), pathlib.Path(path))
+                    )
         method_name_counter = collections.Counter([method.method_name for method in self.methods])
         dup_method_names = [k for k, v in method_name_counter.items() if v > 1]
         if dup_method_names:
@@ -129,6 +145,9 @@ class ModelManifest:
             ],
         )
+        if self._ENABLE_USER_FILES:
+            manifest_dict["user_files"] = [user_file.save(self.workspace_path) for user_file in self.user_files]
         lineage_sources = self._extract_lineage_info(data_sources)
         if lineage_sources:
             manifest_dict["lineage_sources"] = lineage_sources

snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py CHANGED Viewed

@@ -94,5 +94,6 @@ class ModelManifestDict(TypedDict):
     runtimes: Required[Dict[str, ModelRuntimeDict]]
     methods: Required[List[ModelMethodDict]]
     user_data: NotRequired[Dict[str, Any]]
+    user_files: NotRequired[List[str]]
     lineage_sources: NotRequired[List[LineageSourceDict]]
     target_platforms: NotRequired[List[str]]

snowflake/ml/model/_model_composer/model_method/constants.py ADDED Viewed

@@ -0,0 +1 @@
+SNOWPARK_UDF_INPUT_COL_LIMIT = 500

snowflake/ml/model/_model_composer/model_method/function_generator.py CHANGED Viewed

@@ -43,6 +43,7 @@ class FunctionGenerator:
         target_method: str,
         function_type: str,
         is_partitioned_function: bool = False,
+        wide_input: bool = False,
         options: Optional[FunctionGenerateOptions] = None,
     ) -> None:
         import importlib_resources
@@ -70,6 +71,7 @@ class FunctionGenerator:
             model_dir_name=self.model_dir_rel_path.name,
             target_method=target_method,
             max_batch_size=options.get("max_batch_size", None),
+            wide_input=wide_input,
             function_name=FunctionGenerator.FUNCTION_NAME,
         )
         with open(function_file_path, "w", encoding="utf-8") as f:

snowflake/ml/model/_model_composer/model_method/infer_function.py_template CHANGED Viewed

@@ -43,7 +43,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
 # Actual function
-@vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE)
+@vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE, flatten_object_input={wide_input})
 def {function_name}(df: pd.DataFrame) -> dict:
     df.columns = input_cols
     input_df = df.astype(dtype=dtype_map)

snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template CHANGED Viewed

@@ -48,7 +48,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
 # Actual table function
 class {function_name}:
-    @vectorized(input=pd.DataFrame)
+    @vectorized(input=pd.DataFrame, flatten_object_input={wide_input})
     def end_partition(self, df: pd.DataFrame) -> pd.DataFrame:
         df.columns = input_cols
         input_df = df.astype(dtype=dtype_map)

snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template CHANGED Viewed

@@ -43,7 +43,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
 # Actual table function
 class {function_name}:
-    @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE)
+    @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE, flatten_object_input={wide_input})
     def process(self, df: pd.DataFrame) -> pd.DataFrame:
         df.columns = input_cols
         input_df = df.astype(dtype=dtype_map)

snowflake/ml/model/_model_composer/model_method/model_method.py CHANGED Viewed

@@ -7,7 +7,10 @@ from typing_extensions import NotRequired
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model import model_signature, type_hints
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
-from snowflake.ml.model._model_composer.model_method import function_generator
+from snowflake.ml.model._model_composer.model_method import (
+    constants,
+    function_generator,
+)
 from snowflake.ml.model._packager.model_meta import model_meta as model_meta_api
 from snowflake.snowpark._internal import type_utils
@@ -64,6 +67,7 @@ class ModelMethod:
         runtime_name: str,
         function_generator: function_generator.FunctionGenerator,
         is_partitioned_function: bool = False,
+        wide_input: bool = False,
         options: Optional[ModelMethodOptions] = None,
     ) -> None:
         self.model_meta = model_meta
@@ -71,6 +75,7 @@ class ModelMethod:
         self.function_generator = function_generator
         self.is_partitioned_function = is_partitioned_function
         self.runtime_name = runtime_name
+        self.wide_input = wide_input
         self.options = options or {}
         try:
             self.method_name = sql_identifier.SqlIdentifier(
@@ -114,12 +119,15 @@ class ModelMethod:
             self.target_method,
             self.function_type,
             self.is_partitioned_function,
+            self.wide_input,
             options=options,
         )
         input_list = [
             ModelMethod._get_method_arg_from_feature(ft, case_sensitive=self.options.get("case_sensitive", False))
             for ft in self.model_meta.signatures[self.target_method].inputs
         ]
+        if len(input_list) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT:
+            input_list = [{"name": "INPUT", "type": "OBJECT"}]
         input_name_counter = collections.Counter([input_info["name"] for input_info in input_list])
         dup_input_names = [k for k, v in input_name_counter.items() if v > 1]
         if dup_input_names:

snowflake/ml/model/_model_composer/model_user_file/model_user_file.py ADDED Viewed

@@ -0,0 +1,27 @@
+import os
+import pathlib
+from snowflake.ml._internal import file_utils
+class ModelUserFile:
+    """Class representing a user provided file.
+    Attributes:
+        subdirectory_name: A local path where model related files should be dumped to.
+        local_path: A list of ModelRuntime objects managing the runtimes and environment in the MODEL object.
+    """
+    USER_FILES_DIR_REL_PATH = "user_files"
+    def __init__(self, subdirectory_name: pathlib.PurePosixPath, local_path: pathlib.Path) -> None:
+        self.subdirectory_name = subdirectory_name
+        self.local_path = local_path
+    def save(self, workspace_path: pathlib.Path) -> str:
+        user_files_path = workspace_path / ModelUserFile.USER_FILES_DIR_REL_PATH / self.subdirectory_name
+        user_files_path.mkdir(parents=True, exist_ok=True)
+        # copy the file to the workspace
+        file_utils.copy_file_or_tree(str(self.local_path), str(user_files_path))
+        return os.path.join(self.subdirectory_name, self.local_path.name)

snowflake/ml/model/_packager/model_handlers/_utils.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import json
 import os
+import pathlib
 import warnings
-from typing import Any, Callable, Iterable, List, Optional, Sequence, cast
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, cast
 import numpy as np
 import numpy.typing as npt
@@ -37,8 +38,10 @@ def _is_callable(model: model_types.SupportedModelType, method_name: str) -> boo
     return callable(getattr(model, method_name, None))
-def get_truncated_sample_data(sample_input_data: model_types.SupportedDataType) -> model_types.SupportedLocalDataType:
-    trunc_sample_input = model_signature._truncate_data(sample_input_data)
+def get_truncated_sample_data(
+    sample_input_data: model_types.SupportedDataType, length: int = 100
+) -> model_types.SupportedLocalDataType:
+    trunc_sample_input = model_signature._truncate_data(sample_input_data, length=length)
     local_sample_input: model_types.SupportedLocalDataType = None
     if isinstance(sample_input_data, SnowparkDataFrame):
         # Added because of Any from missing stubs.
@@ -77,7 +80,14 @@ def validate_signature(
     local_sample_input = get_truncated_sample_data(sample_input_data)
     for target_method in target_methods:
         predictions_df = get_prediction_fn(target_method, local_sample_input)
-        sig = model_signature.infer_signature(local_sample_input, predictions_df)
+        sig = model_signature.infer_signature(
+            sample_input_data,
+            predictions_df,
+            input_feature_names=None,
+            output_feature_names=None,
+            input_data_limit=100,
+            output_data_limit=100,
+        )
         model_meta.signatures[target_method] = sig
     return model_meta
@@ -118,7 +128,7 @@ def get_explainability_supported_background(
     meta: model_meta.ModelMetadata,
     explain_target_method: Optional[str],
 ) -> pd.DataFrame:
-    if sample_input_data is None:
+    if sample_input_data is None or explain_target_method is None:
         return None
     if isinstance(sample_input_data, pd.DataFrame):
@@ -223,3 +233,27 @@ def get_explain_target_method(
         if method in target_methods_list:
             return method
     return None
+def save_transformers_config_with_auto_map(local_model_path: str) -> None:
+    import huggingface_hub
+    for f_path in pathlib.Path(local_model_path).iterdir():
+        if f_path.name in ["config.json", "tokenizer_config.json"]:
+            with open(f_path) as f:
+                config_dict = json.load(f)
+            # a. get repository and class_path from configs
+            auto_map_configs = cast(Dict[str, str], config_dict.get("auto_map", {}))
+            for config_name, config_value in auto_map_configs.items():
+                repository, _, class_path = config_value.rpartition("--")
+                # b. download required configs from hf hub
+                if repository:
+                    huggingface_hub.snapshot_download(repo_id=repository, local_dir=local_model_path)
+                # c. update config files
+                config_dict["auto_map"][config_name] = class_path
+            with open(f_path, "w") as f:
+                json.dump(config_dict, f)

snowflake/ml/model/_packager/model_handlers/catboost.py CHANGED Viewed

@@ -94,8 +94,8 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
-            model_task_and_output = model_task_utils.get_model_task_and_output_type(model)
-            model_meta.task = handlers_utils.validate_model_task(model_meta.task, model_task_and_output.task)
+            model_task_and_output = model_task_utils.resolve_model_task_and_output_type(model, model_meta.task)
+            model_meta.task = model_task_and_output.task
             if enable_explainability:
                 explain_target_method = handlers_utils.get_explain_target_method(model_meta, cls.EXPLAIN_TARGET_METHODS)
                 model_meta = handlers_utils.add_explain_method_signature(
@@ -227,7 +227,7 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
                     import shap
                     explainer = shap.TreeExplainer(raw_model)
-                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X).values)
+                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer.shap_values(X))
                     return model_signature_utils.rename_pandas_df(df, signature.outputs)
                 if target_method == "explain":

snowflake/ml/model/_packager/model_handlers/custom.py CHANGED Viewed

@@ -66,7 +66,7 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
                 sample_input_data = model_signature._convert_local_data_to_df(sample_input_data)
             if inspect.iscoroutinefunction(target_method):
-                with anyio.start_blocking_portal() as portal:
+                with anyio.from_thread.start_blocking_portal() as portal:
                     predictions_df = portal.call(target_method, model, sample_input_data)
             else:
                 predictions_df = target_method(model, sample_input_data)
@@ -98,7 +98,6 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
         if model.context.model_refs:
             for sub_name, model_ref in model.context.model_refs.items():
                 handler = model_handler.find_handler(model_ref.model)
-                assert handler is not None
                 if handler is None:
                     raise TypeError("Your input type to custom model is not currently supported")
                 sub_model = handler.cast_model(model_ref.model)

snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py CHANGED Viewed

@@ -195,8 +195,12 @@ class HuggingFacePipelineHandler(
         os.makedirs(model_blob_path, exist_ok=True)
         if type_utils.LazyType("transformers.Pipeline").isinstance(model):
+            save_path = os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR)
             model.save_pretrained(  # type:ignore[attr-defined]
-                os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR)
+                save_path
+            )
+            handlers_utils.save_transformers_config_with_auto_map(
+                save_path,
             )
             pipeline_params = {
                 "_batch_size": model._batch_size,  # type:ignore[attr-defined]
@@ -319,6 +323,7 @@ class HuggingFacePipelineHandler(
                 model_blob_options["task"],
                 model=model_blob_file_or_dir_path,
                 trust_remote_code=True,
+                torch_dtype="auto",
                 **device_config,
             )

snowflake/ml/model/_packager/model_handlers/lightgbm.py CHANGED Viewed

@@ -110,8 +110,8 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
-            model_task_and_output = model_task_utils.get_model_task_and_output_type(model)
-            model_meta.task = handlers_utils.validate_model_task(model_meta.task, model_task_and_output.task)
+            model_task_and_output = model_task_utils.resolve_model_task_and_output_type(model, model_meta.task)
+            model_meta.task = model_task_and_output.task
             if enable_explainability:
                 explain_target_method = handlers_utils.get_explain_target_method(model_meta, cls.EXPLAIN_TARGET_METHODS)
                 model_meta = handlers_utils.add_explain_method_signature(
@@ -240,7 +240,9 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
                     import shap
                     explainer = shap.TreeExplainer(raw_model)
-                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X).values)
+                    df = handlers_utils.convert_explanations_to_2D_df(
+                        raw_model, explainer.shap_values(X, from_call=True)
+                    )
                     return model_signature_utils.rename_pandas_df(df, signature.outputs)
                 if target_method == "explain":

snowflake/ml/model/_packager/model_handlers/sentence_transformers.py CHANGED Viewed

@@ -14,8 +14,8 @@ from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import (
     model_blob_meta,
     model_meta as model_meta_api,
+    model_meta_schema,
 )
-from snowflake.ml.model._signatures import utils as model_signature_utils
 from snowflake.snowpark._internal import utils as snowpark_utils
 if TYPE_CHECKING:
@@ -24,6 +24,25 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
+def _validate_sentence_transformers_signatures(sigs: Dict[str, model_signature.ModelSignature]) -> None:
+    if list(sigs.keys()) != ["encode"]:
+        raise ValueError("target_methods can only be ['encode']")
+    if len(sigs["encode"].inputs) != 1:
+        raise ValueError("SentenceTransformer can only accept 1 input column")
+    if len(sigs["encode"].outputs) != 1:
+        raise ValueError("SentenceTransformer can only return 1 output column")
+    assert isinstance(sigs["encode"].inputs[0], model_signature.FeatureSpec)
+    if sigs["encode"].inputs[0]._shape is not None:
+        raise ValueError("SentenceTransformer does not support input shape")
+    if sigs["encode"].inputs[0]._dtype != model_signature.DataType.STRING:
+        raise ValueError("SentenceTransformer only accepts string input")
 @final
 class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.SentenceTransformer"]):
     HANDLER_TYPE = "sentence_transformers"
@@ -68,6 +87,10 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
         if enable_explainability:
             raise NotImplementedError("Explainability is not supported for Sentence Transformer model.")
+        batch_size = kwargs.get("batch_size", 32)
+        if not isinstance(batch_size, int) or batch_size <= 0:
+            raise ValueError("batch_size must be a positive integer")
         # Validate target methods and signature (if possible)
         if not is_sub_model:
             target_methods = handlers_utils.get_target_methods(
@@ -75,12 +98,23 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
                 target_methods=kwargs.pop("target_methods", None),
                 default_target_methods=cls.DEFAULT_TARGET_METHODS,
             )
-            assert target_methods == ["encode"], "target_methods can only be ['encode']"
+            if target_methods != ["encode"]:
+                raise ValueError("target_methods can only be ['encode']")
             def get_prediction(
                 target_method_name: str, sample_input_data: model_types.SupportedLocalDataType
             ) -> model_types.SupportedLocalDataType:
-                return _sentence_transformer_encode(model, sample_input_data)
+                if not isinstance(sample_input_data, pd.DataFrame):
+                    sample_input_data = model_signature._convert_local_data_to_df(data=sample_input_data)
+                if sample_input_data.shape[1] != 1:
+                    raise ValueError(
+                        "SentenceTransformer can only accept 1 input column when converted to pd.DataFrame"
+                    )
+                X_list = sample_input_data.iloc[:, 0].tolist()
+                assert callable(getattr(model, "encode", None))
+                return pd.DataFrame({0: model.encode(X_list, batch_size=batch_size).tolist()})
             if model_meta.signatures:
                 handlers_utils.validate_target_methods(model, list(model_meta.signatures.keys()))
@@ -102,10 +136,16 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
                         get_prediction_fn=get_prediction,
                     )
+            _validate_sentence_transformers_signatures(model_meta.signatures)
         # save model
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
-        model.save(os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR))
+        save_path = os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR)
+        model.save(save_path)
+        handlers_utils.save_transformers_config_with_auto_map(
+            save_path,
+        )
         # save model metadata
         base_meta = model_blob_meta.ModelBlobMeta(
@@ -113,6 +153,7 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
             path=cls.MODEL_BLOB_FILE_OR_DIR,
+            options=model_meta_schema.SentenceTransformersModelBlobOptions(batch_size=batch_size),
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
@@ -149,6 +190,7 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
         if snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
             # We need to redirect the same folders to a writable location in the sandbox.
             os.environ["TRANSFORMERS_CACHE"] = "/tmp"
+            os.environ["HF_HOME"] = "/tmp"
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         model_blobs_metadata = model_meta.models
@@ -183,6 +225,10 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
             raw_model: "sentence_transformers.SentenceTransformer",
             model_meta: model_meta_api.ModelMetadata,
         ) -> Type[custom_model.CustomModel]:
+            batch_size = cast(
+                model_meta_schema.SentenceTransformersModelBlobOptions, model_meta.models[model_meta.name].options
+            ).get("batch_size", None)
             def get_prediction(
                 raw_model: "sentence_transformers.SentenceTransformer",
                 signature: model_signature.ModelSignature,
@@ -190,8 +236,11 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
             ) -> Callable[[custom_model.CustomModel, pd.DataFrame], pd.DataFrame]:
                 @custom_model.inference_api
                 def fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
-                    predictions_df = _sentence_transformer_encode(raw_model, X)
-                    return model_signature_utils.rename_pandas_df(predictions_df, signature.outputs)
+                    X_list = X.iloc[:, 0].tolist()
+                    return pd.DataFrame(
+                        {signature.outputs[0].name: raw_model.encode(X_list, batch_size=batch_size).tolist()}
+                    )
                 return fn
@@ -217,17 +266,3 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
         predict_method = getattr(sentence_transformers_SentenceTransformer_model, "encode", None)
         assert callable(predict_method)
         return sentence_transformers_SentenceTransformer_model
-def _sentence_transformer_encode(
-    model: "sentence_transformers.SentenceTransformer", X: model_types.SupportedLocalDataType
-) -> model_types.SupportedLocalDataType:
-    if not isinstance(X, pd.DataFrame):
-        X = model_signature._convert_local_data_to_df(X)
-    assert X.shape[1] == 1, "SentenceTransformer can only accept 1 input column when converted to pd.DataFrame"
-    X_list = X.iloc[:, 0].tolist()
-    assert callable(getattr(model, "encode", None))
-    return pd.DataFrame({0: model.encode(X_list, batch_size=X.shape[0]).tolist()})

snowflake/ml/model/_packager/model_handlers/sklearn.py CHANGED Viewed

@@ -152,8 +152,8 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                 sample_input_data, model_meta, explain_target_method
             )
-            model_task_and_output_type = model_task_utils.get_model_task_and_output_type(model)
-            model_meta.task = handlers_utils.validate_model_task(model_meta.task, model_task_and_output_type.task)
+            model_task_and_output_type = model_task_utils.resolve_model_task_and_output_type(model, model_meta.task)
+            model_meta.task = model_task_and_output_type.task
             # if users did not ask then we enable if we have background data
             if enable_explainability is None:
@@ -164,11 +164,17 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                         stacklevel=1,
                     )
                     enable_explainability = False
-                elif model_meta.task == model_types.Task.UNKNOWN:
+                elif model_meta.task == model_types.Task.UNKNOWN or explain_target_method is None:
                     enable_explainability = False
                 else:
                     enable_explainability = True
             if enable_explainability:
+                model_meta = handlers_utils.add_explain_method_signature(
+                    model_meta=model_meta,
+                    explain_method="explain",
+                    target_method=explain_target_method,
+                    output_return_type=model_task_and_output_type.output_type,
+                )
                 handlers_utils.save_background_data(
                     model_blobs_dir_path,
                     cls.EXPLAIN_ARTIFACTS_DIR,
@@ -177,13 +183,6 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                     background_data,
                 )
-                model_meta = handlers_utils.add_explain_method_signature(
-                    model_meta=model_meta,
-                    explain_method="explain",
-                    target_method=explain_target_method,
-                    output_return_type=model_task_and_output_type.output_type,
-                )
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
         with open(os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR), "wb") as f:

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl

snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl