snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +82 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +18 -18
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +2 -6
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/pandas_handler.py +1 -1
- snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/pipeline/pipeline.py +6 -176
- snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
- snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +38 -9
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +63 -67
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
snowflake/ml/data/data_connector.py CHANGED
@@ -1,5 +1,16 @@
 import os
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Sequence,
+    Type,
+    TypeVar,
+    cast,
+)
 
 import numpy.typing as npt
 from typing_extensions import deprecated
@@ -12,6 +23,7 @@ from snowflake.ml.modeling._internal.constants import (
     IN_ML_RUNTIME_ENV_VAR,
     USE_OPTIMIZED_DATA_INGESTOR,
 )
+from snowflake.snowpark import context as sf_context
 
 if TYPE_CHECKING:
     import pandas as pd
@@ -35,8 +47,10 @@ class DataConnector:
     def __init__(
         self,
         ingestor: data_ingestor.DataIngestor,
+        **kwargs: Any,
     ) -> None:
         self._ingestor = ingestor
+        self._kwargs = kwargs
 
     @classmethod
     @snowpark._internal.utils.private_preview(version="1.6.0")
@@ -44,20 +58,34 @@ class DataConnector:
         cls: Type[DataConnectorType],
         df: snowpark.DataFrame,
         ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> DataConnectorType:
         if len(df.queries["queries"]) != 1 or len(df.queries["post_actions"]) != 0:
             raise ValueError("DataFrames with multiple queries and/or post-actions not supported")
-
-
-
+        return cast(
+            DataConnectorType,
+            cls.from_sql(df.queries["queries"][0], session=df._session, ingestor_class=ingestor_class, **kwargs),
+        )
+
+    @classmethod
+    @snowpark._internal.utils.private_preview(version="1.7.3")
+    def from_sql(
+        cls: Type[DataConnectorType],
+        query: str,
+        session: Optional[snowpark.Session] = None,
+        ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
+        **kwargs: Any,
+    ) -> DataConnectorType:
+        session = session or sf_context.get_active_session()
+        source = data_source.DataFrameInfo(query)
+        return cls.from_sources(session, [source], ingestor_class=ingestor_class, **kwargs)
 
     @classmethod
     def from_dataset(
         cls: Type[DataConnectorType],
         ds: "dataset.Dataset",
         ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> DataConnectorType:
         dsv = ds.selected_version
         assert dsv is not None
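A usage sketch based on the signatures above: the new from_sql classmethod (private preview as of 1.7.3) builds a connector straight from a query string, and the DataFrame-based constructor now delegates to it. The table name and the active-session lookup are illustrative assumptions, not part of the package.

from snowflake.ml.data.data_connector import DataConnector
from snowflake.snowpark import context as sf_context

session = sf_context.get_active_session()  # assumes an active Snowpark session

# new in 1.7.3 (private preview): connector from a SQL query
connector = DataConnector.from_sql("SELECT * FROM SOME_TABLE", session=session)

# the DataFrame constructor now routes through from_sql using the frame's single query
connector_from_df = DataConnector.from_dataframe(session.table("SOME_TABLE"))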
@@ -75,9 +103,9 @@ class DataConnector:
     def from_sources(
         cls: Type[DataConnectorType],
         session: snowpark.Session,
-        sources:
+        sources: Sequence[data_source.DataSource],
         ingestor_class: Optional[Type[data_ingestor.DataIngestor]] = None,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> DataConnectorType:
         ingestor_class = ingestor_class or cls.DEFAULT_INGESTOR_CLASS
         ingestor = ingestor_class.from_sources(session, sources)
@@ -130,7 +158,11 @@ class DataConnector:
         func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
     )
     def to_torch_datapipe(
-        self,
+        self,
+        *,
+        batch_size: int,
+        shuffle: bool = False,
+        drop_last_batch: bool = True,
     ) -> "torch_data.IterDataPipe":  # type: ignore[type-arg]
         """Transform the Snowflake data into a ready-to-use Pytorch datapipe.
 
@@ -149,8 +181,13 @@
         """
         from snowflake.ml.data import torch_utils
 
+        expand_dims = self._kwargs.get("expand_dims", True)
         return torch_utils.TorchDataPipeWrapper(
-            self._ingestor,
+            self._ingestor,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last_batch,
+            expand_dims=expand_dims,
         )
 
     @telemetry.send_api_usage_telemetry(
@@ -179,8 +216,13 @@
         """
         from snowflake.ml.data import torch_utils
 
+        expand_dims = self._kwargs.get("expand_dims", True)
         return torch_utils.TorchDatasetWrapper(
-            self._ingestor,
+            self._ingestor,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last_batch,
+            expand_dims=expand_dims,
         )
 
     @telemetry.send_api_usage_telemetry(
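Following on from the sketch above, the batching arguments of to_torch_datapipe (and the torch dataset path) are now keyword-only, and the new expand_dims behavior is controlled through the connector's constructor kwargs rather than these methods. A hedged sketch:

# batch_size is keyword-only; shuffle and drop_last_batch keep their defaults
pipe = connector.to_torch_datapipe(batch_size=32, shuffle=True)

# expand_dims is not an argument here; it is read from self._kwargs, so it must be
# supplied when the connector is constructed, e.g. DataConnector(ingestor, expand_dims=False)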
snowflake/ml/data/data_ingestor.py CHANGED
@@ -6,6 +6,7 @@ from typing import (
     List,
     Optional,
     Protocol,
+    Sequence,
     Type,
     TypeVar,
 )
@@ -25,7 +26,7 @@ DataIngestorType = TypeVar("DataIngestorType", bound="DataIngestor")
 class DataIngestor(Protocol):
     @classmethod
     def from_sources(
-        cls: Type[DataIngestorType], session: snowpark.Session, sources:
+        cls: Type[DataIngestorType], session: snowpark.Session, sources: Sequence[data_source.DataSource]
     ) -> DataIngestorType:
         raise NotImplementedError
 
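The protocol now accepts any Sequence of data sources instead of a concrete list type. A minimal illustrative stub of the changed classmethod (only this method is shown; the real protocol defines further members, and the class name here is made up):

from typing import Sequence, Type, TypeVar

from snowflake import snowpark
from snowflake.ml.data import data_source

_T = TypeVar("_T", bound="StubIngestor")


class StubIngestor:
    def __init__(self, session: snowpark.Session, sources: Sequence[data_source.DataSource]) -> None:
        self._session = session
        self._sources = list(sources)

    @classmethod
    def from_sources(cls: Type[_T], session: snowpark.Session, sources: Sequence[data_source.DataSource]) -> _T:
        # Sequence is all the updated protocol requires, so tuples now type-check too
        return cls(session, sources)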
snowflake/ml/data/torch_utils.py CHANGED
@@ -17,6 +17,7 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         batch_size: Optional[int],
         shuffle: bool = False,
         drop_last: bool = False,
+        expand_dims: bool = True,
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_dataset() instead"""
         squeeze = False
@@ -29,6 +30,7 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         self._shuffle = shuffle
         self._drop_last = drop_last
         self._squeeze_outputs = squeeze
+        self._expand_dims = expand_dims
 
     def __iter__(self) -> Iterator[Dict[str, Union[npt.NDArray[Any], List[Any]]]]:
         max_idx = 0
@@ -47,7 +49,10 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         ):
             # Skip indices during multi-process data loading to prevent data duplication
             if counter == filter_idx:
-                yield {
+                yield {
+                    k: _preprocess_array(v, squeeze=self._squeeze_outputs, expand_dims=self._expand_dims)
+                    for k, v in batch.items()
+                }
             if counter < max_idx:
                 counter += 1
             else:
@@ -58,13 +63,21 @@ class TorchDataPipeWrapper(TorchDatasetWrapper, torch.utils.data.IterDataPipe[Di
     """Wrap a DataIngestor into a PyTorch IterDataPipe"""
 
     def __init__(
-        self,
+        self,
+        ingestor: data_ingestor.DataIngestor,
+        *,
+        batch_size: int,
+        shuffle: bool = False,
+        drop_last: bool = False,
+        expand_dims: bool = True,
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_datapipe() instead"""
-        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, expand_dims=expand_dims)
 
 
-def _preprocess_array(
+def _preprocess_array(
+    arr: npt.NDArray[Any], squeeze: bool = False, expand_dims: bool = True
+) -> Union[npt.NDArray[Any], List[np.object_]]:
     """Preprocesses batch column values."""
     single_dimensional = arr.ndim < 2 and not arr.dtype == np.object_
 
@@ -73,7 +86,7 @@ def _preprocess_array(arr: npt.NDArray[Any], squeeze: bool = False) -> Union[npt
         arr = arr.squeeze(axis=0)
 
     # For single dimensional data,
-    if single_dimensional:
+    if single_dimensional and expand_dims:
         axis = 0 if arr.ndim == 0 else 1
         arr = np.expand_dims(arr, axis=axis)
 
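A small numpy sketch of what the new expand_dims flag controls (the array values are made up): with expand_dims=True, the default that preserves the 1.7.2 behavior, a one-dimensional column batch becomes a column vector; with expand_dims=False the flat shape is kept, which is what FileSet relies on below.

import numpy as np

col = np.array([1.0, 2.0, 3.0])         # a batch of a scalar column, ndim == 1

expanded = np.expand_dims(col, axis=1)  # expand_dims=True path -> shape (3, 1)
flat = col                              # expand_dims=False path -> shape (3,)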
snowflake/ml/feature_store/examples/example_helper.py CHANGED
@@ -45,8 +45,9 @@ class ExampleHelper:
         """Return a dataframe object about descriptions of all examples."""
         root_dir = Path(__file__).parent
         rows = []
+        hide_folders = ["citibike_trip_features", "source_data"]
         for f_name in os.listdir(root_dir):
-            if os.path.isdir(os.path.join(root_dir, f_name)) and f_name[0].isalpha() and f_name
+            if os.path.isdir(os.path.join(root_dir, f_name)) and f_name[0].isalpha() and f_name not in hide_folders:
                 source_file_path = root_dir.joinpath(f"{f_name}/source.yaml")
                 source_dict = self._read_yaml(str(source_file_path))
                 rows.append((f_name, source_dict["model_category"], source_dict["desc"], source_dict["label_columns"]))
snowflake/ml/fileset/fileset.py CHANGED
@@ -11,11 +11,9 @@ from snowflake.ml._internal.exceptions import (
     fileset_error_messages,
     fileset_errors,
 )
-from snowflake.ml._internal.utils import
-
-
-    snowpark_dataframe_utils,
-)
+from snowflake.ml._internal.utils import identifier, snowpark_dataframe_utils
+from snowflake.ml.data import data_connector
+from snowflake.ml.data._internal import arrow_ingestor
 from snowflake.ml.fileset import sfcfs
 from snowflake.snowpark import exceptions as snowpark_exceptions, functions
 
@@ -285,6 +283,16 @@ class FileSet:
         """Get the Snowflake absolute path to this FileSet directory."""
         return _fileset_absolute_path(self._target_stage_loc, self.name)
 
+    def _to_data_connector(self) -> data_connector.DataConnector:
+        self._fs.optimize_read(self._list_files())
+        ingester = arrow_ingestor.ArrowIngestor(
+            self._snowpark_session,
+            self._list_files(),
+            format="parquet",
+            filesystem=self._fs,
+        )
+        return data_connector.DataConnector(ingester, expand_dims=False)
+
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
     )
@@ -362,13 +370,9 @@ class FileSet:
         ----
         {'_COL_1':[10]}
         """
-
-
-
-        self._fs.optimize_read(self._list_files())
-
-        input_dp = IterableWrapper(self._list_files())
-        return torch_datapipe_module.ReadAndParseParquet(input_dp, self._fs, batch_size, shuffle, drop_last_batch)
+        return self._to_data_connector().to_torch_datapipe(
+            batch_size=batch_size, shuffle=shuffle, drop_last_batch=drop_last_batch
+        )
 
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -402,12 +406,8 @@ class FileSet:
         ----
         {'_COL_1': <tf.Tensor: shape=(1,), dtype=int64, numpy=[10]>}
         """
-
-
-        self._fs.optimize_read(self._list_files())
-
-        return tf_dataset_module.read_and_parse_parquet(
-            self._list_files(), self._fs, batch_size, shuffle, drop_last_batch
+        return self._to_data_connector().to_tf_dataset(
+            batch_size=batch_size, shuffle=shuffle, drop_last_batch=drop_last_batch
         )
 
     @telemetry.send_api_usage_telemetry(
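With these two hunks FileSet stops parsing parquet itself; both readers delegate to the shared DataConnector path shown earlier. Roughly, for an existing FileSet instance fs:

# fs.to_torch_datapipe(batch_size, shuffle, drop_last_batch) is now equivalent to
dp = fs._to_data_connector().to_torch_datapipe(batch_size=32, shuffle=False, drop_last_batch=True)

# and fs.to_tf_dataset(...) to
ds = fs._to_data_connector().to_tf_dataset(batch_size=32, shuffle=False, drop_last_batch=True)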
snowflake/ml/model/_client/model/model_version_impl.py CHANGED
@@ -447,13 +447,15 @@ class ModelVersion(lineage_node.LineageNode):
         target_function_info = functions[0]
 
         if service_name:
+            database_name_id, schema_name_id, service_name_id = sql_identifier.parse_fully_qualified_name(service_name)
+
             return self._model_ops.invoke_method(
                 method_name=sql_identifier.SqlIdentifier(target_function_info["name"]),
                 signature=target_function_info["signature"],
                 X=X,
-                database_name=
-                schema_name=
-                service_name=
+                database_name=database_name_id,
+                schema_name=schema_name_id,
+                service_name=service_name_id,
                 strict_input_validation=strict_input_validation,
                 statement_params=statement_params,
             )
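A short sketch of the parsing step that replaces the removed arguments (the identifier values are made up): parse_fully_qualified_name splits a dotted service name into its database, schema, and object parts, which are then forwarded to invoke_method.

from snowflake.ml._internal.utils import sql_identifier

db, schema, service = sql_identifier.parse_fully_qualified_name("MY_DB.MY_SCHEMA.MY_SERVICE")
# db and schema are presumably None when the name is not fully qualified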
snowflake/ml/model/_client/ops/model_ops.py CHANGED
@@ -168,14 +168,10 @@ class ModelOperator:
         schema_name: Optional[sql_identifier.SqlIdentifier],
         model_name: sql_identifier.SqlIdentifier,
         version_name: sql_identifier.SqlIdentifier,
+        model_exists: bool,
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> None:
-        if
-            database_name=database_name,
-            schema_name=schema_name,
-            model_name=model_name,
-            statement_params=statement_params,
-        ):
+        if model_exists:
             return self._model_version_client.add_version_from_model_version(
                 source_database_name=source_database_name,
                 source_schema_name=source_schema_name,
snowflake/ml/model/_client/sql/model_version.py CHANGED
@@ -10,6 +10,7 @@ from snowflake.ml._internal.utils import (
     sql_identifier,
 )
 from snowflake.ml.model._client.sql import _base
+from snowflake.ml.model._model_composer.model_method import constants
 from snowflake.snowpark import dataframe, functions as F, row, types as spt
 from snowflake.snowpark._internal import utils as snowpark_utils
 
@@ -333,6 +334,11 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
 
         args_sql = ", ".join(args_sql_list)
 
+        wide_input = len(input_args) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
+        if wide_input:
+            input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
+            args_sql = f"object_construct_keep_null({input_args_sql})"
+
         sql = textwrap.dedent(
             f"""WITH {','.join(with_statements)}
             SELECT *,
@@ -412,6 +418,11 @@
 
         args_sql = ", ".join(args_sql_list)
 
+        wide_input = len(input_args) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
+        if wide_input:
+            input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
+            args_sql = f"object_construct_keep_null({input_args_sql})"
+
         sql = textwrap.dedent(
             f"""WITH {','.join(with_statements)}
             SELECT *,
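When a method has more than SNOWPARK_UDF_INPUT_COL_LIMIT (500) input columns, the generated call packs all columns into a single OBJECT argument via object_construct_keep_null instead of listing them positionally. A sketch of the resulting expression, with three illustrative columns standing in for the more-than-500 real ones (the diff uses arg.identifier(); plain strings are used here for brevity):

input_args = ["C1", "C2", "C3"]  # imagine more than 500 of these

# narrow case: positional arguments
args_sql = ", ".join(input_args)

# wide case: one OBJECT argument
input_args_sql = ", ".join(f"'{c}', {c}" for c in input_args)
args_sql = f"object_construct_keep_null({input_args_sql})"
# -> "object_construct_keep_null('C1', C1, 'C2', C2, 'C3', C3)"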
snowflake/ml/model/_model_composer/model_composer.py CHANGED
@@ -88,6 +88,7 @@ class ModelComposer:
         pip_requirements: Optional[List[str]] = None,
         target_platforms: Optional[List[model_types.TargetPlatform]] = None,
         python_version: Optional[str] = None,
+        user_files: Optional[Dict[str, List[str]]] = None,
         ext_modules: Optional[List[ModuleType]] = None,
         code_paths: Optional[List[str]] = None,
         task: model_types.Task = model_types.Task.UNKNOWN,
@@ -97,9 +98,12 @@ class ModelComposer:
         options = model_types.BaseModelSaveOption()
 
         if not snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
-            snowml_matched_versions = env_utils.
-
-
+            snowml_matched_versions = env_utils.get_matched_package_versions_in_information_schema(
+                self.session,
+                reqs=[requirements.Requirement(f"{env_utils.SNOWPARK_ML_PKG_NAME}=={snowml_env.VERSION}")],
+                python_version=python_version or snowml_env.PYTHON_VERSION,
+                statement_params=self._statement_params,
+            ).get(env_utils.SNOWPARK_ML_PKG_NAME, [])
 
             if len(snowml_matched_versions) < 1 and options.get("embed_local_ml_library", False) is False:
                 logging.info(
@@ -131,6 +135,7 @@ class ModelComposer:
             model_meta=self.packager.meta,
             model_rel_path=pathlib.PurePosixPath(ModelComposer.MODEL_DIR_REL_PATH),
             options=options,
+            user_files=user_files,
            data_sources=self._get_data_sources(model, sample_input_data),
            target_platforms=target_platforms,
        )
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py CHANGED
@@ -2,7 +2,7 @@ import collections
 import logging
 import pathlib
 import warnings
-from typing import List, Optional, cast
+from typing import Dict, List, Optional, cast
 
 import yaml
 
@@ -11,9 +11,11 @@ from snowflake.ml.data import data_source
 from snowflake.ml.model import type_hints
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
 from snowflake.ml.model._model_composer.model_method import (
+    constants,
     function_generator,
     model_method,
 )
+from snowflake.ml.model._model_composer.model_user_file import model_user_file
 from snowflake.ml.model._packager.model_meta import (
     model_meta as model_meta_api,
     model_meta_schema,
@@ -30,9 +32,11 @@ class ModelManifest:
         workspace_path: A local path where model related files should be dumped to.
         runtimes: A list of ModelRuntime objects managing the runtimes and environment in the MODEL object.
         methods: A list of ModelMethod objects managing the method we registered to the MODEL object.
+        user_files: A list of ModelUserFile objects managing extra files uploaded to the workspace.
     """
 
     MANIFEST_FILE_REL_PATH = "MANIFEST.yml"
+    _ENABLE_USER_FILES = False
     _DEFAULT_RUNTIME_NAME = "python_runtime"
 
     def __init__(self, workspace_path: pathlib.Path) -> None:
@@ -42,6 +46,7 @@ class ModelManifest:
         self,
         model_meta: model_meta_api.ModelMetadata,
         model_rel_path: pathlib.PurePosixPath,
+        user_files: Optional[Dict[str, List[str]]] = None,
         options: Optional[type_hints.ModelSaveOption] = None,
         data_sources: Optional[List[data_source.DataSource]] = None,
         target_platforms: Optional[List[type_hints.TargetPlatform]] = None,
@@ -79,6 +84,7 @@ class ModelManifest:
 
         self.function_generator = function_generator.FunctionGenerator(model_dir_rel_path=model_rel_path)
         self.methods: List[model_method.ModelMethod] = []
+
         for target_method in model_meta.signatures.keys():
             method = model_method.ModelMethod(
                 model_meta=model_meta,
@@ -88,11 +94,21 @@ class ModelManifest:
                 is_partitioned_function=model_meta.function_properties.get(target_method, {}).get(
                     model_meta_schema.FunctionProperties.PARTITIONED.value, False
                 ),
+                wide_input=len(model_meta.signatures[target_method].inputs) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT,
                 options=model_method.get_model_method_options_from_options(options, target_method),
             )
 
             self.methods.append(method)
 
+        self.user_files: List[model_user_file.ModelUserFile] = []
+
+        if user_files is not None:
+            for subdirectory, paths in user_files.items():
+                for path in paths:
+                    self.user_files.append(
+                        model_user_file.ModelUserFile(pathlib.PurePosixPath(subdirectory), pathlib.Path(path))
+                    )
+
         method_name_counter = collections.Counter([method.method_name for method in self.methods])
         dup_method_names = [k for k, v in method_name_counter.items() if v > 1]
         if dup_method_names:
@@ -129,6 +145,9 @@ class ModelManifest:
             ],
         )
 
+        if self._ENABLE_USER_FILES:
+            manifest_dict["user_files"] = [user_file.save(self.workspace_path) for user_file in self.user_files]
+
         lineage_sources = self._extract_lineage_info(data_sources)
         if lineage_sources:
             manifest_dict["lineage_sources"] = lineage_sources
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py CHANGED
@@ -94,5 +94,6 @@ class ModelManifestDict(TypedDict):
     runtimes: Required[Dict[str, ModelRuntimeDict]]
     methods: Required[List[ModelMethodDict]]
     user_data: NotRequired[Dict[str, Any]]
+    user_files: NotRequired[List[str]]
     lineage_sources: NotRequired[List[LineageSourceDict]]
     target_platforms: NotRequired[List[str]]
snowflake/ml/model/_model_composer/model_method/constants.py ADDED
@@ -0,0 +1 @@
+SNOWPARK_UDF_INPUT_COL_LIMIT = 500
snowflake/ml/model/_model_composer/model_method/function_generator.py CHANGED
@@ -43,6 +43,7 @@ class FunctionGenerator:
         target_method: str,
         function_type: str,
         is_partitioned_function: bool = False,
+        wide_input: bool = False,
         options: Optional[FunctionGenerateOptions] = None,
     ) -> None:
         import importlib_resources
@@ -70,6 +71,7 @@
             model_dir_name=self.model_dir_rel_path.name,
             target_method=target_method,
             max_batch_size=options.get("max_batch_size", None),
+            wide_input=wide_input,
             function_name=FunctionGenerator.FUNCTION_NAME,
         )
         with open(function_file_path, "w", encoding="utf-8") as f:
snowflake/ml/model/_model_composer/model_method/infer_function.py_template CHANGED
@@ -43,7 +43,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
 
 
 # Actual function
-@vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE)
+@vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE, flatten_object_input={wide_input})
 def {function_name}(df: pd.DataFrame) -> dict:
     df.columns = input_cols
     input_df = df.astype(dtype=dtype_map)
snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template CHANGED
@@ -48,7 +48,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
 
 # Actual table function
 class {function_name}:
-    @vectorized(input=pd.DataFrame)
+    @vectorized(input=pd.DataFrame, flatten_object_input={wide_input})
     def end_partition(self, df: pd.DataFrame) -> pd.DataFrame:
         df.columns = input_cols
         input_df = df.astype(dtype=dtype_map)
snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template CHANGED
@@ -43,7 +43,7 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
 
 # Actual table function
 class {function_name}:
-    @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE)
+    @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE, flatten_object_input={wide_input})
     def process(self, df: pd.DataFrame) -> pd.DataFrame:
         df.columns = input_cols
         input_df = df.astype(dtype=dtype_map)
snowflake/ml/model/_model_composer/model_method/model_method.py CHANGED
@@ -7,7 +7,10 @@ from typing_extensions import NotRequired
 from snowflake.ml._internal.utils import sql_identifier
 from snowflake.ml.model import model_signature, type_hints
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
-from snowflake.ml.model._model_composer.model_method import
+from snowflake.ml.model._model_composer.model_method import (
+    constants,
+    function_generator,
+)
 from snowflake.ml.model._packager.model_meta import model_meta as model_meta_api
 from snowflake.snowpark._internal import type_utils
 
@@ -64,6 +67,7 @@ class ModelMethod:
         runtime_name: str,
         function_generator: function_generator.FunctionGenerator,
         is_partitioned_function: bool = False,
+        wide_input: bool = False,
         options: Optional[ModelMethodOptions] = None,
     ) -> None:
         self.model_meta = model_meta
@@ -71,6 +75,7 @@
         self.function_generator = function_generator
         self.is_partitioned_function = is_partitioned_function
         self.runtime_name = runtime_name
+        self.wide_input = wide_input
         self.options = options or {}
         try:
             self.method_name = sql_identifier.SqlIdentifier(
@@ -114,12 +119,15 @@
             self.target_method,
             self.function_type,
             self.is_partitioned_function,
+            self.wide_input,
             options=options,
         )
         input_list = [
             ModelMethod._get_method_arg_from_feature(ft, case_sensitive=self.options.get("case_sensitive", False))
             for ft in self.model_meta.signatures[self.target_method].inputs
         ]
+        if len(input_list) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT:
+            input_list = [{"name": "INPUT", "type": "OBJECT"}]
         input_name_counter = collections.Counter([input_info["name"] for input_info in input_list])
         dup_input_names = [k for k, v in input_name_counter.items() if v > 1]
         if dup_input_names:
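On the manifest side the same threshold collapses the declared inputs into a single OBJECT-typed argument, and the flag reaches the UDF templates as flatten_object_input. A condensed sketch of that fallback (the feature names and types are illustrative):

SNOWPARK_UDF_INPUT_COL_LIMIT = 500  # from model_method/constants.py

input_list = [{"name": f"F{i}", "type": "FLOAT"} for i in range(600)]
if len(input_list) > SNOWPARK_UDF_INPUT_COL_LIMIT:
    # the method is declared with one OBJECT input; the vectorized UDF unpacks it
    # because flatten_object_input=True is rendered into the template
    input_list = [{"name": "INPUT", "type": "OBJECT"}]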
snowflake/ml/model/_model_composer/model_user_file/model_user_file.py ADDED
@@ -0,0 +1,27 @@
+import os
+import pathlib
+
+from snowflake.ml._internal import file_utils
+
+
+class ModelUserFile:
+    """Class representing a user provided file.
+
+    Attributes:
+        subdirectory_name: A local path where model related files should be dumped to.
+        local_path: A list of ModelRuntime objects managing the runtimes and environment in the MODEL object.
+    """
+
+    USER_FILES_DIR_REL_PATH = "user_files"
+
+    def __init__(self, subdirectory_name: pathlib.PurePosixPath, local_path: pathlib.Path) -> None:
+        self.subdirectory_name = subdirectory_name
+        self.local_path = local_path
+
+    def save(self, workspace_path: pathlib.Path) -> str:
+        user_files_path = workspace_path / ModelUserFile.USER_FILES_DIR_REL_PATH / self.subdirectory_name
+        user_files_path.mkdir(parents=True, exist_ok=True)
+
+        # copy the file to the workspace
+        file_utils.copy_file_or_tree(str(self.local_path), str(user_files_path))
+        return os.path.join(self.subdirectory_name, self.local_path.name)
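The new ModelUserFile helper copies a local file into user_files/<subdirectory> inside the model workspace and returns the workspace-relative path; ModelComposer.save() accepts the files as a mapping of subdirectory to local paths, and writing them into MANIFEST.yml is still gated behind _ENABLE_USER_FILES = False. A sketch of the helper on its own (the paths are made up and assumed to exist):

import pathlib
import tempfile

from snowflake.ml.model._model_composer.model_user_file import model_user_file

workspace = pathlib.Path(tempfile.mkdtemp())
uf = model_user_file.ModelUserFile(pathlib.PurePosixPath("configs"), pathlib.Path("/tmp/settings.yaml"))

# copies /tmp/settings.yaml under <workspace>/user_files/configs/
# and returns "configs/settings.yaml"
rel_path = uf.save(workspace)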
snowflake/ml/model/_packager/model_handlers/_utils.py CHANGED
@@ -1,7 +1,8 @@
 import json
 import os
+import pathlib
 import warnings
-from typing import Any, Callable, Iterable, List, Optional, Sequence, cast
+from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, cast
 
 import numpy as np
 import numpy.typing as npt
@@ -118,7 +119,7 @@ def get_explainability_supported_background(
     meta: model_meta.ModelMetadata,
    explain_target_method: Optional[str],
 ) -> pd.DataFrame:
-    if sample_input_data is None:
+    if sample_input_data is None or explain_target_method is None:
         return None
 
     if isinstance(sample_input_data, pd.DataFrame):
@@ -223,3 +224,27 @@ def get_explain_target_method(
         if method in target_methods_list:
             return method
     return None
+
+
+def save_transformers_config_with_auto_map(local_model_path: str) -> None:
+    import huggingface_hub
+
+    for f_path in pathlib.Path(local_model_path).iterdir():
+        if f_path.name in ["config.json", "tokenizer_config.json"]:
+            with open(f_path) as f:
+                config_dict = json.load(f)
+
+            # a. get repository and class_path from configs
+            auto_map_configs = cast(Dict[str, str], config_dict.get("auto_map", {}))
+            for config_name, config_value in auto_map_configs.items():
+                repository, _, class_path = config_value.rpartition("--")
+
+                # b. download required configs from hf hub
+                if repository:
+                    huggingface_hub.snapshot_download(repo_id=repository, local_dir=local_model_path)
+
+                # c. update config files
+                config_dict["auto_map"][config_name] = class_path
+
+            with open(f_path, "w") as f:
+                json.dump(config_dict, f)
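The new save_transformers_config_with_auto_map helper rewrites Hugging Face auto_map entries of the form "<repo>--<module.Class>": it snapshots the referenced repository next to the model and keeps only the local class path in the config. A sketch of the transformation on a config fragment (the repo and class names are made up, and the download call is shown as a comment):

config_dict = {"auto_map": {"AutoModel": "some-org/custom-model--modeling_custom.CustomModel"}}

for name, value in config_dict["auto_map"].items():
    repository, _, class_path = value.rpartition("--")
    # huggingface_hub.snapshot_download(repo_id=repository, local_dir=local_model_path) runs here
    config_dict["auto_map"][name] = class_path

# config_dict["auto_map"] == {"AutoModel": "modeling_custom.CustomModel"}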
snowflake/ml/model/_packager/model_handlers/catboost.py CHANGED
@@ -94,8 +94,8 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
             sample_input_data=sample_input_data,
             get_prediction_fn=get_prediction,
         )
-        model_task_and_output = model_task_utils.
-        model_meta.task =
+        model_task_and_output = model_task_utils.resolve_model_task_and_output_type(model, model_meta.task)
+        model_meta.task = model_task_and_output.task
         if enable_explainability:
             explain_target_method = handlers_utils.get_explain_target_method(model_meta, cls.EXPLAIN_TARGET_METHODS)
             model_meta = handlers_utils.add_explain_method_signature(
@@ -227,7 +227,7 @@
             import shap
 
             explainer = shap.TreeExplainer(raw_model)
-            df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X)
+            df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer.shap_values(X))
             return model_signature_utils.rename_pandas_df(df, signature.outputs)
 
         if target_method == "explain":