PyPI - snowflake-ml-python - Versions diffs - 1.21.0__py3-none-any.whl → 1.23.0__py3-none-any.whl - Mend

snowflake-ml-python 1.21.0__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

snowflake/ml/model/_client/sql/model_version.py CHANGED Viewed

@@ -22,6 +22,14 @@ def _normalize_url_for_sql(url: str) -> str:
     return f"'{url}'"
+def _format_param_value(value: Any) -> str:
+    if isinstance(value, str):
+        return f"'{snowpark_utils.escape_single_quotes(value)}'"  # type: ignore[no-untyped-call]
+    elif value is None:
+        return "NULL"
+    return str(value)
 class ModelVersionSQLClient(_base._BaseSQLClient):
     FUNCTION_NAME_COL_NAME = "name"
     FUNCTION_RETURN_TYPE_COL_NAME = "return_type"
@@ -354,6 +362,7 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
         input_args: list[sql_identifier.SqlIdentifier],
         returns: list[tuple[str, spt.DataType, sql_identifier.SqlIdentifier]],
         statement_params: Optional[dict[str, Any]] = None,
+        params: Optional[list[tuple[sql_identifier.SqlIdentifier, Any]]] = None,
     ) -> dataframe.DataFrame:
         with_statements = []
         if len(input_df.queries["queries"]) == 1 and len(input_df.queries["post_actions"]) == 0:
@@ -392,10 +401,17 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
         args_sql = ", ".join(args_sql_list)
-        wide_input = len(input_args) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
+        if params:
+            param_sql = ", ".join(_format_param_value(val) for _, val in params)
+            args_sql = f"{args_sql}, {param_sql}" if args_sql else param_sql
+        total_args = len(input_args) + (len(params) if params else 0)
+        wide_input = total_args > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
         if wide_input:
-            input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
-            args_sql = f"object_construct_keep_null({input_args_sql})"
+            parts = [f"'{arg}', {arg.identifier()}" for arg in input_args]
+            if params:
+                parts.extend(f"'{name}', {_format_param_value(val)}" for name, val in params)
+            args_sql = f"object_construct_keep_null({', '.join(parts)})"
         sql = textwrap.dedent(
             f"""WITH {','.join(with_statements)}
@@ -439,6 +455,7 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
         statement_params: Optional[dict[str, Any]] = None,
         is_partitioned: bool = True,
         explain_case_sensitive: bool = False,
+        params: Optional[list[tuple[sql_identifier.SqlIdentifier, Any]]] = None,
     ) -> dataframe.DataFrame:
         with_statements = []
         if len(input_df.queries["queries"]) == 1 and len(input_df.queries["post_actions"]) == 0:
@@ -477,10 +494,17 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
         args_sql = ", ".join(args_sql_list)
-        wide_input = len(input_args) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
+        if params:
+            param_sql = ", ".join(_format_param_value(val) for _, val in params)
+            args_sql = f"{args_sql}, {param_sql}" if args_sql else param_sql
+        total_args = len(input_args) + (len(params) if params else 0)
+        wide_input = total_args > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
         if wide_input:
-            input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
-            args_sql = f"object_construct_keep_null({input_args_sql})"
+            parts = [f"'{arg}', {arg.identifier()}" for arg in input_args]
+            if params:
+                parts.extend(f"'{name}', {_format_param_value(val)}" for name, val in params)
+            args_sql = f"object_construct_keep_null({', '.join(parts)})"
         sql = textwrap.dedent(
             f"""WITH {','.join(with_statements)}

snowflake/ml/model/_client/sql/service.py CHANGED Viewed

@@ -20,6 +20,15 @@ from snowflake.snowpark._internal import utils as snowpark_utils
 logger = logging.getLogger(__name__)
+def _format_param_value(value: Any) -> str:
+    if isinstance(value, str):
+        return f"'{snowpark_utils.escape_single_quotes(value)}'"  # type: ignore[no-untyped-call]
+    elif value is None:
+        return "NULL"
+    return str(value)
 # Using this token instead of '?' to avoid escaping issues
 # After quotes are escaped, we replace this token with '|| ? ||'
 QMARK_RESERVED_TOKEN = "<QMARK_RESERVED_TOKEN>"
@@ -38,22 +47,6 @@ class ServiceStatus(enum.Enum):
     INTERNAL_ERROR = "INTERNAL_ERROR"
-class InstanceStatus(enum.Enum):
-    PENDING = "PENDING"
-    READY = "READY"
-    FAILED = "FAILED"
-    TERMINATING = "TERMINATING"
-    SUCCEEDED = "SUCCEEDED"
-class ContainerStatus(enum.Enum):
-    PENDING = "PENDING"
-    READY = "READY"
-    DONE = "DONE"
-    FAILED = "FAILED"
-    UNKNOWN = "UNKNOWN"
 @dataclasses.dataclass
 class ServiceStatusInfo:
     """
@@ -63,8 +56,8 @@ class ServiceStatusInfo:
     service_status: ServiceStatus
     instance_id: Optional[int] = None
-    instance_status: Optional[InstanceStatus] = None
-    container_status: Optional[ContainerStatus] = None
+    instance_status: Optional[str] = None
+    container_status: Optional[str] = None
     message: Optional[str] = None
@@ -140,6 +133,7 @@ class ServiceSQLClient(_base._BaseSQLClient):
         input_args: list[sql_identifier.SqlIdentifier],
         returns: list[tuple[str, spt.DataType, sql_identifier.SqlIdentifier]],
         statement_params: Optional[dict[str, Any]] = None,
+        params: Optional[list[tuple[sql_identifier.SqlIdentifier, Any]]] = None,
     ) -> dataframe.DataFrame:
         with_statements = []
         actual_database_name = database_name or self._database_name
@@ -170,10 +164,17 @@ class ServiceSQLClient(_base._BaseSQLClient):
             args_sql_list.append(input_arg_value)
         args_sql = ", ".join(args_sql_list)
-        wide_input = len(input_args) > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
+        if params:
+            param_sql = ", ".join(_format_param_value(val) for _, val in params)
+            args_sql = f"{args_sql}, {param_sql}" if args_sql else param_sql
+        total_args = len(input_args) + (len(params) if params else 0)
+        wide_input = total_args > constants.SNOWPARK_UDF_INPUT_COL_LIMIT
         if wide_input:
-            input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
-            args_sql = f"object_construct_keep_null({input_args_sql})"
+            parts = [f"'{arg}', {arg.identifier()}" for arg in input_args]
+            if params:
+                parts.extend(f"'{name}', {_format_param_value(val)}" for name, val in params)
+            args_sql = f"object_construct_keep_null({', '.join(parts)})"
         fully_qualified_service_name = self.fully_qualified_object_name(
             actual_database_name, actual_schema_name, service_name
@@ -255,17 +256,12 @@ class ServiceSQLClient(_base._BaseSQLClient):
         )
         statuses = []
         for r in rows:
-            instance_status, container_status = None, None
-            if r[ServiceSQLClient.INSTANCE_STATUS] is not None:
-                instance_status = InstanceStatus(r[ServiceSQLClient.INSTANCE_STATUS])
-            if r[ServiceSQLClient.CONTAINER_STATUS] is not None:
-                container_status = ContainerStatus(r[ServiceSQLClient.CONTAINER_STATUS])
             statuses.append(
                 ServiceStatusInfo(
                     service_status=ServiceStatus(r[ServiceSQLClient.SERVICE_STATUS]),
                     instance_id=r[ServiceSQLClient.INSTANCE_ID],
-                    instance_status=instance_status,
-                    container_status=container_status,
+                    instance_status=r[ServiceSQLClient.INSTANCE_STATUS],
+                    container_status=r[ServiceSQLClient.CONTAINER_STATUS],
                     message=r[ServiceSQLClient.MESSAGE] if include_message else None,
                 )
             )
@@ -301,7 +297,12 @@ class ServiceSQLClient(_base._BaseSQLClient):
             False if service doesn't have proxy container
         """
         try:
-            spec_raw = yaml.safe_load(row[ServiceSQLClient.DESC_SERVICE_SPEC_COL_NAME])
+            spec_yaml = row[ServiceSQLClient.DESC_SERVICE_SPEC_COL_NAME]
+            if spec_yaml is None:
+                return False
+            spec_raw = yaml.safe_load(spec_yaml)
+            if spec_raw is None:
+                return False
             spec = cast(dict[str, Any], spec_raw)
             proxy_container_spec = next(

snowflake/ml/model/_model_composer/model_composer.py CHANGED Viewed

@@ -131,7 +131,7 @@ class ModelComposer:
         python_version: Optional[str] = None,
         user_files: Optional[dict[str, list[str]]] = None,
         ext_modules: Optional[list[ModuleType]] = None,
-        code_paths: Optional[list[str]] = None,
+        code_paths: Optional[list[model_types.CodePathLike]] = None,
         task: model_types.Task = model_types.Task.UNKNOWN,
         experiment_info: Optional["ExperimentInfo"] = None,
         options: Optional[model_types.ModelSaveOption] = None,

snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py CHANGED Viewed

@@ -39,6 +39,10 @@ class ModelMethodSignatureFieldWithName(ModelMethodSignatureField):
     name: Required[str]
+class ModelMethodSignatureFieldWithNameAndDefault(ModelMethodSignatureFieldWithName):
+    default: Required[Any]
 class ModelFunctionMethodDict(TypedDict):
     name: Required[str]
     runtime: Required[str]
@@ -46,6 +50,7 @@ class ModelFunctionMethodDict(TypedDict):
     handler: Required[str]
     inputs: Required[list[ModelMethodSignatureFieldWithName]]
     outputs: Required[Union[list[ModelMethodSignatureField], list[ModelMethodSignatureFieldWithName]]]
+    params: NotRequired[list[ModelMethodSignatureFieldWithNameAndDefault]]
     volatility: NotRequired[str]

snowflake/ml/model/_model_composer/model_method/infer_function.py_template CHANGED Viewed

@@ -41,11 +41,29 @@ features = meta.signatures[TARGET_METHOD].inputs
 input_cols = [feature.name for feature in features]
 dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
+# Load inference parameters from method signature (if any)
+param_cols = []
+param_defaults = {{}}
+if hasattr(meta.signatures[TARGET_METHOD], "params") and meta.signatures[TARGET_METHOD].params:
+    for param_spec in meta.signatures[TARGET_METHOD].params:
+        param_cols.append(param_spec.name)
+        param_defaults[param_spec.name] = param_spec.default_value
 # Actual function
 @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE, flatten_object_input={wide_input})
 def {function_name}(df: pd.DataFrame) -> dict:
-    df.columns = input_cols
-    input_df = df.astype(dtype=dtype_map)
-    predictions_df = runner(input_df[input_cols])
+    df.columns = input_cols + param_cols
+    input_df = df[input_cols].astype(dtype=dtype_map)
+    # Extract runtime param values, using defaults if None
+    method_params = {{}}
+    for col in param_cols:
+        val = df[col].iloc[0]
+        if val is None or pd.isna(val):
+            method_params[col] = param_defaults[col]
+        else:
+            method_params[col] = val
+    predictions_df = runner(input_df, **method_params)
     return predictions_df.replace({{pd.NA: None, np.nan: None}}).to_dict("records")

snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template CHANGED Viewed

@@ -45,11 +45,29 @@ features = meta.signatures[TARGET_METHOD].inputs
 input_cols = [feature.name for feature in features]
 dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
+# Load inference parameters from method signature (if any)
+param_cols = []
+param_defaults = {{}}
+if hasattr(meta.signatures[TARGET_METHOD], "params") and meta.signatures[TARGET_METHOD].params:
+    for param_spec in meta.signatures[TARGET_METHOD].params:
+        param_cols.append(param_spec.name)
+        param_defaults[param_spec.name] = param_spec.default_value
 # Actual table function
 class {function_name}:
     @vectorized(input=pd.DataFrame, flatten_object_input={wide_input})
     def end_partition(self, df: pd.DataFrame) -> pd.DataFrame:
-        df.columns = input_cols
-        input_df = df.astype(dtype=dtype_map)
-        return runner(input_df[input_cols])
+        df.columns = input_cols + param_cols
+        input_df = df[input_cols].astype(dtype=dtype_map)
+        # Extract runtime param values, using defaults if None
+        method_params = {{}}
+        for col in param_cols:
+            val = df[col].iloc[0]
+            if val is None or pd.isna(val):
+                method_params[col] = param_defaults[col]
+            else:
+                method_params[col] = val
+        return runner(input_df, **method_params)

snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template CHANGED Viewed

@@ -40,11 +40,29 @@ features = meta.signatures[TARGET_METHOD].inputs
 input_cols = [feature.name for feature in features]
 dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
+# Load inference parameters from method signature (if any)
+param_cols = []
+param_defaults = {{}}
+if hasattr(meta.signatures[TARGET_METHOD], "params") and meta.signatures[TARGET_METHOD].params:
+    for param_spec in meta.signatures[TARGET_METHOD].params:
+        param_cols.append(param_spec.name)
+        param_defaults[param_spec.name] = param_spec.default_value
 # Actual table function
 class {function_name}:
     @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE, flatten_object_input={wide_input})
     def process(self, df: pd.DataFrame) -> pd.DataFrame:
-        df.columns = input_cols
-        input_df = df.astype(dtype=dtype_map)
-        return runner(input_df[input_cols])
+        df.columns = input_cols + param_cols
+        input_df = df[input_cols].astype(dtype=dtype_map)
+        # Extract runtime param values, using defaults if None
+        method_params = {{}}
+        for col in param_cols:
+            val = df[col].iloc[0]
+            if val is None or pd.isna(val):
+                method_params[col] = param_defaults[col]
+            else:
+                method_params[col] = val
+        return runner(input_df, **method_params)

snowflake/ml/model/_model_composer/model_method/model_method.py CHANGED Viewed

@@ -105,7 +105,7 @@ class ModelMethod:
         except ValueError as e:
             raise ValueError(
                 f"Your target method {self.target_method} cannot be resolved as valid SQL identifier. "
-                "Try specify `case_sensitive` as True."
+                "Try specifying `case_sensitive` as True."
             ) from e
         if self.target_method not in self.model_meta.signatures.keys():
@@ -127,12 +127,42 @@ class ModelMethod:
         except ValueError as e:
             raise ValueError(
                 f"Your feature {feature.name} cannot be resolved as valid SQL identifier. "
-                "Try specify `case_sensitive` as True."
+                "Try specifying `case_sensitive` as True."
             ) from e
         return model_manifest_schema.ModelMethodSignatureFieldWithName(
             name=feature_name.resolved(), type=type_utils.convert_sp_to_sf_type(feature.as_snowpark_type())
         )
+    @staticmethod
+    def _flatten_params(params: list[model_signature.BaseParamSpec]) -> list[model_signature.ParamSpec]:
+        """Flatten ParamGroupSpec into leaf ParamSpec items."""
+        result: list[model_signature.ParamSpec] = []
+        for param in params:
+            if isinstance(param, model_signature.ParamSpec):
+                result.append(param)
+            elif isinstance(param, model_signature.ParamGroupSpec):
+                result.extend(ModelMethod._flatten_params(param.specs))
+        return result
+    @staticmethod
+    def _get_method_arg_from_param(
+        param_spec: model_signature.ParamSpec,
+        case_sensitive: bool = False,
+    ) -> model_manifest_schema.ModelMethodSignatureFieldWithNameAndDefault:
+        try:
+            param_name = sql_identifier.SqlIdentifier(param_spec.name, case_sensitive=case_sensitive)
+        except ValueError as e:
+            raise ValueError(
+                f"Your parameter {param_spec.name} cannot be resolved as valid SQL identifier. "
+                "Try specifying `case_sensitive` as True."
+            ) from e
+        default_value = param_spec.default_value if param_spec.default_value is None else str(param_spec.default_value)
+        return model_manifest_schema.ModelMethodSignatureFieldWithNameAndDefault(
+            name=param_name.resolved(),
+            type=type_utils.convert_sp_to_sf_type(param_spec.dtype.as_snowpark_type()),
+            default=default_value,
+        )
     def save(
         self, workspace_path: pathlib.Path, options: Optional[function_generator.FunctionGenerateOptions] = None
     ) -> model_manifest_schema.ModelMethodDict:
@@ -182,6 +212,36 @@ class ModelMethod:
             inputs=input_list,
             outputs=outputs,
         )
+        # Add parameters if signature has parameters
+        if self.model_meta.signatures[self.target_method].params:
+            flat_params = ModelMethod._flatten_params(list(self.model_meta.signatures[self.target_method].params))
+            param_list = [
+                ModelMethod._get_method_arg_from_param(
+                    param_spec, case_sensitive=self.options.get("case_sensitive", False)
+                )
+                for param_spec in flat_params
+            ]
+            param_name_counter = collections.Counter([param_info["name"] for param_info in param_list])
+            dup_param_names = [k for k, v in param_name_counter.items() if v > 1]
+            if dup_param_names:
+                raise ValueError(
+                    f"Found duplicate parameter named resolved as {', '.join(dup_param_names)} in the method"
+                    f" {self.target_method}. This might be because you have parameters with same letters but "
+                    "different cases. In this case, set case_sensitive as True for those methods to distinguish them."
+                )
+            # Check for name collisions between parameters and inputs using existing counters
+            collision_names = [name for name in param_name_counter if name in input_name_counter]
+            if collision_names:
+                raise ValueError(
+                    f"Found parameter(s) with the same name as input feature(s): {', '.join(sorted(collision_names))} "
+                    f"in the method {self.target_method}. Parameters and inputs must have distinct names. "
+                    "Try using case_sensitive=True if the names differ only by case."
+                )
+            method_dict["params"] = param_list
         should_set_volatility = (
             platform_capabilities.PlatformCapabilities.get_instance().is_set_module_functions_volatility_from_manifest()
         )

snowflake/ml/model/_packager/model_handlers/custom.py CHANGED Viewed

@@ -86,6 +86,9 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
                 get_prediction_fn=get_prediction,
             )
+            # Add parameters extracted from custom model inference methods to signatures
+            cls._add_method_parameters_to_signatures(model, model_meta)
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
         if model.context.artifacts:
@@ -188,6 +191,55 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
         assert isinstance(model, custom_model.CustomModel)
         return model
+    @classmethod
+    def _add_method_parameters_to_signatures(
+        cls,
+        model: "custom_model.CustomModel",
+        model_meta: model_meta_api.ModelMetadata,
+    ) -> None:
+        """Extract parameters from custom model inference methods and add them to signatures.
+        For each inference method, if the signature doesn't already have parameters and the method
+        has keyword-only parameters with defaults, create ParamSpecs and add them to the signature.
+        Args:
+            model: The custom model instance.
+            model_meta: The model metadata containing signatures to augment.
+        """
+        for method in model._get_infer_methods():
+            method_name = method.__name__
+            if method_name not in model_meta.signatures:
+                continue
+            sig = model_meta.signatures[method_name]
+            # Skip if the signature already has parameters (user-provided or previously set)
+            if sig.params:
+                continue
+            # Extract parameters from the method
+            method_params = custom_model.get_method_parameters(method)
+            if not method_params:
+                continue
+            # Create ParamSpecs from the method parameters
+            param_specs = []
+            for param_name, param_type, param_default in method_params:
+                dtype = model_signature.DataType.from_python_type(param_type)
+                param_spec = model_signature.ParamSpec(
+                    name=param_name,
+                    dtype=dtype,
+                    default_value=param_default,
+                )
+                param_specs.append(param_spec)
+            # Create a new signature with parameters
+            model_meta.signatures[method_name] = model_signature.ModelSignature(
+                inputs=sig.inputs,
+                outputs=sig.outputs,
+                params=param_specs,
+            )
     @classmethod
     def convert_as_custom_model(
         cls,

snowflake/ml/model/_packager/model_handlers/huggingface.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import io
 import json
 import logging
 import os
@@ -28,7 +29,10 @@ from snowflake.ml.model._packager.model_meta import (
     model_meta as model_meta_api,
     model_meta_schema,
 )
-from snowflake.ml.model._signatures import utils as model_signature_utils
+from snowflake.ml.model._signatures import (
+    core as model_signature_core,
+    utils as model_signature_utils,
+)
 from snowflake.ml.model.models import (
     huggingface as huggingface_base,
     huggingface_pipeline,
@@ -530,7 +534,10 @@ class TransformersPipelineHandler(
                         # verify when the target method is __call__ and
                         # if the signature is default text-generation signature
                         # then use the HuggingFaceOpenAICompatibleModel to wrap the pipeline
-                        if signature == openai_signatures._OPENAI_CHAT_SIGNATURE_SPEC:
+                        if (
+                            signature == openai_signatures._OPENAI_CHAT_SIGNATURE_SPEC
+                            or signature == openai_signatures._OPENAI_CHAT_SIGNATURE_SPEC_WITH_CONTENT_FORMAT_STRING
+                        ):
                             wrapped_model = HuggingFaceOpenAICompatibleModel(pipeline=raw_model)
                             temp_res = X.apply(
@@ -554,6 +561,19 @@ class TransformersPipelineHandler(
                             else:
                                 input_data = X[signature.inputs[0].name].to_list()
                             temp_res = getattr(raw_model, target_method)(input_data)
+                    elif isinstance(raw_model, transformers.ImageClassificationPipeline):
+                        # Image classification expects PIL Images. Convert bytes to PIL Images.
+                        from PIL import Image
+                        input_col = signature.inputs[0].name
+                        images = [Image.open(io.BytesIO(img_bytes)) for img_bytes in X[input_col].to_list()]
+                        temp_res = getattr(raw_model, target_method)(images)
+                    elif isinstance(raw_model, transformers.AutomaticSpeechRecognitionPipeline):
+                        # ASR pipeline accepts a single audio input (bytes, str, np.ndarray, or dict),
+                        # not a list. Process each audio input individually.
+                        input_col = signature.inputs[0].name
+                        audio_inputs = X[input_col].to_list()
+                        temp_res = [getattr(raw_model, target_method)(audio) for audio in audio_inputs]
                     else:
                         # TODO: remove conversational pipeline code
                         # For others, we could offer the whole dataframe as a list.
@@ -615,11 +635,14 @@ class TransformersPipelineHandler(
                         temp_res = [[conv.generated_responses] for conv in temp_res]
                     # To concat those who outputs a list with one input.
-                    if isinstance(temp_res[0], list):
-                        if isinstance(temp_res[0][0], dict):
-                            res = pd.DataFrame({0: temp_res})
-                        else:
-                            res = pd.DataFrame(temp_res)
+                    # if `signature.outputs` is single valued and is a FeatureGroupSpec,
+                    # we create a DataFrame with one column and the values are stored as a dictionary.
+                    # Otherwise, we create a DataFrame with the output as the column.
+                    if len(signature.outputs) == 1 and isinstance(
+                        signature.outputs[0], model_signature_core.FeatureGroupSpec
+                    ):
+                        # creating a dataframe with one column
+                        res = pd.DataFrame({signature.outputs[0].name: temp_res})
                     else:
                         res = pd.DataFrame(temp_res)
@@ -702,7 +725,6 @@ class HuggingFaceOpenAICompatibleModel:
         self.pipeline = pipeline
         self.model = self.pipeline.model
         self.tokenizer = self.pipeline.tokenizer
         self.model_name = self.pipeline.model.name_or_path
         if self.tokenizer.pad_token is None:
@@ -724,11 +746,33 @@ class HuggingFaceOpenAICompatibleModel:
         Returns:
             The formatted prompt string ready for model input.
         """
+        final_messages = []
+        for message in messages:
+            if isinstance(message.get("content", ""), str):
+                final_messages.append({"role": message.get("role", "user"), "content": message.get("content", "")})
+            else:
+                # extract only the text from the content
+                # sample data:
+                # {
+                #     "role": "user",
+                #     "content": [
+                #         {"type": "text", "text": "Hello, how are you?"}, # extracted
+                #         {"type": "image", "image": "https://example.com/image.png"}, # not extracted
+                #     ],
+                # }
+                for content_part in message.get("content", []):
+                    if content_part.get("type", "") == "text":
+                        final_messages.append(
+                            {"role": message.get("role", "user"), "content": content_part.get("text", "")}
+                        )
+                    # TODO: implement other content types
         # Use the tokenizer's apply_chat_template method.
         # We ensured a template exists in __init__.
         if hasattr(self.tokenizer, "apply_chat_template"):
             return self.tokenizer.apply_chat_template(  # type: ignore[no-any-return]
-                messages,
+                final_messages,
                 tokenize=False,
                 add_generation_prompt=True,
             )
@@ -736,7 +780,7 @@ class HuggingFaceOpenAICompatibleModel:
         # Fallback for very old transformers without apply_chat_template
         # Manually apply ChatML-like formatting
         prompt = ""
-        for message in messages:
+        for message in final_messages:
             role = message.get("role", "user")
             content = message.get("content", "")
             prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"

snowflake-ml-python 1.21.0__py3-none-any.whl → 1.23.0__py3-none-any.whl

snowflake-ml-python 1.21.0__py3-none-any.whl → 1.23.0__py3-none-any.whl