PyPI - snowflake-ml-python - Versions diffs - 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl - Mend

snowflake-ml-python 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192) hide show

snowflake/ml/model/_client/service/model_deployment_spec.py CHANGED Viewed

@@ -194,16 +194,14 @@ class ModelDeploymentSpec:
         self,
         job_name: sql_identifier.SqlIdentifier,
         inference_compute_pool_name: sql_identifier.SqlIdentifier,
+        function_name: str,
+        input_stage_location: str,
+        output_stage_location: str,
+        completion_filename: str,
+        input_file_pattern: str,
         warehouse: sql_identifier.SqlIdentifier,
-        target_method: str,
-        input_table_name: sql_identifier.SqlIdentifier,
-        output_table_name: sql_identifier.SqlIdentifier,
         job_database_name: Optional[sql_identifier.SqlIdentifier] = None,
         job_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
-        input_table_database_name: Optional[sql_identifier.SqlIdentifier] = None,
-        input_table_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
-        output_table_database_name: Optional[sql_identifier.SqlIdentifier] = None,
-        output_table_schema_name: Optional[sql_identifier.SqlIdentifier] = None,
         cpu: Optional[str] = None,
         memory: Optional[str] = None,
         gpu: Optional[Union[str, int]] = None,
@@ -215,16 +213,14 @@ class ModelDeploymentSpec:
         Args:
             job_name: Name of the job.
             inference_compute_pool_name: Compute pool for inference.
+            warehouse: Warehouse for the job.
+            function_name: Function name.
+            input_stage_location: Stage location for input data.
+            output_stage_location: Stage location for output data.
             job_database_name: Database name for the job.
             job_schema_name: Schema name for the job.
-            warehouse: Warehouse for the job.
-            target_method: Target method for inference.
-            input_table_name: Input table name.
-            output_table_name: Output table name.
-            input_table_database_name: Database for input table.
-            input_table_schema_name: Schema for input table.
-            output_table_database_name: Database for output table.
-            output_table_schema_name: Schema for output table.
+            input_file_pattern: Pattern for input files (optional).
+            completion_filename: Name of completion file (default: "completion.txt").
             cpu: CPU requirement.
             memory: Memory requirement.
             gpu: GPU requirement.
@@ -242,41 +238,28 @@ class ModelDeploymentSpec:
         saved_job_database = job_database_name or self.database
         saved_job_schema = job_schema_name or self.schema
-        input_table_database_name = input_table_database_name or self.database
-        input_table_schema_name = input_table_schema_name or self.schema
-        output_table_database_name = output_table_database_name or self.database
-        output_table_schema_name = output_table_schema_name or self.schema
         assert saved_job_database is not None
         assert saved_job_schema is not None
-        assert input_table_database_name is not None
-        assert input_table_schema_name is not None
-        assert output_table_database_name is not None
-        assert output_table_schema_name is not None
         fq_job_name = identifier.get_schema_level_object_identifier(
             saved_job_database.identifier(), saved_job_schema.identifier(), job_name.identifier()
         )
-        fq_input_table_name = identifier.get_schema_level_object_identifier(
-            input_table_database_name.identifier(),
-            input_table_schema_name.identifier(),
-            input_table_name.identifier(),
-        )
-        fq_output_table_name = identifier.get_schema_level_object_identifier(
-            output_table_database_name.identifier(),
-            output_table_schema_name.identifier(),
-            output_table_name.identifier(),
-        )
         self._add_inference_spec(cpu, memory, gpu, num_workers, max_batch_rows)
         self._job = model_deployment_spec_schema.Job(
             name=fq_job_name,
             compute_pool=inference_compute_pool_name.identifier(),
-            warehouse=warehouse.identifier(),
-            target_method=target_method,
-            input_table_name=fq_input_table_name,
-            output_table_name=fq_output_table_name,
+            warehouse=warehouse.identifier() if warehouse else None,
+            function_name=function_name,
+            input=model_deployment_spec_schema.Input(
+                input_stage_location=input_stage_location, input_file_pattern=input_file_pattern
+            ),
+            output=model_deployment_spec_schema.Output(
+                output_stage_location=output_stage_location,
+                completion_filename=completion_filename,
+            ),
             **self._inference_spec,
         )
         return self

snowflake/ml/model/_client/service/model_deployment_spec_schema.py CHANGED Viewed

@@ -35,6 +35,16 @@ class Service(BaseModel):
     inference_engine_spec: Optional[InferenceEngineSpec] = None
+class Input(BaseModel):
+    input_stage_location: str
+    input_file_pattern: str
+class Output(BaseModel):
+    output_stage_location: str
+    completion_filename: str
 class Job(BaseModel):
     name: str
     compute_pool: str
@@ -43,10 +53,10 @@ class Job(BaseModel):
     gpu: Optional[str] = None
     num_workers: Optional[int] = None
     max_batch_rows: Optional[int] = None
-    warehouse: str
-    target_method: str
-    input_table_name: str
-    output_table_name: str
+    warehouse: Optional[str] = None
+    function_name: str
+    input: Input
+    output: Output
 class LogModelArgs(BaseModel):

snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import json
 import logging
 import os
+import time
+import uuid
 import warnings
 from typing import TYPE_CHECKING, Any, Callable, Optional, Union, cast, final
@@ -11,7 +13,12 @@ from packaging import version
 from typing_extensions import TypeGuard, Unpack
 from snowflake.ml._internal import type_utils
-from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
+from snowflake.ml.model import (
+    custom_model,
+    model_signature,
+    openai_signatures,
+    type_hints as model_types,
+)
 from snowflake.ml.model._packager.model_env import model_env
 from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
 from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
@@ -151,7 +158,10 @@ class HuggingFacePipelineHandler(
             assert isinstance(model, huggingface_pipeline.HuggingFacePipelineModel)
             params = {**model.__dict__, **model.model_kwargs}
-        inferred_pipe_sig = model_signature_utils.huggingface_pipeline_signature_auto_infer(task, params=params)
+        inferred_pipe_sig = model_signature_utils.huggingface_pipeline_signature_auto_infer(
+            task,
+            params=params,
+        )
         if not is_sub_model:
             target_methods = handlers_utils.get_target_methods(
@@ -401,6 +411,34 @@ class HuggingFacePipelineHandler(
                             ),
                             axis=1,
                         ).to_list()
+                    elif raw_model.task == "text-generation":
+                        # verify when the target method is __call__ and
+                        # if the signature is default text-generation signature
+                        # then use the HuggingFaceOpenAICompatibleModel to wrap the pipeline
+                        if signature == openai_signatures._OPENAI_CHAT_SIGNATURE_SPEC:
+                            wrapped_model = HuggingFaceOpenAICompatibleModel(pipeline=raw_model)
+                            temp_res = X.apply(
+                                lambda row: wrapped_model.generate_chat_completion(
+                                    messages=row["messages"],
+                                    max_completion_tokens=row.get("max_completion_tokens", None),
+                                    temperature=row.get("temperature", None),
+                                    stop_strings=row.get("stop", None),
+                                    n=row.get("n", 1),
+                                    stream=row.get("stream", False),
+                                    top_p=row.get("top_p", 1.0),
+                                    frequency_penalty=row.get("frequency_penalty", None),
+                                    presence_penalty=row.get("presence_penalty", None),
+                                ),
+                                axis=1,
+                            ).to_list()
+                        else:
+                            if len(signature.inputs) > 1:
+                                input_data = X.to_dict("records")
+                            # If it is only expecting one argument, Then it is expecting a list of something.
+                            else:
+                                input_data = X[signature.inputs[0].name].to_list()
+                            temp_res = getattr(raw_model, target_method)(input_data)
                     else:
                         # For others, we could offer the whole dataframe as a list.
                         # Some of them may need some conversion
@@ -527,3 +565,170 @@ class HuggingFacePipelineHandler(
         hg_pipe_model = _HFPipelineModel(custom_model.ModelContext())
         return hg_pipe_model
+class HuggingFaceOpenAICompatibleModel:
+    """
+    A class to wrap a Hugging Face text generation model and provide an
+    OpenAI-compatible chat completion interface.
+    """
+    def __init__(self, pipeline: "transformers.Pipeline") -> None:
+        """
+        Initializes the model and tokenizer.
+        Args:
+            pipeline (transformers.pipeline): The Hugging Face pipeline to wrap.
+        """
+        self.pipeline = pipeline
+        self.model = self.pipeline.model
+        self.tokenizer = self.pipeline.tokenizer
+        self.model_name = self.pipeline.model.name_or_path
+    def _apply_chat_template(self, messages: list[dict[str, Any]]) -> str:
+        """
+        Applies a chat template to a list of messages.
+        If the tokenizer has a chat template, it uses that.
+        Otherwise, it falls back to a simple concatenation.
+        Args:
+            messages (list[dict]): A list of message dictionaries, e.g.,
+                                   [{"role": "user", "content": "Hello!"}, ...]
+        Returns:
+            The formatted prompt string ready for model input.
+        """
+        if hasattr(self.tokenizer, "apply_chat_template") and self.tokenizer.chat_template:
+            # Use the tokenizer's built-in chat template if available
+            # `tokenize=False` means it returns a string, not token IDs
+            return self.tokenizer.apply_chat_template(  # type: ignore[no-any-return]
+                messages,
+                tokenize=False,
+                add_generation_prompt=True,
+            )
+        else:
+            # Fallback to a simple concatenation for models without a specific chat template
+            # This is a basic example; real chat models often need specific formatting.
+            prompt = ""
+            for message in messages:
+                role = message.get("role", "user")
+                content = message.get("content", "")
+                if role == "system":
+                    prompt += f"System: {content}\n"
+                elif role == "user":
+                    prompt += f"User: {content}\n"
+                elif role == "assistant":
+                    prompt += f"Assistant: {content}\n"
+            prompt += "Assistant:"  # Indicate that the assistant should respond
+            return prompt
+    def generate_chat_completion(
+        self,
+        messages: list[dict[str, Any]],
+        max_completion_tokens: Optional[int] = None,
+        stream: Optional[bool] = False,
+        stop_strings: Optional[list[str]] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        n: int = 1,
+    ) -> dict[str, Any]:
+        """
+        Generates a chat completion response in an OpenAI-compatible format.
+        Args:
+            messages (list[dict]): A list of message dictionaries, e.g.,
+                                   [{"role": "system", "content": "You are a helpful assistant."},
+                                    {"role": "user", "content": "What is deep learning?"}]
+            max_completion_tokens (int): The maximum number of completion tokens to generate.
+            stop_strings (list[str]): A list of strings to stop generation.
+            temperature (float): The temperature for sampling.
+            top_p (float): The top-p value for sampling.
+            stream (bool): Whether to stream the generation.
+            frequency_penalty (float): The frequency penalty for sampling.
+            presence_penalty (float): The presence penalty for sampling.
+            n (int): The number of samples to generate.
+        Returns:
+            dict: An OpenAI-compatible dictionary representing the chat completion.
+        """
+        # Apply chat template to convert messages into a single prompt string
+        prompt_text = self._apply_chat_template(messages)
+        # Tokenize the prompt
+        inputs = self.tokenizer(
+            prompt_text,
+            return_tensors="pt",
+            padding=True,
+        )
+        prompt_tokens = inputs.input_ids.shape[1]
+        from transformers import GenerationConfig
+        generation_config = GenerationConfig(
+            max_new_tokens=max_completion_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            pad_token_id=self.tokenizer.pad_token_id,
+            eos_token_id=self.tokenizer.eos_token_id,
+            stop_strings=stop_strings,
+            stream=stream,
+            repetition_penalty=frequency_penalty,
+            diversity_penalty=presence_penalty if n > 1 else None,
+            num_return_sequences=n,
+            num_beams=max(2, n),  # must be >1
+            num_beam_groups=max(2, n) if presence_penalty else 1,
+        )
+        # Generate text
+        output_ids = self.model.generate(
+            inputs.input_ids,
+            attention_mask=inputs.attention_mask,
+            generation_config=generation_config,
+        )
+        generated_texts = []
+        completion_tokens = 0
+        total_tokens = prompt_tokens
+        for output_id in output_ids:
+            # The output_ids include the input prompt
+            # Decode the generated text, excluding the input prompt
+            # so we slice to get only new tokens
+            generated_tokens = output_id[prompt_tokens:]
+            generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+            generated_texts.append(generated_text)
+            # Calculate completion tokens
+            completion_tokens += len(generated_tokens)
+            total_tokens += len(generated_tokens)
+        choices = []
+        for i, generated_text in enumerate(generated_texts):
+            choices.append(
+                {
+                    "index": i,
+                    "message": {"role": "assistant", "content": generated_text},
+                    "logprobs": None,  # Not directly supported in this basic implementation
+                    "finish_reason": "stop",  # Assuming stop for simplicity
+                }
+            )
+        # Construct OpenAI-compatible response
+        response = {
+            "id": f"chatcmpl-{uuid.uuid4().hex}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": self.model_name,
+            "choices": choices,
+            "usage": {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": total_tokens,
+            },
+        }
+        return response

snowflake/ml/model/_packager/model_handlers/sklearn.py CHANGED Viewed

@@ -386,7 +386,9 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
             predictor = model[-1] if isinstance(model, sklearn.pipeline.Pipeline) else model
             try:
                 explainer = shap.Explainer(predictor, transformed_bg_data)
-                return handlers_utils.convert_explanations_to_2D_df(model, explainer(transformed_data).values)
+                return handlers_utils.convert_explanations_to_2D_df(model, explainer(transformed_data).values).astype(
+                    np.float64, errors="ignore"
+                )
             except TypeError:
                 if isinstance(data, pd.DataFrame):
                     dtype_map = {spec.name: spec.as_dtype(force_numpy_dtype=True) for spec in input_specs}

snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py CHANGED Viewed

@@ -14,7 +14,7 @@ REQUIREMENTS = [
     "packaging>=20.9,<25",
     "pandas>=2.1.4,<3",
     "platformdirs<5",
-    "pyarrow",
+    "pyarrow<19.0.0",
     "pydantic>=2.8.2, <3",
     "pyjwt>=2.0.0, <3",
     "pytimeparse>=1.1.8,<2",
@@ -22,10 +22,10 @@ REQUIREMENTS = [
     "requests",
     "retrying>=1.3.3,<2",
     "s3fs>=2024.6.1,<2026",
-    "scikit-learn<1.6",
+    "scikit-learn<1.7",
     "scipy>=1.9,<2",
     "shap>=0.46.0,<1",
-    "snowflake-connector-python>=3.15.0,<4",
+    "snowflake-connector-python>=3.16.0,<4",
     "snowflake-snowpark-python>=1.17.0,<2,!=1.26.0",
     "snowflake.core>=1.0.2,<2",
     "sqlparse>=0.4,<1",

snowflake/ml/model/_signatures/snowpark_handler.py CHANGED Viewed

@@ -84,7 +84,7 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
             return json.loads(x)
         for field in data.schema.fields:
-            if isinstance(field.datatype, spt.ArrayType):
+            if isinstance(field.datatype, (spt.ArrayType, spt.MapType, spt.StructType)):
                 df_local[identifier.get_unescaped_names(field.name)] = df_local[
                     identifier.get_unescaped_names(field.name)
                 ].map(load_if_not_null)

snowflake/ml/model/_signatures/utils.py CHANGED Viewed

@@ -104,7 +104,10 @@ def rename_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureSpec
     return data
-def huggingface_pipeline_signature_auto_infer(task: str, params: dict[str, Any]) -> Optional[core.ModelSignature]:
+def huggingface_pipeline_signature_auto_infer(
+    task: str,
+    params: dict[str, Any],
+) -> Optional[core.ModelSignature]:
     # Text
     # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.ConversationalPipeline
@@ -297,7 +300,6 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: dict[str, Any])
                 )
             ],
         )
     # https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.Text2TextGenerationPipeline
     if task == "text2text-generation":
         if params.get("return_tensors", False):

snowflake/ml/model/openai_signatures.py ADDED Viewed

@@ -0,0 +1,57 @@
+from snowflake.ml.model._signatures import core
+_OPENAI_CHAT_SIGNATURE_SPEC = core.ModelSignature(
+    inputs=[
+        core.FeatureGroupSpec(
+            name="messages",
+            specs=[
+                core.FeatureSpec(name="content", dtype=core.DataType.STRING),
+                core.FeatureSpec(name="name", dtype=core.DataType.STRING),
+                core.FeatureSpec(name="role", dtype=core.DataType.STRING),
+                core.FeatureSpec(name="title", dtype=core.DataType.STRING),
+            ],
+            shape=(-1,),
+        ),
+        core.FeatureSpec(name="temperature", dtype=core.DataType.DOUBLE),
+        core.FeatureSpec(name="max_completion_tokens", dtype=core.DataType.INT64),
+        core.FeatureSpec(name="stop", dtype=core.DataType.STRING, shape=(-1,)),
+        core.FeatureSpec(name="n", dtype=core.DataType.INT32),
+        core.FeatureSpec(name="stream", dtype=core.DataType.BOOL),
+        core.FeatureSpec(name="top_p", dtype=core.DataType.DOUBLE),
+        core.FeatureSpec(name="frequency_penalty", dtype=core.DataType.DOUBLE),
+        core.FeatureSpec(name="presence_penalty", dtype=core.DataType.DOUBLE),
+    ],
+    outputs=[
+        core.FeatureSpec(name="id", dtype=core.DataType.STRING),
+        core.FeatureSpec(name="object", dtype=core.DataType.STRING),
+        core.FeatureSpec(name="created", dtype=core.DataType.FLOAT),
+        core.FeatureSpec(name="model", dtype=core.DataType.STRING),
+        core.FeatureGroupSpec(
+            name="choices",
+            specs=[
+                core.FeatureSpec(name="index", dtype=core.DataType.INT32),
+                core.FeatureGroupSpec(
+                    name="message",
+                    specs=[
+                        core.FeatureSpec(name="content", dtype=core.DataType.STRING),
+                        core.FeatureSpec(name="name", dtype=core.DataType.STRING),
+                        core.FeatureSpec(name="role", dtype=core.DataType.STRING),
+                    ],
+                ),
+                core.FeatureSpec(name="logprobs", dtype=core.DataType.STRING),
+                core.FeatureSpec(name="finish_reason", dtype=core.DataType.STRING),
+            ],
+            shape=(-1,),
+        ),
+        core.FeatureGroupSpec(
+            name="usage",
+            specs=[
+                core.FeatureSpec(name="completion_tokens", dtype=core.DataType.INT32),
+                core.FeatureSpec(name="prompt_tokens", dtype=core.DataType.INT32),
+                core.FeatureSpec(name="total_tokens", dtype=core.DataType.INT32),
+            ],
+        ),
+    ],
+)
+OPENAI_CHAT_SIGNATURE = {"__call__": _OPENAI_CHAT_SIGNATURE_SPEC}

snowflake/ml/modeling/_internal/estimator_utils.py CHANGED Viewed

@@ -42,6 +42,26 @@ def validate_sklearn_args(args: dict[str, tuple[Any, Any, bool]], klass: type) -
                     error_code=error_codes.DEPENDENCY_VERSION_ERROR,
                     original_exception=RuntimeError(f"Arg {k} is not supported by current version of SKLearn/XGBoost."),
                 )
+        elif v[0] == v[1] and v[0] != signature.parameters[k].default:
+            # If default value (pulled at autogen time) is not the same as the installed library's default value,
+            # we need to validate the parameter value against the parameter constraints.
+            # If the parameter value is invalid, we drop it.
+            try:
+                from sklearn.utils._param_validation import (
+                    InvalidParameterError,
+                    validate_parameter_constraints,
+                )
+                try:
+                    validate_parameter_constraints(
+                        klass._parameter_constraints,  # type: ignore[attr-defined]
+                        {k: v[0]},
+                        klass.__name__,
+                    )
+                except InvalidParameterError:
+                    continue  # Let the underlying estimator fill in the default value.
+            except (ImportError, AttributeError, TypeError):
+                result[k] = v[0]  # Try to use the value as is.
         else:
             result[k] = v[0]
     return result
@@ -199,7 +219,12 @@ def handle_inference_result(
         transformed_numpy_array = np.hstack(transformed_numpy_array)  # type: ignore[call-overload]
     if len(transformed_numpy_array.shape) == 1:
-        transformed_numpy_array = np.reshape(transformed_numpy_array, (-1, 1))
+        # Within a vectorized UDF, a single-row batch often yields a 1D array of length n_components.
+        # That must be reshaped to (1, n_components) to keep the number of rows aligned with the input batch.
+        if len(output_cols) > 1:
+            transformed_numpy_array = np.reshape(transformed_numpy_array, (1, -1))
+        else:
+            transformed_numpy_array = np.reshape(transformed_numpy_array, (-1, 1))
     shape = transformed_numpy_array.shape
     if len(shape) > 1:
@@ -292,3 +317,20 @@ def should_include_sample_weight(estimator: object, method_name: str) -> bool:
         return True
     return False
+def is_multi_task_estimator(estimator: object) -> bool:
+    """
+    Check if the estimator is a multi-task estimator that requires 2D targets.
+    Args:
+        estimator: The estimator to check
+    Returns:
+        True if the estimator is a multi-task estimator, False otherwise
+    """
+    # List of known multi-task estimators that require 2D targets
+    multi_task_estimators = {"MultiTaskElasticNet", "MultiTaskElasticNetCV", "MultiTaskLasso", "MultiTaskLassoCV"}
+    estimator_name = estimator.__class__.__name__
+    return estimator_name in multi_task_estimators

snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py CHANGED Viewed

@@ -3,7 +3,10 @@ from typing import Optional
 import pandas as pd
-from snowflake.ml.modeling._internal.estimator_utils import handle_inference_result
+from snowflake.ml.modeling._internal.estimator_utils import (
+    handle_inference_result,
+    is_multi_task_estimator,
+)
 class PandasModelTrainer:
@@ -48,7 +51,11 @@ class PandasModelTrainer:
         if self.label_cols:
             label_arg_name = "Y" if "Y" in params else "y"
-            args[label_arg_name] = self.dataset[self.label_cols].squeeze()
+            # For multi-task estimators, avoid squeezing to maintain 2D shape
+            if is_multi_task_estimator(self.estimator):
+                args[label_arg_name] = self.dataset[self.label_cols]
+            else:
+                args[label_arg_name] = self.dataset[self.label_cols].squeeze()
         if self.sample_weight_col is not None and "sample_weight" in params:
             args["sample_weight"] = self.dataset[self.sample_weight_col].squeeze()
@@ -115,7 +122,11 @@ class PandasModelTrainer:
         args = {"X": self.dataset[self.input_cols]}
         if self.label_cols:
             label_arg_name = "Y" if "Y" in params else "y"
-            args[label_arg_name] = self.dataset[self.label_cols].squeeze()
+            # For multi-task estimators, avoid squeezing to maintain 2D shape
+            if is_multi_task_estimator(self.estimator):
+                args[label_arg_name] = self.dataset[self.label_cols]
+            else:
+                args[label_arg_name] = self.dataset[self.label_cols].squeeze()
         if self.sample_weight_col is not None and "sample_weight" in params:
             args["sample_weight"] = self.dataset[self.sample_weight_col].squeeze()

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py CHANGED Viewed

@@ -22,6 +22,7 @@ from snowflake.ml._internal.utils import (
 from snowflake.ml.modeling._internal import estimator_utils
 from snowflake.ml.modeling._internal.estimator_utils import (
     handle_inference_result,
+    is_multi_task_estimator,
     should_include_sample_weight,
 )
 from snowflake.ml.modeling._internal.model_specifications import (
@@ -178,7 +179,11 @@ class SnowparkModelTrainer:
                 args = {"X": df[input_cols]}
                 if label_cols:
                     label_arg_name = "Y" if "Y" in params else "y"
-                    args[label_arg_name] = df[label_cols].squeeze()
+                    # For multi-task estimators, avoid squeezing to maintain 2D shape
+                    if is_multi_task_estimator(estimator):
+                        args[label_arg_name] = df[label_cols]
+                    else:
+                        args[label_arg_name] = df[label_cols].squeeze()
                 # Sample weight is not included in search estimators parameters, check the underlying estimator.
                 if sample_weight_col is not None and should_include_sample_weight(estimator, "fit"):
@@ -416,7 +421,11 @@ class SnowparkModelTrainer:
             args = {"X": df[input_cols]}
             if label_cols:
                 label_arg_name = "Y" if "Y" in params else "y"
-                args[label_arg_name] = df[label_cols].squeeze()
+                # For multi-task estimators, avoid squeezing to maintain 2D shape
+                if is_multi_task_estimator(estimator):
+                    args[label_arg_name] = df[label_cols]
+                else:
+                    args[label_arg_name] = df[label_cols].squeeze()
             if sample_weight_col is not None and should_include_sample_weight(estimator, "fit"):
                 args["sample_weight"] = df[sample_weight_col].squeeze()
@@ -734,12 +743,14 @@ class SnowparkModelTrainer:
         # Create a temp table in advance to store the output
         # This would allow us to use the same table outside the stored procedure
         df_one_line = dataset.limit(1).to_pandas(statement_params=statement_params)
-        df_one_line[
-            expected_output_cols_list[0]
-        ] = "[0]"  # Add one column as the output_col; this is a dummy value to represent the OBJECT type
+        # Pre-create ALL expected output columns so subsequent writes can target the same schema.
+        # Use a simple dummy string value to represent OBJECT-typed payloads.
+        for out_col in expected_output_cols_list:
+            df_one_line[out_col] = "[0]"
         if drop_input_cols:
+            # When input columns are dropped, the table should only contain the output columns.
             self.session.write_pandas(
-                df_one_line[expected_output_cols_list[0]],
+                df_one_line[expected_output_cols_list],
                 fit_transform_result_name,
                 auto_create_table=True,
                 table_type="temp",

snowflake/ml/modeling/calibration/calibrated_classifier_cv.py CHANGED Viewed

@@ -60,7 +60,7 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 INFER_SIGNATURE_MAX_ROWS = 100
-SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
+SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.7')
 # Modeling library estimators require a smaller sklearn version range.
 if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
     raise Exception(

snowflake/ml/modeling/cluster/affinity_propagation.py CHANGED Viewed

@@ -60,7 +60,7 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 INFER_SIGNATURE_MAX_ROWS = 100
-SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
+SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.7')
 # Modeling library estimators require a smaller sklearn version range.
 if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
     raise Exception(

snowflake-ml-python 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

snowflake-ml-python 1.11.0__py3-none-any.whl → 1.12.0__py3-none-any.whl