snowflake-ml-python 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. snowflake/ml/_internal/platform_capabilities.py +36 -0
  2. snowflake/ml/_internal/utils/url.py +42 -0
  3. snowflake/ml/data/_internal/arrow_ingestor.py +67 -2
  4. snowflake/ml/data/data_connector.py +103 -1
  5. snowflake/ml/experiment/_client/experiment_tracking_sql_client.py +8 -2
  6. snowflake/ml/experiment/callback/__init__.py +0 -0
  7. snowflake/ml/experiment/callback/keras.py +25 -2
  8. snowflake/ml/experiment/callback/lightgbm.py +27 -2
  9. snowflake/ml/experiment/callback/xgboost.py +25 -2
  10. snowflake/ml/experiment/experiment_tracking.py +93 -3
  11. snowflake/ml/experiment/utils.py +6 -0
  12. snowflake/ml/feature_store/feature_view.py +34 -24
  13. snowflake/ml/jobs/_interop/protocols.py +3 -0
  14. snowflake/ml/jobs/_utils/constants.py +1 -0
  15. snowflake/ml/jobs/_utils/payload_utils.py +354 -356
  16. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +95 -8
  17. snowflake/ml/jobs/_utils/scripts/start_mlruntime.sh +92 -0
  18. snowflake/ml/jobs/_utils/scripts/startup.sh +112 -0
  19. snowflake/ml/jobs/_utils/spec_utils.py +1 -445
  20. snowflake/ml/jobs/_utils/stage_utils.py +22 -1
  21. snowflake/ml/jobs/_utils/types.py +14 -7
  22. snowflake/ml/jobs/job.py +2 -8
  23. snowflake/ml/jobs/manager.py +57 -135
  24. snowflake/ml/lineage/lineage_node.py +1 -1
  25. snowflake/ml/model/__init__.py +6 -0
  26. snowflake/ml/model/_client/model/batch_inference_specs.py +16 -1
  27. snowflake/ml/model/_client/model/model_version_impl.py +130 -14
  28. snowflake/ml/model/_client/ops/deployment_step.py +36 -0
  29. snowflake/ml/model/_client/ops/model_ops.py +93 -8
  30. snowflake/ml/model/_client/ops/service_ops.py +32 -52
  31. snowflake/ml/model/_client/service/import_model_spec_schema.py +23 -0
  32. snowflake/ml/model/_client/service/model_deployment_spec.py +12 -4
  33. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -0
  34. snowflake/ml/model/_client/sql/model_version.py +30 -6
  35. snowflake/ml/model/_client/sql/service.py +94 -5
  36. snowflake/ml/model/_model_composer/model_composer.py +1 -1
  37. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -0
  38. snowflake/ml/model/_model_composer/model_method/model_method.py +61 -2
  39. snowflake/ml/model/_packager/model_handler.py +8 -2
  40. snowflake/ml/model/_packager/model_handlers/custom.py +52 -0
  41. snowflake/ml/model/_packager/model_handlers/{huggingface_pipeline.py → huggingface.py} +203 -76
  42. snowflake/ml/model/_packager/model_handlers/mlflow.py +6 -1
  43. snowflake/ml/model/_packager/model_handlers/xgboost.py +26 -1
  44. snowflake/ml/model/_packager/model_meta/model_meta.py +40 -7
  45. snowflake/ml/model/_packager/model_packager.py +1 -1
  46. snowflake/ml/model/_signatures/core.py +390 -8
  47. snowflake/ml/model/_signatures/utils.py +13 -4
  48. snowflake/ml/model/code_path.py +104 -0
  49. snowflake/ml/model/compute_pool.py +2 -0
  50. snowflake/ml/model/custom_model.py +55 -13
  51. snowflake/ml/model/model_signature.py +13 -1
  52. snowflake/ml/model/models/huggingface.py +285 -0
  53. snowflake/ml/model/models/huggingface_pipeline.py +19 -208
  54. snowflake/ml/model/type_hints.py +7 -1
  55. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
  56. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +12 -0
  57. snowflake/ml/monitoring/_manager/model_monitor_manager.py +12 -0
  58. snowflake/ml/monitoring/entities/model_monitor_config.py +5 -0
  59. snowflake/ml/registry/_manager/model_manager.py +230 -15
  60. snowflake/ml/registry/registry.py +4 -4
  61. snowflake/ml/utils/html_utils.py +67 -1
  62. snowflake/ml/version.py +1 -1
  63. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/METADATA +81 -7
  64. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/RECORD +67 -59
  65. snowflake/ml/jobs/_utils/runtime_env_utils.py +0 -63
  66. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/WHEEL +0 -0
  67. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/licenses/LICENSE.txt +0 -0
  68. {snowflake_ml_python-1.20.0.dist-info → snowflake_ml_python-1.22.0.dist-info}/top_level.txt +0 -0
@@ -105,7 +105,7 @@ class ModelMethod:
  except ValueError as e:
  raise ValueError(
  f"Your target method {self.target_method} cannot be resolved as valid SQL identifier. "
- "Try specify `case_sensitive` as True."
+ "Try specifying `case_sensitive` as True."
  ) from e

  if self.target_method not in self.model_meta.signatures.keys():
@@ -127,12 +127,41 @@ class ModelMethod:
  except ValueError as e:
  raise ValueError(
  f"Your feature {feature.name} cannot be resolved as valid SQL identifier. "
- "Try specify `case_sensitive` as True."
+ "Try specifying `case_sensitive` as True."
  ) from e
  return model_manifest_schema.ModelMethodSignatureFieldWithName(
  name=feature_name.resolved(), type=type_utils.convert_sp_to_sf_type(feature.as_snowpark_type())
  )

+ @staticmethod
+ def _flatten_params(params: list[model_signature.BaseParamSpec]) -> list[model_signature.ParamSpec]:
+ """Flatten ParamGroupSpec into leaf ParamSpec items."""
+ result: list[model_signature.ParamSpec] = []
+ for param in params:
+ if isinstance(param, model_signature.ParamSpec):
+ result.append(param)
+ elif isinstance(param, model_signature.ParamGroupSpec):
+ result.extend(ModelMethod._flatten_params(param.specs))
+ return result
+
+ @staticmethod
+ def _get_method_arg_from_param(
+ param_spec: model_signature.ParamSpec,
+ case_sensitive: bool = False,
+ ) -> model_manifest_schema.ModelMethodSignatureFieldWithNameAndDefault:
+ try:
+ param_name = sql_identifier.SqlIdentifier(param_spec.name, case_sensitive=case_sensitive)
+ except ValueError as e:
+ raise ValueError(
+ f"Your parameter {param_spec.name} cannot be resolved as valid SQL identifier. "
+ "Try specifying `case_sensitive` as True."
+ ) from e
+ return model_manifest_schema.ModelMethodSignatureFieldWithNameAndDefault(
+ name=param_name.resolved(),
+ type=type_utils.convert_sp_to_sf_type(param_spec.dtype.as_snowpark_type()),
+ default=param_spec.default_value,
+ )
+
  def save(
  self, workspace_path: pathlib.Path, options: Optional[function_generator.FunctionGenerateOptions] = None
  ) -> model_manifest_schema.ModelMethodDict:
@@ -182,6 +211,36 @@ class ModelMethod:
  inputs=input_list,
  outputs=outputs,
  )
+
+ # Add parameters if signature has parameters
+ if self.model_meta.signatures[self.target_method].params:
+ flat_params = ModelMethod._flatten_params(list(self.model_meta.signatures[self.target_method].params))
+ param_list = [
+ ModelMethod._get_method_arg_from_param(
+ param_spec, case_sensitive=self.options.get("case_sensitive", False)
+ )
+ for param_spec in flat_params
+ ]
+ param_name_counter = collections.Counter([param_info["name"] for param_info in param_list])
+ dup_param_names = [k for k, v in param_name_counter.items() if v > 1]
+ if dup_param_names:
+ raise ValueError(
+ f"Found duplicate parameter named resolved as {', '.join(dup_param_names)} in the method"
+ f" {self.target_method}. This might be because you have parameters with same letters but "
+ "different cases. In this case, set case_sensitive as True for those methods to distinguish them."
+ )
+
+ # Check for name collisions between parameters and inputs using existing counters
+ collision_names = [name for name in param_name_counter if name in input_name_counter]
+ if collision_names:
+ raise ValueError(
+ f"Found parameter(s) with the same name as input feature(s): {', '.join(sorted(collision_names))} "
+ f"in the method {self.target_method}. Parameters and inputs must have distinct names. "
+ "Try using case_sensitive=True if the names differ only by case."
+ )
+
+ method_dict["params"] = param_list
+
  should_set_volatility = (
  platform_capabilities.PlatformCapabilities.get_instance().is_set_module_functions_volatility_from_manifest()
  )
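
For illustration only (not part of the diff): a minimal sketch of the signature parameters this manifest logic consumes. ParamSpec is the class added to snowflake/ml/model/_signatures/core.py in this release; the parameter names, dtypes, and defaults below are hypothetical.

    from snowflake.ml.model import model_signature

    # Hypothetical leaf parameter specs. ParamGroupSpec containers, if present,
    # are flattened recursively into leaves like these before manifest generation.
    params = [
        model_signature.ParamSpec(name="temperature", dtype=model_signature.DataType.FLOAT, default_value=1.0),
        model_signature.ParamSpec(name="max_tokens", dtype=model_signature.DataType.INT64, default_value=256),
    ]
    # Each leaf ends up in the method manifest as a named, typed field with its default;
    # duplicate names, or collisions with input feature names, raise a ValueError as shown above.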
@@ -1,5 +1,6 @@
  import functools
  import importlib
+ import logging
  import pkgutil
  from types import ModuleType
  from typing import Any, Callable, Optional, TypeVar, cast
@@ -11,6 +12,8 @@ _HANDLERS_BASE = "snowflake.ml.model._packager.model_handlers"
  _MODEL_HANDLER_REGISTRY: dict[str, type[_base.BaseModelHandler[model_types.SupportedModelType]]] = dict()
  _IS_HANDLER_LOADED = False

+ logger = logging.getLogger(__name__)
+

  def _register_handlers() -> None:
  """
@@ -56,8 +59,11 @@ def find_handler(
  model: model_types.SupportedModelType,
  ) -> Optional[type[_base.BaseModelHandler[model_types.SupportedModelType]]]:
  for handler in _MODEL_HANDLER_REGISTRY.values():
- if handler.can_handle(model):
- return handler
+ try:
+ if handler.can_handle(model):
+ return handler
+ except Exception:
+ logger.error(f"Error in {handler.__name__} `can_handle` method for model {type(model)}", exc_info=True)
  return None


@@ -86,6 +86,9 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
  get_prediction_fn=get_prediction,
  )

+ # Add parameters extracted from custom model inference methods to signatures
+ cls._add_method_parameters_to_signatures(model, model_meta)
+
  model_blob_path = os.path.join(model_blobs_dir_path, name)
  os.makedirs(model_blob_path, exist_ok=True)
  if model.context.artifacts:
@@ -188,6 +191,55 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
  assert isinstance(model, custom_model.CustomModel)
  return model

+ @classmethod
+ def _add_method_parameters_to_signatures(
+ cls,
+ model: "custom_model.CustomModel",
+ model_meta: model_meta_api.ModelMetadata,
+ ) -> None:
+ """Extract parameters from custom model inference methods and add them to signatures.
+
+ For each inference method, if the signature doesn't already have parameters and the method
+ has keyword-only parameters with defaults, create ParamSpecs and add them to the signature.
+
+ Args:
+ model: The custom model instance.
+ model_meta: The model metadata containing signatures to augment.
+ """
+ for method in model._get_infer_methods():
+ method_name = method.__name__
+ if method_name not in model_meta.signatures:
+ continue
+
+ sig = model_meta.signatures[method_name]
+
+ # Skip if the signature already has parameters (user-provided or previously set)
+ if sig.params:
+ continue
+
+ # Extract parameters from the method
+ method_params = custom_model.get_method_parameters(method)
+ if not method_params:
+ continue
+
+ # Create ParamSpecs from the method parameters
+ param_specs = []
+ for param_name, param_type, param_default in method_params:
+ dtype = model_signature.DataType.from_python_type(param_type)
+ param_spec = model_signature.ParamSpec(
+ name=param_name,
+ dtype=dtype,
+ default_value=param_default,
+ )
+ param_specs.append(param_spec)
+
+ # Create a new signature with parameters
+ model_meta.signatures[method_name] = model_signature.ModelSignature(
+ inputs=sig.inputs,
+ outputs=sig.outputs,
+ params=param_specs,
+ )
+
  @classmethod
  def convert_as_custom_model(
  cls,
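
For illustration only (not part of the diff): a hedged sketch of the kind of custom model this extraction targets, assuming the @inference_api decorator and pandas interface of snowflake.ml.model.custom_model; the class, method, and scale parameter are hypothetical.

    import pandas as pd
    from snowflake.ml.model import custom_model

    class ScalingModel(custom_model.CustomModel):
        @custom_model.inference_api
        def predict(self, X: pd.DataFrame, *, scale: float = 1.0) -> pd.DataFrame:
            # `scale` is keyword-only with a default, so _add_method_parameters_to_signatures
            # above would add a ParamSpec(name="scale", dtype=FLOAT, default_value=1.0) to the
            # signature of `predict`, unless params were already supplied by the caller.
            return X * scale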
@@ -29,12 +29,16 @@ from snowflake.ml.model._packager.model_meta import (
  model_meta_schema,
  )
  from snowflake.ml.model._signatures import utils as model_signature_utils
- from snowflake.ml.model.models import huggingface_pipeline
+ from snowflake.ml.model.models import (
+ huggingface as huggingface_base,
+ huggingface_pipeline,
+ )
  from snowflake.snowpark._internal import utils as snowpark_utils

  logger = logging.getLogger(__name__)

  if TYPE_CHECKING:
+ import torch
  import transformers

  DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" # noqa: E501
@@ -77,8 +81,14 @@ def sanitize_output(data: Any) -> Any:


  @final
- class HuggingFacePipelineHandler(
- _base.BaseModelHandler[Union[huggingface_pipeline.HuggingFacePipelineModel, "transformers.Pipeline"]]
+ class TransformersPipelineHandler(
+ _base.BaseModelHandler[
+ Union[
+ huggingface_base.TransformersPipeline,
+ huggingface_pipeline.HuggingFacePipelineModel,
+ "transformers.Pipeline",
+ ]
+ ]
  ):
  """Handler for custom model."""

@@ -97,35 +107,48 @@ class HuggingFacePipelineHandler(
  def can_handle(
  cls,
  model: model_types.SupportedModelType,
- ) -> TypeGuard[Union[huggingface_pipeline.HuggingFacePipelineModel, "transformers.Pipeline"]]:
+ ) -> TypeGuard[
+ Union[
+ huggingface_base.TransformersPipeline,
+ huggingface_pipeline.HuggingFacePipelineModel,
+ "transformers.Pipeline",
+ ]
+ ]:
  if type_utils.LazyType("transformers.Pipeline").isinstance(model):
  return True
  if isinstance(model, huggingface_pipeline.HuggingFacePipelineModel):
  return True
+ if isinstance(model, huggingface_base.TransformersPipeline):
+ return True
  return False

  @classmethod
  def cast_model(
  cls,
  model: model_types.SupportedModelType,
- ) -> Union[huggingface_pipeline.HuggingFacePipelineModel, "transformers.Pipeline"]:
- try:
- if isinstance(model, huggingface_pipeline.HuggingFacePipelineModel):
- raise ImportError
- else:
- import transformers
- except ImportError:
- assert isinstance(model, huggingface_pipeline.HuggingFacePipelineModel)
+ ) -> Union[
+ huggingface_base.TransformersPipeline,
+ huggingface_pipeline.HuggingFacePipelineModel,
+ "transformers.Pipeline",
+ ]:
+ if type_utils.LazyType("transformers.Pipeline").isinstance(model):
  return model
- else:
- assert isinstance(model, transformers.Pipeline)
+ elif isinstance(model, huggingface_pipeline.HuggingFacePipelineModel) or isinstance(
+ model, huggingface_base.TransformersPipeline
+ ):
  return model
+ else:
+ raise ValueError(f"Model {model} is not a valid Hugging Face model.")

  @classmethod
  def save_model(
  cls,
  name: str,
- model: Union[huggingface_pipeline.HuggingFacePipelineModel, "transformers.Pipeline"],
+ model: Union[
+ huggingface_base.TransformersPipeline,
+ huggingface_pipeline.HuggingFacePipelineModel,
+ "transformers.Pipeline",
+ ],
  model_meta: model_meta_api.ModelMetadata,
  model_blobs_dir_path: str,
  sample_input_data: Optional[model_types.SupportedDataType] = None,
@@ -140,7 +163,9 @@ class HuggingFacePipelineHandler(
  framework = model.framework # type:ignore[attr-defined]
  batch_size = model._batch_size # type:ignore[attr-defined]
  else:
- assert isinstance(model, huggingface_pipeline.HuggingFacePipelineModel)
+ assert isinstance(model, huggingface_pipeline.HuggingFacePipelineModel) or isinstance(
+ model, huggingface_base.TransformersPipeline
+ )
  task = model.task
  framework = getattr(model, "framework", None)
  batch_size = getattr(model, "batch_size", None)
@@ -156,7 +181,9 @@ class HuggingFacePipelineHandler(
  **model._postprocess_params, # type:ignore[attr-defined]
  }
  else:
- assert isinstance(model, huggingface_pipeline.HuggingFacePipelineModel)
+ assert isinstance(model, huggingface_pipeline.HuggingFacePipelineModel) or isinstance(
+ model, huggingface_base.TransformersPipeline
+ )
  params = {**model.__dict__, **model.model_kwargs}

  inferred_pipe_sig = model_signature_utils.huggingface_pipeline_signature_auto_infer(
@@ -177,7 +204,7 @@ class HuggingFacePipelineHandler(
  else:
  warnings.warn(
  "It is impossible to validate your model signatures when using a"
- " `snowflake.ml.model.models.huggingface_pipeline.HuggingFacePipelineModel` object. "
+ f" {type(model).__name__} object. "
  "Please make sure you are providing correct model signatures.",
  UserWarning,
  stacklevel=2,
@@ -302,14 +329,16 @@ class HuggingFacePipelineHandler(
  def _load_pickle_model(
  pickle_file: str,
  **kwargs: Unpack[model_types.HuggingFaceLoadOptions],
- ) -> huggingface_pipeline.HuggingFacePipelineModel:
+ ) -> Union[huggingface_pipeline.HuggingFacePipelineModel, huggingface_base.TransformersPipeline]:
  with open(pickle_file, "rb") as f:
  m = cloudpickle.load(f)
- assert isinstance(m, huggingface_pipeline.HuggingFacePipelineModel)
+ assert isinstance(m, huggingface_pipeline.HuggingFacePipelineModel) or isinstance(
+ m, huggingface_base.TransformersPipeline
+ )
  torch_dtype: Optional[str] = None
  device_config = None
  if getattr(m, "device", None) is None and getattr(m, "device_map", None) is None:
- device_config = HuggingFacePipelineHandler._get_device_config(**kwargs)
+ device_config = TransformersPipelineHandler._get_device_config(**kwargs)
  m.__dict__.update(device_config)

  if getattr(m, "torch_dtype", None) is None and kwargs.get("use_gpu", False):
@@ -326,7 +355,9 @@ class HuggingFacePipelineHandler(
  model_meta: model_meta_api.ModelMetadata,
  model_blobs_dir_path: str,
  **kwargs: Unpack[model_types.HuggingFaceLoadOptions],
- ) -> Union[huggingface_pipeline.HuggingFacePipelineModel, "transformers.Pipeline"]:
+ ) -> Union[
+ huggingface_pipeline.HuggingFacePipelineModel, huggingface_base.TransformersPipeline, "transformers.Pipeline"
+ ]:
  if snowpark_utils.is_in_stored_procedure(): # type: ignore[no-untyped-call]
  # We need to redirect the some folders to a writable location in the sandbox.
  os.environ["HF_HOME"] = "/tmp"
@@ -369,7 +400,7 @@ class HuggingFacePipelineHandler(
  ) as f:
  pipeline_params = cloudpickle.load(f)

- device_config = HuggingFacePipelineHandler._get_device_config(**kwargs)
+ device_config = TransformersPipelineHandler._get_device_config(**kwargs)

  m = transformers.pipeline(
  model_blob_options["task"],
@@ -402,7 +433,7 @@ class HuggingFacePipelineHandler(

  def _create_pipeline_from_model(
  model_blob_file_or_dir_path: str,
- m: huggingface_pipeline.HuggingFacePipelineModel,
+ m: Union[huggingface_pipeline.HuggingFacePipelineModel, huggingface_base.TransformersPipeline],
  **kwargs: Unpack[model_types.HuggingFaceLoadOptions],
  ) -> "transformers.Pipeline":
  import transformers
@@ -414,7 +445,7 @@ class HuggingFacePipelineHandler(
  torch_dtype=getattr(m, "torch_dtype", None),
  revision=m.revision,
  # pass device or device_map when creating the pipeline
- **HuggingFacePipelineHandler._get_device_config(**kwargs),
+ **TransformersPipelineHandler._get_device_config(**kwargs),
  # pass other model_kwargs to transformers.pipeline.from_pretrained method
  **m.model_kwargs,
  )
@@ -455,7 +486,11 @@ class HuggingFacePipelineHandler(
  @classmethod
  def convert_as_custom_model(
  cls,
- raw_model: Union[huggingface_pipeline.HuggingFacePipelineModel, "transformers.Pipeline"],
+ raw_model: Union[
+ huggingface_pipeline.HuggingFacePipelineModel,
+ huggingface_base.TransformersPipeline,
+ "transformers.Pipeline",
+ ],
  model_meta: model_meta_api.ModelMetadata,
  background_data: Optional[pd.DataFrame] = None,
  **kwargs: Unpack[model_types.HuggingFaceLoadOptions],
@@ -609,7 +644,9 @@ class HuggingFacePipelineHandler(

  return _HFPipelineModel

- if isinstance(raw_model, huggingface_pipeline.HuggingFacePipelineModel):
+ if isinstance(raw_model, huggingface_pipeline.HuggingFacePipelineModel) or isinstance(
+ raw_model, huggingface_base.TransformersPipeline
+ ):
  if version.parse(transformers.__version__) < version.parse("4.32.0"):
  # Backward compatibility since HF interface change.
  raw_model.__dict__["use_auth_token"] = raw_model.__dict__["token"]
@@ -668,43 +705,64 @@ class HuggingFaceOpenAICompatibleModel:

  self.model_name = self.pipeline.model.name_or_path

+ if self.tokenizer.pad_token is None:
+ self.tokenizer.pad_token = self.tokenizer.eos_token
+
+ # Ensure the tokenizer has a chat template.
+ # If not, we inject the default ChatML template which supports prompt generation.
+ if not getattr(self.tokenizer, "chat_template", None):
+ logger.warning(f"No chat template found for {self.model_name}. Using default ChatML template.")
+ self.tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE
+
  def _apply_chat_template(self, messages: list[dict[str, Any]]) -> str:
  """
  Applies a chat template to a list of messages.
- If the tokenizer has a chat template, it uses that.
- Otherwise, it falls back to a simple concatenation.

  Args:
- messages (list[dict]): A list of message dictionaries, e.g.,
- [{"role": "user", "content": "Hello!"}, ...]
+ messages (list[dict]): A list of message dictionaries.

  Returns:
  The formatted prompt string ready for model input.
  """
-
- if hasattr(self.tokenizer, "apply_chat_template") and self.tokenizer.chat_template:
- # Use the tokenizer's built-in chat template if available
- # `tokenize=False` means it returns a string, not token IDs
+ # Use the tokenizer's apply_chat_template method.
+ # We ensured a template exists in __init__.
+ if hasattr(self.tokenizer, "apply_chat_template"):
  return self.tokenizer.apply_chat_template( # type: ignore[no-any-return]
  messages,
  tokenize=False,
  add_generation_prompt=True,
  )
- else:
- # Fallback to a simple concatenation for models without a specific chat template
- # This is a basic example; real chat models often need specific formatting.
- prompt = ""
- for message in messages:
- role = message.get("role", "user")
- content = message.get("content", "")
- if role == "system":
- prompt += f"System: {content}\n"
- elif role == "user":
- prompt += f"User: {content}\n"
- elif role == "assistant":
- prompt += f"Assistant: {content}\n"
- prompt += "Assistant:" # Indicate that the assistant should respond
- return prompt
+
+ # Fallback for very old transformers without apply_chat_template
+ # Manually apply ChatML-like formatting
+ prompt = ""
+ for message in messages:
+ role = message.get("role", "user")
+ content = message.get("content", "")
+ prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"
+ prompt += "<|im_start|>assistant\n"
+ return prompt
+
+ def _get_stopping_criteria(self, stop_strings: list[str]) -> "transformers.StoppingCriteriaList":
+
+ import transformers
+
+ class StopStringsStoppingCriteria(transformers.StoppingCriteria):
+ def __init__(self, stop_strings: list[str], tokenizer: Any) -> None:
+ self.stop_strings = stop_strings
+ self.tokenizer = tokenizer
+
+ def __call__(self, input_ids: "torch.Tensor", scores: "torch.Tensor", **kwargs: Any) -> bool:
+ # Decode the generated text for each sequence
+ for i in range(input_ids.shape[0]):
+ generated_text = self.tokenizer.decode(input_ids[i], skip_special_tokens=True)
+ # Check if any stop string appears in the generated text
+ for stop_str in self.stop_strings:
+ if stop_str in generated_text:
+ return True
+ return False
+
+ return transformers.StoppingCriteriaList([StopStringsStoppingCriteria(stop_strings, self.tokenizer)])

  def generate_chat_completion(
  self,
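
For illustration only (not part of the diff): what the ChatML-style fallback added above would produce for a hypothetical message list.

    messages = [
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": "Hi"},
    ]
    # The fallback loop would build:
    # "<|im_start|>system\nYou are helpful.<|im_end|>\n"
    # "<|im_start|>user\nHi<|im_end|>\n"
    # "<|im_start|>assistant\n"
    # which matches the DEFAULT_CHAT_TEMPLATE injected in __init__ when a tokenizer has no chat template.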
@@ -727,18 +785,17 @@ class HuggingFaceOpenAICompatibleModel:
  {"role": "user", "content": "What is deep learning?"}]
  max_completion_tokens (int): The maximum number of completion tokens to generate.
  stop_strings (list[str]): A list of strings to stop generation.
- temperature (float): The temperature for sampling.
- top_p (float): The top-p value for sampling.
- stream (bool): Whether to stream the generation.
- frequency_penalty (float): The frequency penalty for sampling.
- presence_penalty (float): The presence penalty for sampling.
+ temperature (float): The temperature for sampling. 0 means greedy decoding.
+ top_p (float): The top-p value for nucleus sampling.
+ stream (bool): Whether to stream the generation (not yet supported).
+ frequency_penalty (float): The frequency penalty for sampling (maps to repetition_penalty).
+ presence_penalty (float): The presence penalty for sampling (not directly supported).
  n (int): The number of samples to generate.

  Returns:
  dict: An OpenAI-compatible dictionary representing the chat completion.
  """
  # Apply chat template to convert messages into a single prompt string
-
  prompt_text = self._apply_chat_template(messages)

  # Tokenize the prompt
@@ -749,42 +806,112 @@ class HuggingFaceOpenAICompatibleModel:
  ).to(self.model.device)
  prompt_tokens = inputs.input_ids.shape[1]

- from transformers import GenerationConfig
+ if stream:
+ logger.warning(
+ "Streaming is not supported using transformers.Pipeline implementation. Ignoring stream=True."
+ )
+ stream = False
+
+ if presence_penalty is not None:
+ logger.warning(
+ "Presence penalty is not supported using transformers.Pipeline implementation."
+ " Ignoring presence_penalty."
+ )
+ presence_penalty = None
+
+ import transformers
+
+ transformers_version = version.parse(transformers.__version__)
+
+ # Stop strings are supported in transformers >= 4.43.0
+ can_handle_stop_strings = transformers_version >= version.parse("4.43.0")

- generation_config = GenerationConfig(
- max_new_tokens=max_completion_tokens,
- temperature=temperature,
- top_p=top_p,
+ # Determine sampling based on temperature (following serve.py logic)
+ # Default temperature to 1.0 if not specified
+ actual_temperature = temperature if temperature is not None else 1.0
+ do_sample = actual_temperature > 0.0
+
+ # Set up generation config following best practices from serve.py
+ generation_config = transformers.GenerationConfig(
+ max_new_tokens=max_completion_tokens if max_completion_tokens is not None else 1024,
  pad_token_id=self.tokenizer.pad_token_id,
  eos_token_id=self.tokenizer.eos_token_id,
- stop_strings=stop_strings,
- stream=stream,
- num_return_sequences=n,
- num_beams=max(1, n), # must be >1
- repetition_penalty=frequency_penalty,
- # TODO: Handle diversity_penalty and num_beam_groups
- # not all models support them making it hard to support any huggingface model
- # diversity_penalty=presence_penalty if n > 1 else None,
- # num_beam_groups=max(2, n) if presence_penalty else 1,
- do_sample=False,
+ do_sample=do_sample,
  )

+ # Only set temperature and top_p if sampling is enabled
+ if do_sample:
+ generation_config.temperature = actual_temperature
+ if top_p is not None:
+ generation_config.top_p = top_p
+
+ # Handle repetition penalty (mapped from frequency_penalty)
+ if frequency_penalty is not None:
+ # OpenAI's frequency_penalty is typically in range [-2.0, 2.0]
+ # HuggingFace's repetition_penalty is typically > 0, with 1.0 = no penalty
+ # We need to convert: frequency_penalty=0 -> repetition_penalty=1.0
+ # Higher frequency_penalty should increase repetition_penalty
+ generation_config.repetition_penalty = 1.0 + (frequency_penalty if frequency_penalty > 0 else 0)
+
+ # For multiple completions (n > 1), use sampling not beam search
+ if n > 1:
+ generation_config.num_return_sequences = n
+ # Force sampling on for multiple sequences
+ if not do_sample:
+ logger.warning("Forcing do_sample=True for n>1. Consider setting temperature > 0 for better diversity.")
+ generation_config.do_sample = True
+ generation_config.temperature = 1.0
+ else:
+ generation_config.num_return_sequences = 1
+
+ # Handle stop strings if provided
+ stopping_criteria = None
+ if stop_strings and not can_handle_stop_strings:
+ logger.warning("Stop strings are not supported in transformers < 4.41.0. Ignoring stop strings.")
+
+ if stop_strings and can_handle_stop_strings:
+ stopping_criteria = self._get_stopping_criteria(stop_strings)
+ output_ids = self.model.generate(
+ inputs.input_ids,
+ attention_mask=inputs.attention_mask,
+ generation_config=generation_config,
+ # Pass tokenizer for proper handling of stop strings
+ tokenizer=self.tokenizer,
+ stopping_criteria=stopping_criteria,
+ )
+ else:
+ output_ids = self.model.generate(
+ inputs.input_ids,
+ attention_mask=inputs.attention_mask,
+ generation_config=generation_config,
+ )
+
  # Generate text
- output_ids = self.model.generate(
- inputs.input_ids,
- attention_mask=inputs.attention_mask,
- generation_config=generation_config,
- )
+ # Handle the case where output might be 1D if n=1
+ if output_ids.dim() == 1:
+ output_ids = output_ids.unsqueeze(0)

  generated_texts = []
  completion_tokens = 0
  total_tokens = prompt_tokens
+
  for output_id in output_ids:
  # The output_ids include the input prompt
  # Decode the generated text, excluding the input prompt
  # so we slice to get only new tokens
  generated_tokens = output_id[prompt_tokens:]
  generated_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+
+ # Trim stop strings from generated text if they appear
+ # The stop criteria would stop generating further tokens, so we need to trim the generated text
+ if stop_strings and can_handle_stop_strings:
+ for stop_str in stop_strings:
+ if stop_str in generated_text:
+ # Find the first occurrence and trim everything from there
+ stop_idx = generated_text.find(stop_str)
+ generated_text = generated_text[:stop_idx]
+ break # Stop after finding the first stop string
+
  generated_texts.append(generated_text)

  # Calculate completion tokens
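
For illustration only (not part of the diff): a small sketch of the sampling and penalty mapping above; the helper name and values are hypothetical.

    def map_frequency_penalty(frequency_penalty: float) -> float:
        # Mirrors the clamping above: only positive OpenAI-style penalties increase
        # the HuggingFace repetition_penalty; 1.0 means "no penalty".
        return 1.0 + (frequency_penalty if frequency_penalty > 0 else 0)

    assert map_frequency_penalty(0.5) == 1.5
    assert map_frequency_penalty(0.0) == 1.0
    assert map_frequency_penalty(-1.0) == 1.0

    # temperature=0.7 -> do_sample=True, temperature/top_p applied;
    # temperature=0.0 -> greedy decoding (do_sample=False), unless n > 1 forces sampling back on.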
@@ -148,12 +148,17 @@ class MLFlowHandler(_base.BaseModelHandler["mlflow.pyfunc.PyFuncModel"]):

  file_utils.copy_file_or_tree(local_path, os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR))

+ # MLflow 3.x may return file:// URIs for artifact_path; extract just the last path component
+ artifact_path = model_info.artifact_path
+ if artifact_path.startswith("file://"):
+ artifact_path = artifact_path.rstrip("/").split("/")[-1]
+
  base_meta = model_blob_meta.ModelBlobMeta(
  name=name,
  model_type=cls.HANDLER_TYPE,
  handler_version=cls.HANDLER_VERSION,
  path=cls.MODEL_BLOB_FILE_OR_DIR,
- options=model_meta_schema.MLFlowModelBlobOptions({"artifact_path": model_info.artifact_path}),
+ options=model_meta_schema.MLFlowModelBlobOptions({"artifact_path": artifact_path}),
  )
  model_meta.models[name] = base_meta
  model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
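
For illustration only (not part of the diff): a quick check of the artifact path normalization above, using a hypothetical MLflow 3.x file:// URI.

    artifact_path = "file:///tmp/mlruns/0/abc123/artifacts/model/"
    if artifact_path.startswith("file://"):
        artifact_path = artifact_path.rstrip("/").split("/")[-1]
    assert artifact_path == "model"  # only the final path component is stored in the blob options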