PyPI - snowflake-ml-python - Versions diffs - 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl - Mend

snowflake-ml-python: 1.3.0 (py3-none-any.whl) → 1.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (211) hide show

snowflake/ml/_internal/file_utils.py +3 -3
snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
snowflake/ml/_internal/telemetry.py +11 -2
snowflake/ml/_internal/utils/formatting.py +1 -1
snowflake/ml/feature_store/feature_store.py +15 -106
snowflake/ml/fileset/sfcfs.py +4 -3
snowflake/ml/fileset/stage_fs.py +18 -0
snowflake/ml/model/_api.py +9 -9
snowflake/ml/model/_client/model/model_version_impl.py +20 -15
snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
snowflake/ml/model/_model_composer/model_composer.py +10 -8
snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
snowflake/ml/model/_packager/model_packager.py +8 -6
snowflake/ml/model/custom_model.py +3 -1
snowflake/ml/model/type_hints.py +13 -0
snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
snowflake/ml/modeling/_internal/model_specifications.py +3 -1
snowflake/ml/modeling/_internal/model_trainer.py +2 -2
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
snowflake/ml/modeling/cluster/birch.py +33 -61
snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
snowflake/ml/modeling/cluster/dbscan.py +33 -61
snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
snowflake/ml/modeling/cluster/k_means.py +33 -61
snowflake/ml/modeling/cluster/mean_shift.py +33 -61
snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
snowflake/ml/modeling/cluster/optics.py +33 -61
snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
snowflake/ml/modeling/compose/column_transformer.py +33 -61
snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
snowflake/ml/modeling/covariance/oas.py +33 -61
snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
snowflake/ml/modeling/decomposition/pca.py +33 -61
snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
snowflake/ml/modeling/framework/base.py +55 -5
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
snowflake/ml/modeling/impute/knn_imputer.py +33 -61
snowflake/ml/modeling/impute/missing_indicator.py +33 -61
snowflake/ml/modeling/impute/simple_imputer.py +4 -15
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
snowflake/ml/modeling/linear_model/lars.py +33 -61
snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
snowflake/ml/modeling/linear_model/lasso.py +33 -61
snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
snowflake/ml/modeling/linear_model/perceptron.py +33 -61
snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
snowflake/ml/modeling/linear_model/ridge.py +33 -61
snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
snowflake/ml/modeling/manifold/isomap.py +33 -61
snowflake/ml/modeling/manifold/mds.py +33 -61
snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
snowflake/ml/modeling/manifold/tsne.py +33 -61
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
snowflake/ml/modeling/svm/linear_svc.py +33 -61
snowflake/ml/modeling/svm/linear_svr.py +33 -61
snowflake/ml/modeling/svm/nu_svc.py +33 -61
snowflake/ml/modeling/svm/nu_svr.py +33 -61
snowflake/ml/modeling/svm/svc.py +33 -61
snowflake/ml/modeling/svm/svr.py +33 -61
snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
snowflake/ml/registry/_manager/model_manager.py +6 -2
snowflake/ml/registry/model_registry.py +100 -27
snowflake/ml/registry/registry.py +6 -2
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
{snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
{snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0

snowflake/ml/model/_packager/model_handlers/sentence_transformers.py ADDED Viewed

@@ -0,0 +1,214 @@
+import logging
+import os
+from typing import TYPE_CHECKING, Callable, Dict, Optional, Type, cast, final
+import cloudpickle
+import pandas as pd
+from typing_extensions import TypeGuard, Unpack
+from snowflake.ml._internal import type_utils
+from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
+from snowflake.ml.model._packager.model_env import model_env
+from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
+from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
+from snowflake.ml.model._packager.model_meta import (
+    model_blob_meta,
+    model_meta as model_meta_api,
+)
+from snowflake.ml.model._signatures import utils as model_signature_utils
+from snowflake.snowpark._internal import utils as snowpark_utils
+if TYPE_CHECKING:
+    import sentence_transformers
+logger = logging.getLogger(__name__)
+@final
+class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.SentenceTransformer"]):
+    """Handler that saves, loads and wraps ``sentence_transformers.SentenceTransformer`` models.
+    ``encode`` is the only target method this handler accepts.
+    """
+    # Recorded in the blob metadata as ``model_type``.
+    HANDLER_TYPE = "sentence_transformers"
+    # Recorded in the blob metadata as ``handler_version``.
+    HANDLER_VERSION = "2024-03-15"
+    # Written to ``model_meta.min_snowpark_ml_version`` whenever a model is saved by this handler.
+    _MIN_SNOWPARK_ML_VERSION = "1.3.1"
+    _HANDLER_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelHandlerMigrator]] = {}
+    # Relative name of the saved model inside the per-model blob directory.
+    # The "MODELE" spelling is kept because the attribute is referenced by name.
+    MODELE_BLOB_FILE_OR_DIR = "model"
+    DEFAULT_TARGET_METHODS = ["encode"]
+    @classmethod
+    def can_handle(
+        cls,
+        model: model_types.SupportedModelType,
+    ) -> TypeGuard["sentence_transformers.SentenceTransformer"]:
+        """Return True when ``model`` is a ``sentence_transformers.SentenceTransformer`` instance.
+        The check goes through ``type_utils.LazyType`` with the dotted type name instead of importing the package.
+        """
+        if type_utils.LazyType("sentence_transformers.SentenceTransformer").isinstance(model):
+            return True
+        return False
+    @classmethod
+    def cast_model(
+        cls,
+        model: model_types.SupportedModelType,
+    ) -> "sentence_transformers.SentenceTransformer":
+        """Narrow ``model`` to ``SentenceTransformer``.
+        Raises:
+            AssertionError: If ``model`` is not a ``SentenceTransformer`` instance.
+        """
+        import sentence_transformers
+        assert isinstance(model, sentence_transformers.SentenceTransformer)
+        return cast(sentence_transformers.SentenceTransformer, model)
+    @classmethod
+    def save_model(
+        cls,
+        name: str,
+        model: "sentence_transformers.SentenceTransformer",
+        model_meta: model_meta_api.ModelMetadata,
+        model_blobs_dir_path: str,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
+        is_sub_model: Optional[bool] = False,
+        **kwargs: Unpack[model_types.SentenceTransformersSaveOptions],  # registry.log_model(options={...})
+    ) -> None:
+        """Validate the target methods/signature, write the model to disk and register it in ``model_meta``.
+        Args:
+            name: Sub-directory of ``model_blobs_dir_path`` the model is written to, and the key used in
+                ``model_meta.models``.
+            model: The model to save.
+            model_meta: Metadata object; its ``models``, ``min_snowpark_ml_version`` and ``env`` are updated.
+            model_blobs_dir_path: Directory that holds the per-model blob directories.
+            sample_input_data: Optional sample forwarded to ``handlers_utils.validate_signature``.
+            is_sub_model: When truthy, target-method and signature validation is skipped.
+            **kwargs: Save options; ``target_methods`` is consumed here.
+        Raises:
+            AssertionError: If the resolved target methods are anything other than ``["encode"]``.
+        """
+        # Validate target methods and signature (if possible)
+        if not is_sub_model:
+            target_methods = handlers_utils.get_target_methods(
+                model=model,
+                target_methods=kwargs.pop("target_methods", None),
+                default_target_methods=cls.DEFAULT_TARGET_METHODS,
+            )
+            assert target_methods == ["encode"], "target_methods can only be ['encode']"
+            def get_prediction(
+                target_method_name: str, sample_input_data: model_types.SupportedLocalDataType
+            ) -> model_types.SupportedLocalDataType:
+                # ``target_method_name`` is unused: "encode" is the only method that can reach this point.
+                return _sentence_transformer_encode(model, sample_input_data)
+            if model_meta.signatures:
+                # Caller supplied signatures: check the methods they name, then validate.
+                handlers_utils.validate_target_methods(model, list(model_meta.signatures.keys()))
+                model_meta = handlers_utils.validate_signature(
+                    model=model,
+                    model_meta=model_meta,
+                    target_methods=target_methods,
+                    sample_input_data=sample_input_data,
+                    get_prediction_fn=get_prediction,
+                )
+            else:
+                handlers_utils.validate_target_methods(model, target_methods)  # DEFAULT_TARGET_METHODS only
+                # Without signatures, validation only runs when a sample is available.
+                if sample_input_data is not None:
+                    model_meta = handlers_utils.validate_signature(
+                        model=model,
+                        model_meta=model_meta,
+                        target_methods=target_methods,
+                        sample_input_data=sample_input_data,
+                        get_prediction_fn=get_prediction,
+                    )
+        # save model
+        model_blob_path = os.path.join(model_blobs_dir_path, name)
+        os.makedirs(model_blob_path, exist_ok=True)
+        model.save(os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR))
+        # save model metadata
+        base_meta = model_blob_meta.ModelBlobMeta(
+            name=name,
+            model_type=cls.HANDLER_TYPE,
+            handler_version=cls.HANDLER_VERSION,
+            path=cls.MODELE_BLOB_FILE_OR_DIR,
+        )
+        model_meta.models[name] = base_meta
+        model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
+        model_meta.env.include_if_absent(
+            [
+                model_env.ModelDependency(requirement="sentence-transformers", pip_name="sentence-transformers"),
+            ],
+            check_local_version=True,
+        )
+    @classmethod
+    def load_model(
+        cls,
+        name: str,
+        model_meta: model_meta_api.ModelMetadata,
+        model_blobs_dir_path: str,
+        **kwargs: Unpack[model_types.ModelLoadOption],  # use_gpu
+    ) -> "sentence_transformers.SentenceTransformer":
+        """Load the model stored under ``<model_blobs_dir_path>/<name>/<blob path from model_meta>``.
+        Args:
+            name: Key into ``model_meta.models`` and name of the per-model blob directory.
+            model_meta: Metadata whose ``models[name].path`` gives the saved file or directory name.
+            model_blobs_dir_path: Directory that holds the per-model blob directories.
+            **kwargs: Load options; not read by this method.
+        Returns:
+            The loaded ``SentenceTransformer``.
+        Raises:
+            AssertionError: If the blob path is neither a directory nor a file, or if the unpickled
+                object is not a ``SentenceTransformer``.
+        """
+        import sentence_transformers
+        if snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
+            # We need to redirect the same folders to a writable location in the sandbox.
+            os.environ["TRANSFORMERS_CACHE"] = "/tmp"
+        model_blob_path = os.path.join(model_blobs_dir_path, name)
+        model_blobs_metadata = model_meta.models
+        model_blob_metadata = model_blobs_metadata[name]
+        model_blob_filename = model_blob_metadata.path
+        model_blob_file_or_dir_path = os.path.join(model_blob_path, model_blob_filename)
+        if os.path.isdir(model_blob_file_or_dir_path):  # if the saved model is a directory
+            model = sentence_transformers.SentenceTransformer(model_blob_file_or_dir_path)
+        else:
+            assert os.path.isfile(model_blob_file_or_dir_path)  # if the saved model is a file
+            # NOTE(review): unpickling can execute arbitrary code; only load blobs from a trusted source.
+            with open(model_blob_file_or_dir_path, "rb") as f:
+                model = cloudpickle.load(f)
+            assert isinstance(model, sentence_transformers.SentenceTransformer)
+        return model
+    @classmethod
+    def convert_as_custom_model(
+        cls,
+        raw_model: "sentence_transformers.SentenceTransformer",
+        model_meta: model_meta_api.ModelMetadata,
+        **kwargs: Unpack[model_types.ModelLoadOption],
+    ) -> custom_model.CustomModel:
+        """Wrap ``raw_model`` in a dynamically created ``CustomModel`` subclass exposing ``encode``.
+        Args:
+            raw_model: The model to wrap.
+            model_meta: Metadata whose ``signatures`` decide which inference methods are generated.
+            **kwargs: Load options; not read by this method.
+        Returns:
+            An instance of the generated ``_SentenceTransformer`` custom model.
+        Raises:
+            ValueError: If ``model_meta.signatures`` names a target method other than ``encode``.
+            AssertionError: If ``raw_model`` is not a ``SentenceTransformer`` or the generated model has
+                no callable ``encode``.
+        """
+        import sentence_transformers
+        from snowflake.ml.model import custom_model
+        def _create_custom_model(
+            raw_model: "sentence_transformers.SentenceTransformer",
+            model_meta: model_meta_api.ModelMetadata,
+        ) -> Type[custom_model.CustomModel]:
+            def get_prediction(
+                raw_model: "sentence_transformers.SentenceTransformer",
+                signature: model_signature.ModelSignature,
+                target_method: str,
+            ) -> Callable[[custom_model.CustomModel, pd.DataFrame], pd.DataFrame]:
+                @custom_model.inference_api
+                def fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                    predictions_df = _sentence_transformer_encode(raw_model, X)
+                    # Rename the output columns according to the signature's declared outputs.
+                    return model_signature_utils.rename_pandas_df(predictions_df, signature.outputs)
+                return fn
+            type_method_dict = {}
+            for target_method_name, sig in model_meta.signatures.items():
+                if target_method_name == "encode":
+                    type_method_dict[target_method_name] = get_prediction(raw_model, sig, target_method_name)
+                else:
+                    # The exception used to be constructed but never raised, which silently
+                    # dropped unsupported methods from the generated class.
+                    raise ValueError(f"{target_method_name} is currently not supported.")
+            _SentenceTransformer = type(
+                "_SentenceTransformer",
+                (custom_model.CustomModel,),
+                type_method_dict,
+            )
+            return _SentenceTransformer
+        assert isinstance(raw_model, sentence_transformers.SentenceTransformer)
+        model = raw_model
+        _SentenceTransformer = _create_custom_model(model, model_meta)
+        sentence_transformers_SentenceTransformer_model = _SentenceTransformer(custom_model.ModelContext())
+        # Sanity check: the generated class must expose a callable ``encode``.
+        predict_method = getattr(sentence_transformers_SentenceTransformer_model, "encode", None)
+        assert callable(predict_method)
+        return sentence_transformers_SentenceTransformer_model
+def _sentence_transformer_encode(
+    model: "sentence_transformers.SentenceTransformer", X: model_types.SupportedLocalDataType
+) -> model_types.SupportedLocalDataType:
+    """Encode the single input column of ``X`` with ``model`` and return the result as a DataFrame.
+    Input that is not already a ``pd.DataFrame`` is first converted with
+    ``model_signature._convert_local_data_to_df``. All rows are passed to ``model.encode`` in one call,
+    with ``batch_size`` set to the number of rows. The returned frame has one column, labelled ``0``.
+    Raises:
+        AssertionError: If the frame does not have exactly one column, or ``model.encode`` is not callable.
+    """
+    frame = X if isinstance(X, pd.DataFrame) else model_signature._convert_local_data_to_df(X)
+    row_count, column_count = frame.shape
+    assert column_count == 1, "SentenceTransformer can only accept 1 input column when converted to pd.DataFrame"
+    sentences = frame.iloc[:, 0].tolist()
+    encode_attr = getattr(model, "encode", None)
+    assert callable(encode_attr)
+    embeddings = model.encode(sentences, batch_size=row_count)
+    return pd.DataFrame({0: embeddings.tolist()})

snowflake/ml/model/_packager/model_handlers/sklearn.py CHANGED Viewed

@@ -72,7 +72,7 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
         model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"],
         model_meta: model_meta_api.ModelMetadata,
         model_blobs_dir_path: str,
-        sample_input: Optional[model_types.SupportedDataType] = None,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.SKLModelSaveOptions],
     ) -> None:
@@ -89,21 +89,21 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
             )
             def get_prediction(
-                target_method_name: str, sample_input: model_types.SupportedLocalDataType
+                target_method_name: str, sample_input_data: model_types.SupportedLocalDataType
             ) -> model_types.SupportedLocalDataType:
-                if not isinstance(sample_input, (pd.DataFrame, np.ndarray)):
-                    sample_input = model_signature._convert_local_data_to_df(sample_input)
+                if not isinstance(sample_input_data, (pd.DataFrame, np.ndarray)):
+                    sample_input_data = model_signature._convert_local_data_to_df(sample_input_data)
                 target_method = getattr(model, target_method_name, None)
                 assert callable(target_method)
-                predictions_df = target_method(sample_input)
+                predictions_df = target_method(sample_input_data)
                 return predictions_df
             model_meta = handlers_utils.validate_signature(
                 model=model,
                 model_meta=model_meta,
                 target_methods=target_methods,
-                sample_input=sample_input,
+                sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )

snowflake/ml/model/_packager/model_handlers/snowmlmodel.py CHANGED Viewed

@@ -69,7 +69,7 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         model: "BaseEstimator",
         model_meta: model_meta_api.ModelMetadata,
         model_blobs_dir_path: str,
-        sample_input: Optional[model_types.SupportedDataType] = None,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.SNOWModelSaveOptions],
     ) -> None:
@@ -79,7 +79,7 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         # Pipeline is inherited from BaseEstimator, so no need to add one more check
         if not is_sub_model:
-            if sample_input is not None or model_meta.signatures:
+            if sample_input_data is not None or model_meta.signatures:
                 warnings.warn(
                     "Inferring model signature from sample input or providing model signature for Snowpark ML "
                     + "Modeling model is not required. Model signature will automatically be inferred during fitting. ",
@@ -87,7 +87,19 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
                     stacklevel=2,
                 )
             assert hasattr(model, "model_signatures"), "Model does not have model signatures as expected."
-            model_meta.signatures = getattr(model, "model_signatures", {})
+            model_signature_dict = getattr(model, "model_signatures", {})
+            target_methods = kwargs.pop("target_methods", None)
+            if not target_methods:
+                model_meta.signatures = model_signature_dict
+            else:
+                temp_model_signature_dict = {}
+                for method_name in target_methods:
+                    method_model_signature = model_signature_dict.get(method_name, None)
+                    if method_model_signature is not None:
+                        temp_model_signature_dict[method_name] = method_model_signature
+                    else:
+                        raise ValueError(f"Target method {method_name} does not exist in the model.")
+                model_meta.signatures = temp_model_signature_dict
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)

snowflake/ml/model/_packager/model_handlers/tensorflow.py CHANGED Viewed

@@ -64,7 +64,7 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         model: "tensorflow.Module",
         model_meta: model_meta_api.ModelMetadata,
         model_blobs_dir_path: str,
-        sample_input: Optional[model_types.SupportedDataType] = None,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.TensorflowSaveOptions],
     ) -> None:
@@ -85,18 +85,18 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
             )
             def get_prediction(
-                target_method_name: str, sample_input: "model_types.SupportedLocalDataType"
+                target_method_name: str, sample_input_data: "model_types.SupportedLocalDataType"
             ) -> model_types.SupportedLocalDataType:
-                if not tensorflow_handler.SeqOfTensorflowTensorHandler.can_handle(sample_input):
-                    sample_input = tensorflow_handler.SeqOfTensorflowTensorHandler.convert_from_df(
-                        model_signature._convert_local_data_to_df(sample_input)
+                if not tensorflow_handler.SeqOfTensorflowTensorHandler.can_handle(sample_input_data):
+                    sample_input_data = tensorflow_handler.SeqOfTensorflowTensorHandler.convert_from_df(
+                        model_signature._convert_local_data_to_df(sample_input_data)
                     )
                 target_method = getattr(model, target_method_name, None)
                 assert callable(target_method)
-                for tensor in sample_input:
+                for tensor in sample_input_data:
                     tensorflow.stop_gradient(tensor)
-                predictions_df = target_method(*sample_input)
+                predictions_df = target_method(*sample_input_data)
                 if isinstance(predictions_df, (tensorflow.Tensor, tensorflow.Variable, np.ndarray)):
                     predictions_df = [predictions_df]
@@ -107,7 +107,7 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
                 model=model,
                 model_meta=model_meta,
                 target_methods=target_methods,
-                sample_input=sample_input,
+                sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )

snowflake/ml/model/_packager/model_handlers/torchscript.py CHANGED Viewed

@@ -62,7 +62,7 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):  # t
         model: "torch.jit.ScriptModule",  # type:ignore[name-defined]
         model_meta: model_meta_api.ModelMetadata,
         model_blobs_dir_path: str,
-        sample_input: Optional[model_types.SupportedDataType] = None,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.TorchScriptSaveOptions],
     ) -> None:
@@ -78,18 +78,18 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):  # t
             )
             def get_prediction(
-                target_method_name: str, sample_input: "model_types.SupportedLocalDataType"
+                target_method_name: str, sample_input_data: "model_types.SupportedLocalDataType"
             ) -> model_types.SupportedLocalDataType:
-                if not pytorch_handler.SeqOfPyTorchTensorHandler.can_handle(sample_input):
-                    sample_input = pytorch_handler.SeqOfPyTorchTensorHandler.convert_from_df(
-                        model_signature._convert_local_data_to_df(sample_input)
+                if not pytorch_handler.SeqOfPyTorchTensorHandler.can_handle(sample_input_data):
+                    sample_input_data = pytorch_handler.SeqOfPyTorchTensorHandler.convert_from_df(
+                        model_signature._convert_local_data_to_df(sample_input_data)
                     )
                 model.eval()
                 target_method = getattr(model, target_method_name, None)
                 assert callable(target_method)
                 with torch.no_grad():
-                    predictions_df = target_method(*sample_input)
+                    predictions_df = target_method(*sample_input_data)
                 if isinstance(predictions_df, torch.Tensor):
                     predictions_df = [predictions_df]
@@ -100,7 +100,7 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):  # t
                 model=model,
                 model_meta=model_meta,
                 target_methods=target_methods,
-                sample_input=sample_input,
+                sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )

snowflake/ml/model/_packager/model_handlers/xgboost.py CHANGED Viewed

@@ -45,7 +45,7 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
     _HANDLER_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelHandlerMigrator]] = {}
     MODELE_BLOB_FILE_OR_DIR = "model.ubj"
-    DEFAULT_TARGET_METHODS = ["apply", "predict", "predict_proba"]
+    DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
     @classmethod
     def can_handle(
@@ -76,7 +76,7 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
         model: Union["xgboost.Booster", "xgboost.XGBModel"],
         model_meta: model_meta_api.ModelMetadata,
         model_blobs_dir_path: str,
-        sample_input: Optional[model_types.SupportedDataType] = None,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.XGBModelSaveOptions],
     ) -> None:
@@ -92,24 +92,24 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
             )
             def get_prediction(
-                target_method_name: str, sample_input: model_types.SupportedLocalDataType
+                target_method_name: str, sample_input_data: model_types.SupportedLocalDataType
             ) -> model_types.SupportedLocalDataType:
-                if not isinstance(sample_input, (pd.DataFrame, np.ndarray)):
-                    sample_input = model_signature._convert_local_data_to_df(sample_input)
+                if not isinstance(sample_input_data, (pd.DataFrame, np.ndarray)):
+                    sample_input_data = model_signature._convert_local_data_to_df(sample_input_data)
                 if isinstance(model, xgboost.Booster):
-                    sample_input = xgboost.DMatrix(sample_input)
+                    sample_input_data = xgboost.DMatrix(sample_input_data)
                 target_method = getattr(model, target_method_name, None)
                 assert callable(target_method)
-                predictions_df = target_method(sample_input)
+                predictions_df = target_method(sample_input_data)
                 return predictions_df
             model_meta = handlers_utils.validate_signature(
                 model=model,
                 model_meta=model_meta,
                 target_methods=target_methods,
-                sample_input=sample_input,
+                sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )

snowflake/ml/model/_packager/model_meta/_core_requirements.py CHANGED Viewed

@@ -6,6 +6,6 @@ REQUIREMENTS = [
     "packaging>=20.9,<24",
     "pandas>=1.0.0,<2",
     "pyyaml>=6.0,<7",
-    "snowflake-snowpark-python>=1.8.0,<2,!=1.12.0",
+    "snowflake-snowpark-python>=1.11.1,<2,!=1.12.0",
     "typing-extensions>=4.1.0,<5"
 ]

snowflake/ml/model/_packager/model_packager.py CHANGED Viewed

@@ -40,7 +40,7 @@ class ModelPackager:
         name: str,
         model: model_types.SupportedModelType,
         signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
-        sample_input: Optional[model_types.SupportedDataType] = None,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
         metadata: Optional[Dict[str, str]] = None,
         conda_dependencies: Optional[List[str]] = None,
         pip_requirements: Optional[List[str]] = None,
@@ -49,18 +49,20 @@ class ModelPackager:
         code_paths: Optional[List[str]] = None,
         options: Optional[model_types.ModelSaveOption] = None,
     ) -> None:
-        if (signatures is None) and (sample_input is None) and not model_handler.is_auto_signature_model(model):
+        if (signatures is None) and (sample_input_data is None) and not model_handler.is_auto_signature_model(model):
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.INVALID_ARGUMENT,
                 original_exception=ValueError(
-                    "Signatures and sample_input both cannot be None at the same time for this kind of model."
+                    "Signatures and sample_input_data both cannot be None at the same time for this kind of model."
                 ),
             )
-        if (signatures is not None) and (sample_input is not None):
+        if (signatures is not None) and (sample_input_data is not None):
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.INVALID_ARGUMENT,
-                original_exception=ValueError("Signatures and sample_input both cannot be specified at the same time."),
+                original_exception=ValueError(
+                    "Signatures and sample_input_data both cannot be specified at the same time."
+                ),
             )
         if not options:
@@ -93,7 +95,7 @@ class ModelPackager:
                 model=model,
                 model_meta=meta,
                 model_blobs_dir_path=model_blobs_path,
-                sample_input=sample_input,
+                sample_input_data=sample_input_data,
                 is_sub_model=False,
                 **options,
             )

snowflake/ml/model/custom_model.py CHANGED Viewed

@@ -149,7 +149,9 @@ class CustomModel:
         context: A ModelContext object showing sub-models and artifacts related to this model.
     """
-    def __init__(self, context: ModelContext) -> None:
+    def __init__(self, context: Optional[ModelContext] = None) -> None:
+        if context is None:
+            context = ModelContext()
         self.context = context
         for method in self._get_infer_methods():
             _validate_predict_function(method)

snowflake/ml/model/type_hints.py CHANGED Viewed

@@ -22,6 +22,7 @@ if TYPE_CHECKING:
     import mlflow
     import numpy as np
     import pandas as pd
+    import sentence_transformers
     import sklearn.base
     import sklearn.pipeline
     import tensorflow
@@ -32,6 +33,7 @@ if TYPE_CHECKING:
     import snowflake.ml.model.custom_model
     import snowflake.ml.model.models.huggingface_pipeline
     import snowflake.ml.model.models.llm
+    import snowflake.ml.model.models.sentence_transformers
     import snowflake.snowpark
     from snowflake.ml.modeling.framework import base  # noqa: F401
@@ -81,7 +83,9 @@ SupportedNoSignatureRequirementsModelType = Union[
     "base.BaseEstimator",
     "mlflow.pyfunc.PyFuncModel",
     "transformers.Pipeline",
+    "sentence_transformers.SentenceTransformer",
     "snowflake.ml.model.models.huggingface_pipeline.HuggingFacePipelineModel",
+    "snowflake.ml.model.models.sentence_transformers.SentenceTransformer",
     "snowflake.ml.model.models.llm.LLM",
 ]
@@ -106,6 +110,7 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 | mlflow.pyfunc.PyFuncModel | mlflow.py   | _MLFlowHandler |
 | transformers.Pipeline | huggingface_pipeline.py | _HuggingFacePipelineHandler |
 | huggingface_pipeline.HuggingFacePipelineModel | huggingface_pipeline.py | _HuggingFacePipelineHandler |
+| sentence_transformers.SentenceTransformer | sentence_transformers.py | _SentenceTransformerHandler |
 """
 SupportedModelHandlerType = Literal[
@@ -113,6 +118,7 @@ SupportedModelHandlerType = Literal[
     "huggingface_pipeline",
     "mlflow",
     "pytorch",
+    "sentence_transformers",
     "sklearn",
     "snowml",
     "tensorflow",
@@ -215,6 +221,7 @@ class BaseModelSaveOption(TypedDict):
     embed_local_ml_library: NotRequired[bool]
     relax_version: NotRequired[bool]
     _legacy_save: NotRequired[bool]
+    function_type: NotRequired[Literal["FUNCTION", "TABLE_FUNCTION"]]
     method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
@@ -261,6 +268,11 @@ class HuggingFaceSaveOptions(BaseModelSaveOption):
     cuda_version: NotRequired[str]
+class SentenceTransformersSaveOptions(BaseModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
+    cuda_version: NotRequired[str]
 class LLMSaveOptions(BaseModelSaveOption):
     cuda_version: NotRequired[str]
@@ -276,6 +288,7 @@ ModelSaveOption = Union[
     TensorflowSaveOptions,
     MLFlowSaveOptions,
     HuggingFaceSaveOptions,
+    SentenceTransformersSaveOptions,
     LLMSaveOptions,
 ]

snowflake/ml/modeling/_internal/estimator_utils.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import inspect
-from typing import Any, Callable, Dict, Set, Tuple
+import numbers
+from typing import Any, Callable, Dict, List, Set, Tuple
 import numpy as np
+from numpy import typing as npt
 from typing_extensions import TypeGuard
 from snowflake.ml._internal.exceptions import error_codes, exceptions
@@ -153,3 +155,61 @@ def get_module_name(model: object) -> str:
             original_exception=ValueError(f"Unable to infer the source module of the given object {model}."),
         )
     return module.__name__
+def handle_inference_result(
+    inference_res: Any, output_cols: List[str], inference_method: str, within_udf: bool = False
+) -> Tuple[npt.NDArray[Any], List[str]]:
+    if isinstance(inference_res, list) and len(inference_res) > 0 and isinstance(inference_res[0], np.ndarray):
+        # In case of multioutput estimators, predict_proba, decision_function etc., functions return a list of
+        # ndarrays. We need to concatenate them.
+        # First compute output column names
+        if len(output_cols) == len(inference_res):
+            actual_output_cols = []
+            for idx, np_arr in enumerate(inference_res):
+                for i in range(1 if len(np_arr.shape) <= 1 else np_arr.shape[1]):
+                    actual_output_cols.append(f"{output_cols[idx]}_{i}")
+            output_cols = actual_output_cols
+        # Concatenate np arrays
+        transformed_numpy_array = np.concatenate(inference_res, axis=1)
+    elif isinstance(inference_res, tuple) and len(inference_res) > 0 and isinstance(inference_res[0], np.ndarray):
+        # In case of kneighbors, functions return a tuple of ndarrays.
+        transformed_numpy_array = np.stack(inference_res, axis=1)
+    elif isinstance(inference_res, numbers.Number):
+        # In case of BernoulliRBM, functions return a float
+        transformed_numpy_array = np.array([inference_res])
+    else:
+        transformed_numpy_array = inference_res
+    if (len(transformed_numpy_array.shape) == 3) and inference_method != "kneighbors":
+        # VotingClassifier will return results of shape (n_classifiers, n_samples, n_classes)
+        # when voting = "soft" and flatten_transform = False. We can't handle unflatten transforms,
+        # so we ignore flatten_transform flag and flatten the results.
+        transformed_numpy_array = np.hstack(transformed_numpy_array)  # type: ignore[call-overload]
+    if len(transformed_numpy_array.shape) == 1:
+        transformed_numpy_array = np.reshape(transformed_numpy_array, (-1, 1))
+    shape = transformed_numpy_array.shape
+    if len(shape) > 1:
+        if shape[1] != len(output_cols):
+            # HeterogeneousEnsemble's transform method produce results with variying shapes
+            # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes).
+            # It is hard to predict the response shape without using fragile introspection logic.
+            # So, to avoid that we are packing the results into a dataframe of shape (n_samples, 1) with
+            # each element being a list.
+            if len(output_cols) != 1:
+                raise TypeError(
+                    "expected_output_cols must be same length as transformed array or should be of length 1."
+                    f"Currently expected_output_cols shape is {len(output_cols)}, "
+                    f"transformed array shape is {shape}. "
+                )
+            if not within_udf:
+                actual_output_cols = []
+                for i in range(shape[1]):
+                    actual_output_cols.append(f"{output_cols[0]}_{i}")
+                output_cols = actual_output_cols
+    return transformed_numpy_array, output_cols

snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl