PyPI - snowflake-ml-python - Versions diffs - 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl - Mend

snowflake-ml-python 1.6.1 (py3-none-any.whl) → 1.6.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (284)

snowflake/cortex/__init__.py +4 -0
snowflake/cortex/_classify_text.py +2 -2
snowflake/cortex/_embed_text_1024.py +37 -0
snowflake/cortex/_embed_text_768.py +37 -0
snowflake/cortex/_extract_answer.py +2 -2
snowflake/cortex/_sentiment.py +2 -2
snowflake/cortex/_summarize.py +2 -2
snowflake/cortex/_translate.py +2 -2
snowflake/cortex/_util.py +4 -4
snowflake/ml/_internal/env_utils.py +5 -5
snowflake/ml/_internal/exceptions/error_codes.py +2 -0
snowflake/ml/_internal/telemetry.py +142 -20
snowflake/ml/_internal/utils/db_utils.py +50 -0
snowflake/ml/_internal/utils/identifier.py +48 -11
snowflake/ml/_internal/utils/service_logger.py +63 -0
snowflake/ml/_internal/utils/snowflake_env.py +23 -13
snowflake/ml/_internal/utils/sql_identifier.py +26 -2
snowflake/ml/_internal/utils/table_manager.py +19 -1
snowflake/ml/data/_internal/arrow_ingestor.py +1 -11
snowflake/ml/data/data_connector.py +33 -7
snowflake/ml/data/ingestor_utils.py +20 -10
snowflake/ml/data/torch_utils.py +68 -0
snowflake/ml/dataset/dataset.py +1 -3
snowflake/ml/feature_store/access_manager.py +3 -3
snowflake/ml/feature_store/feature_store.py +60 -19
snowflake/ml/feature_store/feature_view.py +84 -30
snowflake/ml/fileset/embedded_stage_fs.py +1 -1
snowflake/ml/fileset/fileset.py +1 -1
snowflake/ml/fileset/sfcfs.py +9 -3
snowflake/ml/fileset/stage_fs.py +2 -1
snowflake/ml/lineage/lineage_node.py +7 -2
snowflake/ml/model/__init__.py +1 -2
snowflake/ml/model/_client/model/model_version_impl.py +96 -12
snowflake/ml/model/_client/ops/model_ops.py +124 -6
snowflake/ml/model/_client/ops/service_ops.py +309 -9
snowflake/ml/model/_client/service/model_deployment_spec.py +8 -5
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +2 -2
snowflake/ml/model/_client/sql/_base.py +5 -0
snowflake/ml/model/_client/sql/model.py +1 -0
snowflake/ml/model/_client/sql/model_version.py +9 -5
snowflake/ml/model/_client/sql/service.py +121 -20
snowflake/ml/model/_model_composer/model_composer.py +11 -39
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -11
snowflake/ml/model/_packager/model_env/model_env.py +4 -38
snowflake/ml/model/_packager/model_handlers/_utils.py +134 -28
snowflake/ml/model/_packager/model_handlers/catboost.py +31 -30
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +26 -18
snowflake/ml/model/_packager/model_handlers/lightgbm.py +31 -58
snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -5
snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +169 -0
snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +15 -8
snowflake/ml/model/_packager/model_handlers/sklearn.py +56 -60
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +141 -9
snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
snowflake/ml/model/_packager/model_handlers/xgboost.py +63 -48
snowflake/ml/model/_packager/model_meta/model_meta.py +16 -42
snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -14
snowflake/ml/model/_packager/model_packager.py +14 -8
snowflake/ml/model/_packager/model_runtime/model_runtime.py +11 -0
snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
snowflake/ml/model/_signatures/snowpark_handler.py +3 -2
snowflake/ml/model/_signatures/utils.py +9 -0
snowflake/ml/model/type_hints.py +12 -145
snowflake/ml/modeling/_internal/constants.py +1 -0
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
snowflake/ml/modeling/_internal/model_specifications.py +2 -0
snowflake/ml/modeling/_internal/model_trainer.py +1 -0
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -4
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +130 -166
snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +0 -1
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +61 -21
snowflake/ml/modeling/cluster/affinity_propagation.py +61 -21
snowflake/ml/modeling/cluster/agglomerative_clustering.py +61 -21
snowflake/ml/modeling/cluster/birch.py +61 -21
snowflake/ml/modeling/cluster/bisecting_k_means.py +61 -21
snowflake/ml/modeling/cluster/dbscan.py +61 -21
snowflake/ml/modeling/cluster/feature_agglomeration.py +61 -21
snowflake/ml/modeling/cluster/k_means.py +61 -21
snowflake/ml/modeling/cluster/mean_shift.py +61 -21
snowflake/ml/modeling/cluster/mini_batch_k_means.py +61 -21
snowflake/ml/modeling/cluster/optics.py +61 -21
snowflake/ml/modeling/cluster/spectral_biclustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_clustering.py +61 -21
snowflake/ml/modeling/cluster/spectral_coclustering.py +61 -21
snowflake/ml/modeling/compose/column_transformer.py +61 -21
snowflake/ml/modeling/compose/transformed_target_regressor.py +61 -21
snowflake/ml/modeling/covariance/elliptic_envelope.py +61 -21
snowflake/ml/modeling/covariance/empirical_covariance.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso.py +61 -21
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +61 -21
snowflake/ml/modeling/covariance/ledoit_wolf.py +61 -21
snowflake/ml/modeling/covariance/min_cov_det.py +61 -21
snowflake/ml/modeling/covariance/oas.py +61 -21
snowflake/ml/modeling/covariance/shrunk_covariance.py +61 -21
snowflake/ml/modeling/decomposition/dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/factor_analysis.py +61 -21
snowflake/ml/modeling/decomposition/fast_ica.py +61 -21
snowflake/ml/modeling/decomposition/incremental_pca.py +61 -21
snowflake/ml/modeling/decomposition/kernel_pca.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +61 -21
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/pca.py +61 -21
snowflake/ml/modeling/decomposition/sparse_pca.py +61 -21
snowflake/ml/modeling/decomposition/truncated_svd.py +61 -21
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +61 -21
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +61 -21
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +61 -21
snowflake/ml/modeling/ensemble/bagging_classifier.py +61 -21
snowflake/ml/modeling/ensemble/bagging_regressor.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +61 -21
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +61 -21
snowflake/ml/modeling/ensemble/isolation_forest.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_classifier.py +61 -21
snowflake/ml/modeling/ensemble/random_forest_regressor.py +61 -21
snowflake/ml/modeling/ensemble/stacking_regressor.py +61 -21
snowflake/ml/modeling/ensemble/voting_classifier.py +61 -21
snowflake/ml/modeling/ensemble/voting_regressor.py +61 -21
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +61 -21
snowflake/ml/modeling/feature_selection/select_fdr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fpr.py +61 -21
snowflake/ml/modeling/feature_selection/select_fwe.py +61 -21
snowflake/ml/modeling/feature_selection/select_k_best.py +61 -21
snowflake/ml/modeling/feature_selection/select_percentile.py +61 -21
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +61 -21
snowflake/ml/modeling/feature_selection/variance_threshold.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +61 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +61 -21
snowflake/ml/modeling/impute/iterative_imputer.py +61 -21
snowflake/ml/modeling/impute/knn_imputer.py +61 -21
snowflake/ml/modeling/impute/missing_indicator.py +61 -21
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/nystroem.py +61 -21
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +61 -21
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +61 -21
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +61 -21
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +61 -21
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ard_regression.py +61 -21
snowflake/ml/modeling/linear_model/bayesian_ridge.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/gamma_regressor.py +61 -21
snowflake/ml/modeling/linear_model/huber_regressor.py +61 -21
snowflake/ml/modeling/linear_model/lars.py +61 -21
snowflake/ml/modeling/linear_model/lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso.py +61 -21
snowflake/ml/modeling/linear_model/lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +61 -21
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +61 -21
snowflake/ml/modeling/linear_model/linear_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression.py +61 -21
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso.py +61 -21
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +61 -21
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +61 -21
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +61 -21
snowflake/ml/modeling/linear_model/perceptron.py +61 -21
snowflake/ml/modeling/linear_model/poisson_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ransac_regressor.py +61 -21
snowflake/ml/modeling/linear_model/ridge.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier.py +61 -21
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +61 -21
snowflake/ml/modeling/linear_model/ridge_cv.py +61 -21
snowflake/ml/modeling/linear_model/sgd_classifier.py +61 -21
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +61 -21
snowflake/ml/modeling/linear_model/sgd_regressor.py +61 -21
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +61 -21
snowflake/ml/modeling/linear_model/tweedie_regressor.py +61 -21
snowflake/ml/modeling/manifold/isomap.py +61 -21
snowflake/ml/modeling/manifold/mds.py +61 -21
snowflake/ml/modeling/manifold/spectral_embedding.py +61 -21
snowflake/ml/modeling/manifold/tsne.py +61 -21
snowflake/ml/modeling/metrics/metrics_utils.py +2 -2
snowflake/ml/modeling/metrics/ranking.py +0 -3
snowflake/ml/modeling/metrics/regression.py +0 -3
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +61 -21
snowflake/ml/modeling/mixture/gaussian_mixture.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +61 -21
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +61 -21
snowflake/ml/modeling/multiclass/output_code_classifier.py +61 -21
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/categorical_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/complement_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +61 -21
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neighbors/kernel_density.py +61 -21
snowflake/ml/modeling/neighbors/local_outlier_factor.py +61 -21
snowflake/ml/modeling/neighbors/nearest_centroid.py +61 -21
snowflake/ml/modeling/neighbors/nearest_neighbors.py +61 -21
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +61 -21
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +61 -21
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +61 -21
snowflake/ml/modeling/neural_network/mlp_classifier.py +61 -21
snowflake/ml/modeling/neural_network/mlp_regressor.py +61 -21
snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
snowflake/ml/modeling/pipeline/pipeline.py +1 -13
snowflake/ml/modeling/preprocessing/polynomial_features.py +61 -21
snowflake/ml/modeling/semi_supervised/label_propagation.py +61 -21
snowflake/ml/modeling/semi_supervised/label_spreading.py +61 -21
snowflake/ml/modeling/svm/linear_svc.py +61 -21
snowflake/ml/modeling/svm/linear_svr.py +61 -21
snowflake/ml/modeling/svm/nu_svc.py +61 -21
snowflake/ml/modeling/svm/nu_svr.py +61 -21
snowflake/ml/modeling/svm/svc.py +61 -21
snowflake/ml/modeling/svm/svr.py +61 -21
snowflake/ml/modeling/tree/decision_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/decision_tree_regressor.py +61 -21
snowflake/ml/modeling/tree/extra_tree_classifier.py +61 -21
snowflake/ml/modeling/tree/extra_tree_regressor.py +61 -21
snowflake/ml/modeling/xgboost/xgb_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgb_regressor.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +64 -23
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +64 -23
snowflake/ml/monitoring/_client/model_monitor.py +126 -0
snowflake/ml/monitoring/_client/model_monitor_manager.py +361 -0
snowflake/ml/monitoring/_client/model_monitor_version.py +1 -0
snowflake/ml/monitoring/_client/monitor_sql_client.py +1335 -0
snowflake/ml/monitoring/_client/queries/record_count.ssql +14 -0
snowflake/ml/monitoring/_client/queries/rmse.ssql +28 -0
snowflake/ml/monitoring/entities/model_monitor_config.py +28 -0
snowflake/ml/monitoring/entities/model_monitor_interval.py +46 -0
snowflake/ml/monitoring/entities/output_score_type.py +90 -0
snowflake/ml/registry/_manager/model_manager.py +4 -0
snowflake/ml/registry/registry.py +166 -8
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/METADATA +43 -9
snowflake_ml_python-1.6.3.dist-info/RECORD +400 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/WHEEL +1 -1
snowflake/ml/_internal/container_services/image_registry/credential.py +0 -84
snowflake/ml/_internal/container_services/image_registry/http_client.py +0 -127
snowflake/ml/_internal/container_services/image_registry/imagelib.py +0 -400
snowflake/ml/_internal/container_services/image_registry/registry_client.py +0 -212
snowflake/ml/_internal/utils/log_stream_processor.py +0 -30
snowflake/ml/_internal/utils/session_token_manager.py +0 -46
snowflake/ml/_internal/utils/spcs_attribution_utils.py +0 -122
snowflake/ml/_internal/utils/uri.py +0 -77
snowflake/ml/data/torch_dataset.py +0 -33
snowflake/ml/model/_api.py +0 -568
snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +0 -12
snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +0 -249
snowflake/ml/model/_deploy_client/image_builds/docker_context.py +0 -130
snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +0 -36
snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +0 -268
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +0 -215
snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +0 -53
snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +0 -38
snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +0 -105
snowflake/ml/model/_deploy_client/snowservice/deploy.py +0 -611
snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +0 -116
snowflake/ml/model/_deploy_client/snowservice/instance_types.py +0 -10
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +0 -28
snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template_with_model +0 -21
snowflake/ml/model/_deploy_client/utils/constants.py +0 -48
snowflake/ml/model/_deploy_client/utils/snowservice_client.py +0 -280
snowflake/ml/model/_deploy_client/warehouse/deploy.py +0 -202
snowflake/ml/model/_deploy_client/warehouse/infer_template.py +0 -99
snowflake/ml/model/_packager/model_handlers/llm.py +0 -267
snowflake/ml/model/_packager/model_meta/_core_requirements.py +0 -11
snowflake/ml/model/deploy_platforms.py +0 -6
snowflake/ml/model/models/llm.py +0 -104
snowflake/ml/monitoring/monitor.py +0 -203
snowflake/ml/registry/_initial_schema.py +0 -142
snowflake/ml/registry/_schema.py +0 -82
snowflake/ml/registry/_schema_upgrade_plans.py +0 -116
snowflake/ml/registry/_schema_version_manager.py +0 -163
snowflake/ml/registry/model_registry.py +0 -2048
snowflake_ml_python-1.6.1.dist-info/RECORD +0 -422
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.3.dist-info}/top_level.txt +0 -0

snowflake/ml/model/_packager/model_handlers/catboost.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
+import warnings
 from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Type, cast, final
 import numpy as np
@@ -8,7 +9,11 @@ from typing_extensions import TypeGuard, Unpack
 from snowflake.ml._internal import type_utils
 from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_env import model_env
-from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
+from snowflake.ml.model._packager.model_handlers import (
+    _base,
+    _utils as handlers_utils,
+    model_objective_utils,
+)
 from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import (
     model_blob_meta,
@@ -32,22 +37,7 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
     MODEL_BLOB_FILE_OR_DIR = "model.bin"
     DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
-    @classmethod
-    def get_model_objective(cls, model: "catboost.CatBoost") -> model_meta_schema.ModelObjective:
-        import catboost
-        if isinstance(model, catboost.CatBoostClassifier):
-            num_classes = handlers_utils.get_num_classes_if_exists(model)
-            if num_classes == 2:
-                return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
-            return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
-        if isinstance(model, catboost.CatBoostRanker):
-            return model_meta_schema.ModelObjective.RANKING
-        if isinstance(model, catboost.CatBoostRegressor):
-            return model_meta_schema.ModelObjective.REGRESSION
-        # TODO: Find out model type from the generic Catboost Model
-        return model_meta_schema.ModelObjective.UNKNOWN
+    EXPLAIN_TARGET_METHODS = ["predict", "predict_proba"]
     @classmethod
     def can_handle(cls, model: model_types.SupportedModelType) -> TypeGuard["catboost.CatBoost"]:
@@ -77,6 +67,8 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.CatBoostModelSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", True)
         import catboost
         assert isinstance(model, catboost.CatBoost)
@@ -105,22 +97,34 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
-            model_objective = cls.get_model_objective(model)
-            model_meta.model_objective = model_objective
-            if kwargs.get("enable_explainability", True):
-                output_type = model_signature.DataType.DOUBLE
-                if model_objective == model_meta_schema.ModelObjective.MULTI_CLASSIFICATION:
-                    output_type = model_signature.DataType.STRING
+            model_task_and_output = model_objective_utils.get_model_task_and_output_type(model)
+            model_meta.task = model_task_and_output.task
+            if enable_explainability:
+                explain_target_method = handlers_utils.get_explain_target_method(model_meta, cls.EXPLAIN_TARGET_METHODS)
                 model_meta = handlers_utils.add_explain_method_signature(
                     model_meta=model_meta,
                     explain_method="explain",
-                    target_method="predict",
-                    output_return_type=output_type,
+                    target_method=explain_target_method,
+                    output_return_type=model_task_and_output.output_type,
                 )
                 model_meta.function_properties = {
                     "explain": {model_meta_schema.FunctionProperties.PARTITIONED.value: False}
                 }
+                background_data = handlers_utils.get_explainability_supported_background(
+                    sample_input_data, model_meta, explain_target_method
+                )
+                if background_data is not None:
+                    handlers_utils.save_background_data(
+                        model_blobs_dir_path, cls.EXPLAIN_ARTIFACTS_DIR, cls.BG_DATA_FILE_SUFFIX, name, background_data
+                    )
+                else:
+                    warnings.warn(
+                        "sample_input_data should be provided for better explainability results",
+                        category=UserWarning,
+                        stacklevel=1,
+                    )
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
         model_save_path = os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR)
@@ -143,11 +147,8 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
             ],
             check_local_version=True,
         )
-        if kwargs.get("enable_explainability", True):
-            model_meta.env.include_if_absent(
-                [model_env.ModelDependency(requirement="shap", pip_name="shap")],
-                check_local_version=True,
-            )
+        if enable_explainability:
+            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap", pip_name="shap")])
             model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP
         model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)

snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py CHANGED Viewed

@@ -242,10 +242,10 @@ class HuggingFacePipelineHandler(
             task, spcs_only=(not type_utils.LazyType("transformers.Pipeline").isinstance(model))
         )
         if framework is None or framework == "pt":
-            # Since we set default cuda version to be 11.7, to make sure it works with GPU, we need to have a default
-            # Pytorch version that works with CUDA 11.7 as well. This is required for huggingface pipelines only as
+            # Since we set default cuda version to be 11.8, to make sure it works with GPU, we need to have a default
+            # Pytorch version that works with CUDA 11.8 as well. This is required for huggingface pipelines only as
             # users are not required to install pytorch locally if they are using the wrapper.
-            pkgs_requirements.append(model_env.ModelDependency(requirement="pytorch==2.0.1", pip_name="torch"))
+            pkgs_requirements.append(model_env.ModelDependency(requirement="pytorch", pip_name="torch"))
         elif framework == "tf":
             pkgs_requirements.append(model_env.ModelDependency(requirement="tensorflow", pip_name="tensorflow"))
         model_meta.env.include_if_absent(
@@ -369,7 +369,9 @@ class HuggingFacePipelineHandler(
                     else:
                         # For others, we could offer the whole dataframe as a list.
                         # Some of them may need some conversion
-                        if isinstance(raw_model, transformers.ConversationalPipeline):
+                        if hasattr(transformers, "ConversationalPipeline") and isinstance(
+                            raw_model, transformers.ConversationalPipeline
+                        ):
                             input_data = [
                                 transformers.Conversation(
                                     text=conv_data["user_inputs"][0],
@@ -391,27 +393,33 @@ class HuggingFacePipelineHandler(
                     # Making it not aligned with the auto-inferred signature.
                     # If the output is a dict, we could blindly create a list containing that.
                     # Otherwise, creating pandas DataFrame won't succeed.
-                    if isinstance(temp_res, (dict, transformers.Conversation)) or (
-                        # For some pipeline that is expected to generate a list of dict per input
-                        # When it omit outer list, it becomes list of dict instead of list of list of dict.
-                        # We need to distinguish them from those pipelines that designed to output a dict per input
-                        # So we need to check the pipeline type.
-                        isinstance(
-                            raw_model,
-                            (
-                                transformers.FillMaskPipeline,
-                                transformers.QuestionAnsweringPipeline,
-                            ),
+                    if (
+                        (hasattr(transformers, "Conversation") and isinstance(temp_res, transformers.Conversation))
+                        or isinstance(temp_res, dict)
+                        or (
+                            # For some pipeline that is expected to generate a list of dict per input
+                            # When it omit outer list, it becomes list of dict instead of list of list of dict.
+                            # We need to distinguish them from those pipelines that designed to output a dict per input
+                            # So we need to check the pipeline type.
+                            isinstance(
+                                raw_model,
+                                (
+                                    transformers.FillMaskPipeline,
+                                    transformers.QuestionAnsweringPipeline,
+                                ),
+                            )
+                            and X.shape[0] == 1
+                            and isinstance(temp_res[0], dict)
                         )
-                        and X.shape[0] == 1
-                        and isinstance(temp_res[0], dict)
                     ):
                         temp_res = [temp_res]
                     if len(temp_res) == 0:
                         return pd.DataFrame()
-                    if isinstance(raw_model, transformers.ConversationalPipeline):
+                    if hasattr(transformers, "ConversationalPipeline") and isinstance(
+                        raw_model, transformers.ConversationalPipeline
+                    ):
                         temp_res = [[conv.generated_responses] for conv in temp_res]
                     # To concat those who outputs a list with one input.

snowflake/ml/model/_packager/model_handlers/lightgbm.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
+import warnings
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -19,7 +20,11 @@ from typing_extensions import TypeGuard, Unpack
 from snowflake.ml._internal import type_utils
 from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_env import model_env
-from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
+from snowflake.ml.model._packager.model_handlers import (
+    _base,
+    _utils as handlers_utils,
+    model_objective_utils,
+)
 from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import (
     model_blob_meta,
@@ -43,47 +48,7 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
     MODEL_BLOB_FILE_OR_DIR = "model.pkl"
     DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
-    _BINARY_CLASSIFICATION_OBJECTIVES = ["binary"]
-    _MULTI_CLASSIFICATION_OBJECTIVES = ["multiclass", "multiclassova"]
-    _RANKING_OBJECTIVES = ["lambdarank", "rank_xendcg"]
-    _REGRESSION_OBJECTIVES = [
-        "regression",
-        "regression_l1",
-        "huber",
-        "fair",
-        "poisson",
-        "quantile",
-        "tweedie",
-        "mape",
-        "gamma",
-    ]
-    @classmethod
-    def get_model_objective(
-        cls, model: Union["lightgbm.Booster", "lightgbm.LGBMModel"]
-    ) -> model_meta_schema.ModelObjective:
-        import lightgbm
-        # does not account for cross-entropy and custom
-        if isinstance(model, lightgbm.LGBMClassifier):
-            num_classes = handlers_utils.get_num_classes_if_exists(model)
-            if num_classes == 2:
-                return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
-            return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
-        if isinstance(model, lightgbm.LGBMRanker):
-            return model_meta_schema.ModelObjective.RANKING
-        if isinstance(model, lightgbm.LGBMRegressor):
-            return model_meta_schema.ModelObjective.REGRESSION
-        model_objective = model.params["objective"]
-        if model_objective in cls._BINARY_CLASSIFICATION_OBJECTIVES:
-            return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
-        if model_objective in cls._MULTI_CLASSIFICATION_OBJECTIVES:
-            return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
-        if model_objective in cls._RANKING_OBJECTIVES:
-            return model_meta_schema.ModelObjective.RANKING
-        if model_objective in cls._REGRESSION_OBJECTIVES:
-            return model_meta_schema.ModelObjective.REGRESSION
-        return model_meta_schema.ModelObjective.UNKNOWN
+    EXPLAIN_TARGET_METHODS = ["predict", "predict_proba"]
     @classmethod
     def can_handle(
@@ -118,6 +83,8 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.LGBMModelSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", True)
         import lightgbm
         assert isinstance(model, lightgbm.Booster) or isinstance(model, lightgbm.LGBMModel)
@@ -146,25 +113,34 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
-            model_objective = cls.get_model_objective(model)
-            model_meta.model_objective = model_objective
-            if kwargs.get("enable_explainability", True):
-                output_type = model_signature.DataType.DOUBLE
-                if model_objective in [
-                    model_meta_schema.ModelObjective.BINARY_CLASSIFICATION,
-                    model_meta_schema.ModelObjective.MULTI_CLASSIFICATION,
-                ]:
-                    output_type = model_signature.DataType.STRING
+            model_task_and_output = model_objective_utils.get_model_task_and_output_type(model)
+            model_meta.task = handlers_utils.validate_model_task(model_meta.task, model_task_and_output.task)
+            if enable_explainability:
+                explain_target_method = handlers_utils.get_explain_target_method(model_meta, cls.EXPLAIN_TARGET_METHODS)
                 model_meta = handlers_utils.add_explain_method_signature(
                     model_meta=model_meta,
                     explain_method="explain",
-                    target_method="predict",
-                    output_return_type=output_type,
+                    target_method=explain_target_method,
+                    output_return_type=model_task_and_output.output_type,
                 )
                 model_meta.function_properties = {
                     "explain": {model_meta_schema.FunctionProperties.PARTITIONED.value: False}
                 }
+                background_data = handlers_utils.get_explainability_supported_background(
+                    sample_input_data, model_meta, explain_target_method
+                )
+                if background_data is not None:
+                    handlers_utils.save_background_data(
+                        model_blobs_dir_path, cls.EXPLAIN_ARTIFACTS_DIR, cls.BG_DATA_FILE_SUFFIX, name, background_data
+                    )
+                else:
+                    warnings.warn(
+                        "sample_input_data should be provided for better explainability results",
+                        category=UserWarning,
+                        stacklevel=1,
+                    )
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
@@ -189,11 +165,8 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
             ],
             check_local_version=True,
         )
-        if kwargs.get("enable_explainability", True):
-            model_meta.env.include_if_absent(
-                [model_env.ModelDependency(requirement="shap", pip_name="shap")],
-                check_local_version=True,
-            )
+        if enable_explainability:
+            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap", pip_name="shap")])
             model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP
         return None

snowflake/ml/model/_packager/model_handlers/mlflow.py CHANGED Viewed

@@ -168,11 +168,6 @@ class MLFlowHandler(_base.BaseModelHandler["mlflow.pyfunc.PyFuncModel"]):
     ) -> "mlflow.pyfunc.PyFuncModel":
         import mlflow
-        if snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
-            # We need to redirect the mlruns folder to a writable location in the sandbox.
-            tmpdir = tempfile.TemporaryDirectory(dir="/tmp")
-            mlflow.set_tracking_uri(f"file://{tmpdir}")
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         model_blobs_metadata = model_meta.models
         model_blob_metadata = model_blobs_metadata[name]
@@ -183,6 +178,9 @@ class MLFlowHandler(_base.BaseModelHandler["mlflow.pyfunc.PyFuncModel"]):
         model_artifact_path = model_blob_options["artifact_path"]
         model_blob_filename = model_blob_metadata.path
+        if snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
+            return mlflow.pyfunc.load_model(os.path.join(model_blob_path, model_blob_filename, model_artifact_path))
         # This is to make sure the loaded model can be saved again.
         with mlflow.start_run() as run:
             mlflow.log_artifacts(

snowflake/ml/model/_packager/model_handlers/model_objective_utils.py ADDED Viewed

@@ -0,0 +1,169 @@
+import json
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, Union
+from snowflake.ml._internal import type_utils
+from snowflake.ml.model import model_signature, type_hints
+from snowflake.ml.model._packager.model_handlers import _utils as handlers_utils
+if TYPE_CHECKING:
+    import catboost
+    import lightgbm
+    import sklearn
+    import sklearn.pipeline
+    import xgboost
+@dataclass
+class ModelTaskAndOutputType:
+    """Result bundle produced by ``get_model_task_and_output_type``."""
+    # Task detected for the model (a ``type_hints.Task`` member, possibly UNKNOWN).
+    task: type_hints.Task
+    # Signature data type paired with that task; this module only ever assigns
+    # ``DataType.DOUBLE`` or ``DataType.STRING``.
+    output_type: model_signature.DataType
+def get_task_skl(model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]) -> type_hints.Task:
+    """Infer the tabular task of a scikit-learn estimator.
+    Pipelines, and estimators that are neither regressors nor classifiers, map to
+    ``Task.UNKNOWN``. Regressors map to ``Task.TABULAR_REGRESSION``. Classifiers are
+    split by class count, read from ``n_classes_`` or, failing that, ``len(classes_)``:
+    more than two classes is multi-class, one or two is binary. A classifier whose
+    class count is missing, zero, or not a single integer (e.g. a per-output
+    list/array) yields ``Task.UNKNOWN``.
+    """
+    import numbers
+    from sklearn.base import is_classifier, is_regressor
+    if type_utils.LazyType("sklearn.pipeline.Pipeline").isinstance(model):
+        return type_hints.Task.UNKNOWN
+    if is_regressor(model):
+        return type_hints.Task.TABULAR_REGRESSION
+    if not is_classifier(model):
+        return type_hints.Task.UNKNOWN
+    num_classes = getattr(model, "n_classes_", None)
+    # Fall back to ``classes_`` only when ``n_classes_`` is absent or a scalar zero.
+    # Testing truthiness directly would raise ValueError on a multi-element numpy array.
+    if num_classes is None or (isinstance(num_classes, numbers.Integral) and num_classes == 0):
+        num_classes = len(getattr(model, "classes_", []))
+    # numbers.Integral also matches numpy integer scalars, which are not ``int`` subclasses.
+    # A zero count means nothing is known about the classes, so do not guess "binary".
+    if not isinstance(num_classes, numbers.Integral) or num_classes <= 0:
+        return type_hints.Task.UNKNOWN
+    if num_classes > 2:
+        return type_hints.Task.TABULAR_MULTI_CLASSIFICATION
+    return type_hints.Task.TABULAR_BINARY_CLASSIFICATION
+def get_model_task_catboost(model: "catboost.CatBoost") -> type_hints.Task:
+    """Map a CatBoost model to a tabular task.
+    The decision uses the concrete estimator class first and, when that does not
+    match, the model's own ``_is_*_objective`` predicates applied to its
+    ``loss_function`` parameter. Classification is checked before ranking, and
+    ranking before regression. A classifier reporting zero classes is UNKNOWN.
+    """
+    lazy = type_utils.LazyType
+    # ``loss_function`` is only looked up for genuine CatBoost instances; otherwise None.
+    objective = (
+        model.get_all_params()["loss_function"]  # type: ignore[attr-defined]
+        if lazy("catboost.CatBoost").isinstance(model)
+        else None
+    )
+    if lazy("catboost.CatBoostClassifier").isinstance(model) or model._is_classification_objective(objective):
+        class_count = handlers_utils.get_num_classes_if_exists(model)
+        if class_count == 0:
+            return type_hints.Task.UNKNOWN
+        return (
+            type_hints.Task.TABULAR_BINARY_CLASSIFICATION
+            if class_count <= 2
+            else type_hints.Task.TABULAR_MULTI_CLASSIFICATION
+        )
+    if lazy("catboost.CatBoostRanker").isinstance(model) or model._is_ranking_objective(objective):
+        return type_hints.Task.TABULAR_RANKING
+    if lazy("catboost.CatBoostRegressor").isinstance(model) or model._is_regression_objective(objective):
+        return type_hints.Task.TABULAR_REGRESSION
+    return type_hints.Task.UNKNOWN
+def get_model_task_lightgbm(model: Union["lightgbm.Booster", "lightgbm.LGBMModel"]) -> type_hints.Task:
+    """Map a LightGBM model's objective name to a tabular task.
+    For a ``lightgbm.Booster`` the objective is read from ``model.params``; for any
+    other model it is read from the ``objective_`` attribute when present. An
+    objective that is missing or not listed below yields ``Task.UNKNOWN``.
+    """
+    # does not account for cross-entropy and custom
+    objectives_by_task = (
+        (("binary",), type_hints.Task.TABULAR_BINARY_CLASSIFICATION),
+        (("multiclass", "multiclassova"), type_hints.Task.TABULAR_MULTI_CLASSIFICATION),
+        (("lambdarank", "rank_xendcg"), type_hints.Task.TABULAR_RANKING),
+        (
+            (
+                "regression",
+                "regression_l1",
+                "huber",
+                "fair",
+                "poisson",
+                "quantile",
+                "tweedie",
+                "mape",
+                "gamma",
+            ),
+            type_hints.Task.TABULAR_REGRESSION,
+        ),
+    )
+    model_task = ""
+    if type_utils.LazyType("lightgbm.Booster").isinstance(model):
+        # A Booster's params may not name an objective; treat that as unknown
+        # instead of raising KeyError, matching the XGBoost helper's ``.get`` lookup.
+        model_task = model.params.get("objective", "")  # type: ignore[attr-defined]
+    elif hasattr(model, "objective_"):
+        model_task = model.objective_
+    for objective_names, task in objectives_by_task:
+        if model_task in objective_names:
+            return task
+    return type_hints.Task.UNKNOWN
+def get_model_task_xgb(model: Union["xgboost.Booster", "xgboost.XGBModel"]) -> type_hints.Task:
+    """Map an XGBoost model's objective to a tabular task.
+    For an ``xgboost.Booster`` the objective comes from the ``learner.objective``
+    entry of the JSON returned by ``save_config()``; for other models it comes from
+    ``get_params()["objective"]`` when ``get_params`` exists. A dict-valued objective
+    is reduced to its ``"name"`` entry. The objective is then matched against the
+    ``binary:``, ``multi:``, ``rank:`` and ``reg:`` markers, in that order. Anything
+    else, including an objective that is not a string, yields ``Task.UNKNOWN``.
+    """
+    markers_by_task = (
+        ("binary:", type_hints.Task.TABULAR_BINARY_CLASSIFICATION),
+        ("multi:", type_hints.Task.TABULAR_MULTI_CLASSIFICATION),
+        ("rank:", type_hints.Task.TABULAR_RANKING),
+        ("reg:", type_hints.Task.TABULAR_REGRESSION),
+    )
+    model_task = ""
+    if type_utils.LazyType("xgboost.Booster").isinstance(model):
+        model_params = json.loads(model.save_config())  # type: ignore[attr-defined]
+        model_task = model_params.get("learner", {}).get("objective", "")
+    elif hasattr(model, "get_params"):
+        model_task = model.get_params().get("objective", "")
+    if isinstance(model_task, dict):
+        model_task = model_task.get("name", "")
+    # The objective may be None or a callable (custom objective). A substring test
+    # on either raises TypeError, so report the task as unknown instead.
+    if not isinstance(model_task, str):
+        return type_hints.Task.UNKNOWN
+    for marker, task in markers_by_task:
+        if marker in model_task:
+            return task
+    return type_hints.Task.UNKNOWN
+def get_model_task_and_output_type(model: Any) -> ModelTaskAndOutputType:
+    """Detect the task of a supported model and pick the matching output data type.
+    Model families are tried in this order: XGBoost, LightGBM, CatBoost, scikit-learn.
+    The output type is ``DataType.STRING`` for multi-class classification (and, for
+    LightGBM only, for binary classification too); otherwise it is ``DataType.DOUBLE``.
+    Raises:
+        ValueError: If the model belongs to none of the supported families.
+    """
+    def _is_any(*qualified_names: str) -> bool:
+        # Lazily evaluated in argument order, so later names are only checked on a miss.
+        return any(type_utils.LazyType(qualified).isinstance(model) for qualified in qualified_names)
+    string_output_tasks = [type_hints.Task.TABULAR_MULTI_CLASSIFICATION]
+    if _is_any("xgboost.Booster", "xgboost.XGBModel"):
+        detected = get_model_task_xgb(model)
+    elif _is_any("lightgbm.Booster", "lightgbm.LGBMModel"):
+        detected = get_model_task_lightgbm(model)
+        # LightGBM is the one family whose binary classifiers also get STRING output.
+        string_output_tasks = [
+            type_hints.Task.TABULAR_BINARY_CLASSIFICATION,
+            type_hints.Task.TABULAR_MULTI_CLASSIFICATION,
+        ]
+    elif _is_any("catboost.CatBoost"):
+        detected = get_model_task_catboost(model)
+    elif _is_any("sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"):
+        detected = get_task_skl(model)
+    else:
+        raise ValueError(f"Model type {type(model)} is not supported")
+    if detected in string_output_tasks:
+        chosen_type = model_signature.DataType.STRING
+    else:
+        chosen_type = model_signature.DataType.DOUBLE
+    return ModelTaskAndOutputType(task=detected, output_type=chosen_type)

snowflake/ml/model/_packager/model_handlers/sentence_transformers.py CHANGED Viewed

@@ -2,7 +2,6 @@ import logging
 import os
 from typing import TYPE_CHECKING, Callable, Dict, Optional, Type, cast, final
-import cloudpickle
 import pandas as pd
 from typing_extensions import TypeGuard, Unpack
@@ -120,9 +119,21 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
         model_meta.env.include_if_absent(
             [
                 model_env.ModelDependency(requirement="sentence-transformers", pip_name="sentence-transformers"),
+                model_env.ModelDependency(requirement="transformers", pip_name="transformers"),
+                model_env.ModelDependency(requirement="pytorch", pip_name="torch"),
             ],
             check_local_version=True,
         )
+        model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)
+    @staticmethod
+    def _get_device_config(**kwargs: Unpack[model_types.SentenceTransformersLoadOptions]) -> Optional[str]:
+        """Resolve the device string from the load options.
+        An explicit, non-None ``device`` option wins. Otherwise a truthy ``use_gpu``
+        selects ``"cuda"``. With neither, ``None`` is returned.
+        """
+        requested_device = kwargs.get("device", None)
+        if requested_device is not None:
+            return requested_device
+        return "cuda" if kwargs.get("use_gpu", False) else None
     @classmethod
     def load_model(
@@ -144,13 +155,9 @@ class SentenceTransformerHandler(_base.BaseModelHandler["sentence_transformers.S
         model_blob_filename = model_blob_metadata.path
         model_blob_file_or_dir_path = os.path.join(model_blob_path, model_blob_filename)
-        if os.path.isdir(model_blob_file_or_dir_path):  # if the saved model is a directory
-            model = sentence_transformers.SentenceTransformer(model_blob_file_or_dir_path)
-        else:
-            assert os.path.isfile(model_blob_file_or_dir_path)  # if the saved model is a file
-            with open(model_blob_file_or_dir_path, "rb") as f:
-                model = cloudpickle.load(f)
-            assert isinstance(model, sentence_transformers.SentenceTransformer)
+        model = sentence_transformers.SentenceTransformer(
+            model_blob_file_or_dir_path, device=cls._get_device_config(**kwargs)
+        )
         return model
     @classmethod

snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.3__py3-none-any.whl

snowflake-ml-python 1.6.1py3-none-any.whl → 1.6.3py3-none-any.whl