PyPI - snowflake-ml-python - Versions diffs - 1.6.1__py3-none-any.whl → 1.6.2__py3-none-any.whl - Mend

snowflake-ml-python 1.6.1-py3-none-any.whl → 1.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (212)

snowflake/ml/_internal/telemetry.py +142 -20
snowflake/ml/_internal/utils/identifier.py +48 -11
snowflake/ml/_internal/utils/snowflake_env.py +23 -13
snowflake/ml/_internal/utils/sql_identifier.py +1 -1
snowflake/ml/_internal/utils/table_manager.py +19 -1
snowflake/ml/_internal/utils/uri.py +2 -2
snowflake/ml/data/data_connector.py +33 -7
snowflake/ml/data/torch_utils.py +68 -0
snowflake/ml/dataset/dataset.py +1 -3
snowflake/ml/feature_store/feature_store.py +41 -17
snowflake/ml/feature_store/feature_view.py +2 -2
snowflake/ml/fileset/embedded_stage_fs.py +1 -1
snowflake/ml/fileset/fileset.py +1 -1
snowflake/ml/fileset/sfcfs.py +9 -3
snowflake/ml/model/_client/model/model_version_impl.py +22 -7
snowflake/ml/model/_client/ops/model_ops.py +39 -3
snowflake/ml/model/_client/ops/service_ops.py +198 -7
snowflake/ml/model/_client/service/model_deployment_spec.py +4 -5
snowflake/ml/model/_client/service/model_deployment_spec_schema.py +1 -2
snowflake/ml/model/_client/sql/service.py +85 -18
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -1
snowflake/ml/model/_deploy_client/snowservice/deploy.py +3 -3
snowflake/ml/model/_model_composer/model_composer.py +2 -0
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +3 -8
snowflake/ml/model/_packager/model_handlers/_utils.py +46 -14
snowflake/ml/model/_packager/model_handlers/catboost.py +17 -15
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +23 -15
snowflake/ml/model/_packager/model_handlers/lightgbm.py +15 -57
snowflake/ml/model/_packager/model_handlers/llm.py +4 -2
snowflake/ml/model/_packager/model_handlers/model_objective_utils.py +116 -0
snowflake/ml/model/_packager/model_handlers/sklearn.py +36 -24
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +119 -6
snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
snowflake/ml/model/_packager/model_handlers/xgboost.py +48 -48
snowflake/ml/model/_packager/model_meta/model_meta.py +10 -7
snowflake/ml/model/_packager/model_meta/model_meta_schema.py +0 -8
snowflake/ml/model/_packager/model_packager.py +2 -0
snowflake/ml/model/_signatures/pytorch_handler.py +1 -1
snowflake/ml/model/_signatures/utils.py +9 -0
snowflake/ml/model/models/llm.py +3 -1
snowflake/ml/model/type_hints.py +9 -1
snowflake/ml/modeling/_internal/constants.py +1 -0
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +5 -5
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +9 -6
snowflake/ml/modeling/_internal/model_specifications.py +2 -0
snowflake/ml/modeling/_internal/model_trainer.py +1 -0
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -2
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +113 -160
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +60 -21
snowflake/ml/modeling/cluster/affinity_propagation.py +60 -21
snowflake/ml/modeling/cluster/agglomerative_clustering.py +60 -21
snowflake/ml/modeling/cluster/birch.py +60 -21
snowflake/ml/modeling/cluster/bisecting_k_means.py +60 -21
snowflake/ml/modeling/cluster/dbscan.py +60 -21
snowflake/ml/modeling/cluster/feature_agglomeration.py +60 -21
snowflake/ml/modeling/cluster/k_means.py +60 -21
snowflake/ml/modeling/cluster/mean_shift.py +60 -21
snowflake/ml/modeling/cluster/mini_batch_k_means.py +60 -21
snowflake/ml/modeling/cluster/optics.py +60 -21
snowflake/ml/modeling/cluster/spectral_biclustering.py +60 -21
snowflake/ml/modeling/cluster/spectral_clustering.py +60 -21
snowflake/ml/modeling/cluster/spectral_coclustering.py +60 -21
snowflake/ml/modeling/compose/column_transformer.py +60 -21
snowflake/ml/modeling/compose/transformed_target_regressor.py +60 -21
snowflake/ml/modeling/covariance/elliptic_envelope.py +60 -21
snowflake/ml/modeling/covariance/empirical_covariance.py +60 -21
snowflake/ml/modeling/covariance/graphical_lasso.py +60 -21
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +60 -21
snowflake/ml/modeling/covariance/ledoit_wolf.py +60 -21
snowflake/ml/modeling/covariance/min_cov_det.py +60 -21
snowflake/ml/modeling/covariance/oas.py +60 -21
snowflake/ml/modeling/covariance/shrunk_covariance.py +60 -21
snowflake/ml/modeling/decomposition/dictionary_learning.py +60 -21
snowflake/ml/modeling/decomposition/factor_analysis.py +60 -21
snowflake/ml/modeling/decomposition/fast_ica.py +60 -21
snowflake/ml/modeling/decomposition/incremental_pca.py +60 -21
snowflake/ml/modeling/decomposition/kernel_pca.py +60 -21
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +60 -21
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +60 -21
snowflake/ml/modeling/decomposition/pca.py +60 -21
snowflake/ml/modeling/decomposition/sparse_pca.py +60 -21
snowflake/ml/modeling/decomposition/truncated_svd.py +60 -21
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +60 -21
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +60 -21
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +60 -21
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +60 -21
snowflake/ml/modeling/ensemble/bagging_classifier.py +60 -21
snowflake/ml/modeling/ensemble/bagging_regressor.py +60 -21
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +60 -21
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +60 -21
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +60 -21
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +60 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +60 -21
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +60 -21
snowflake/ml/modeling/ensemble/isolation_forest.py +60 -21
snowflake/ml/modeling/ensemble/random_forest_classifier.py +60 -21
snowflake/ml/modeling/ensemble/random_forest_regressor.py +60 -21
snowflake/ml/modeling/ensemble/stacking_regressor.py +60 -21
snowflake/ml/modeling/ensemble/voting_classifier.py +60 -21
snowflake/ml/modeling/ensemble/voting_regressor.py +60 -21
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +60 -21
snowflake/ml/modeling/feature_selection/select_fdr.py +60 -21
snowflake/ml/modeling/feature_selection/select_fpr.py +60 -21
snowflake/ml/modeling/feature_selection/select_fwe.py +60 -21
snowflake/ml/modeling/feature_selection/select_k_best.py +60 -21
snowflake/ml/modeling/feature_selection/select_percentile.py +60 -21
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +60 -21
snowflake/ml/modeling/feature_selection/variance_threshold.py +60 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +60 -21
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +60 -21
snowflake/ml/modeling/impute/iterative_imputer.py +60 -21
snowflake/ml/modeling/impute/knn_imputer.py +60 -21
snowflake/ml/modeling/impute/missing_indicator.py +60 -21
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +60 -21
snowflake/ml/modeling/kernel_approximation/nystroem.py +60 -21
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +60 -21
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +60 -21
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +60 -21
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +60 -21
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +60 -21
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +60 -21
snowflake/ml/modeling/linear_model/ard_regression.py +60 -21
snowflake/ml/modeling/linear_model/bayesian_ridge.py +60 -21
snowflake/ml/modeling/linear_model/elastic_net.py +60 -21
snowflake/ml/modeling/linear_model/elastic_net_cv.py +60 -21
snowflake/ml/modeling/linear_model/gamma_regressor.py +60 -21
snowflake/ml/modeling/linear_model/huber_regressor.py +60 -21
snowflake/ml/modeling/linear_model/lars.py +60 -21
snowflake/ml/modeling/linear_model/lars_cv.py +60 -21
snowflake/ml/modeling/linear_model/lasso.py +60 -21
snowflake/ml/modeling/linear_model/lasso_cv.py +60 -21
snowflake/ml/modeling/linear_model/lasso_lars.py +60 -21
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +60 -21
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +60 -21
snowflake/ml/modeling/linear_model/linear_regression.py +60 -21
snowflake/ml/modeling/linear_model/logistic_regression.py +60 -21
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +60 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +60 -21
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +60 -21
snowflake/ml/modeling/linear_model/multi_task_lasso.py +60 -21
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +60 -21
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +60 -21
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +60 -21
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +60 -21
snowflake/ml/modeling/linear_model/perceptron.py +60 -21
snowflake/ml/modeling/linear_model/poisson_regressor.py +60 -21
snowflake/ml/modeling/linear_model/ransac_regressor.py +60 -21
snowflake/ml/modeling/linear_model/ridge.py +60 -21
snowflake/ml/modeling/linear_model/ridge_classifier.py +60 -21
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +60 -21
snowflake/ml/modeling/linear_model/ridge_cv.py +60 -21
snowflake/ml/modeling/linear_model/sgd_classifier.py +60 -21
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +60 -21
snowflake/ml/modeling/linear_model/sgd_regressor.py +60 -21
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +60 -21
snowflake/ml/modeling/linear_model/tweedie_regressor.py +60 -21
snowflake/ml/modeling/manifold/isomap.py +60 -21
snowflake/ml/modeling/manifold/mds.py +60 -21
snowflake/ml/modeling/manifold/spectral_embedding.py +60 -21
snowflake/ml/modeling/manifold/tsne.py +60 -21
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +60 -21
snowflake/ml/modeling/mixture/gaussian_mixture.py +60 -21
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +60 -21
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +60 -21
snowflake/ml/modeling/multiclass/output_code_classifier.py +60 -21
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +60 -21
snowflake/ml/modeling/naive_bayes/categorical_nb.py +60 -21
snowflake/ml/modeling/naive_bayes/complement_nb.py +60 -21
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +60 -21
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +60 -21
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +60 -21
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +60 -21
snowflake/ml/modeling/neighbors/kernel_density.py +60 -21
snowflake/ml/modeling/neighbors/local_outlier_factor.py +60 -21
snowflake/ml/modeling/neighbors/nearest_centroid.py +60 -21
snowflake/ml/modeling/neighbors/nearest_neighbors.py +60 -21
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +60 -21
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +60 -21
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +60 -21
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +60 -21
snowflake/ml/modeling/neural_network/mlp_classifier.py +60 -21
snowflake/ml/modeling/neural_network/mlp_regressor.py +60 -21
snowflake/ml/modeling/parameters/disable_model_tracer.py +5 -0
snowflake/ml/modeling/pipeline/pipeline.py +1 -12
snowflake/ml/modeling/preprocessing/polynomial_features.py +60 -21
snowflake/ml/modeling/semi_supervised/label_propagation.py +60 -21
snowflake/ml/modeling/semi_supervised/label_spreading.py +60 -21
snowflake/ml/modeling/svm/linear_svc.py +60 -21
snowflake/ml/modeling/svm/linear_svr.py +60 -21
snowflake/ml/modeling/svm/nu_svc.py +60 -21
snowflake/ml/modeling/svm/nu_svr.py +60 -21
snowflake/ml/modeling/svm/svc.py +60 -21
snowflake/ml/modeling/svm/svr.py +60 -21
snowflake/ml/modeling/tree/decision_tree_classifier.py +60 -21
snowflake/ml/modeling/tree/decision_tree_regressor.py +60 -21
snowflake/ml/modeling/tree/extra_tree_classifier.py +60 -21
snowflake/ml/modeling/tree/extra_tree_regressor.py +60 -21
snowflake/ml/modeling/xgboost/xgb_classifier.py +63 -23
snowflake/ml/modeling/xgboost/xgb_regressor.py +63 -23
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +63 -23
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +63 -23
snowflake/ml/registry/_manager/model_manager.py +4 -0
snowflake/ml/registry/model_registry.py +1 -1
snowflake/ml/registry/registry.py +1 -2
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/METADATA +23 -4
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/RECORD +211 -209
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/WHEEL +1 -1
snowflake/ml/data/torch_dataset.py +0 -33
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.6.1.dist-info → snowflake_ml_python-1.6.2.dist-info}/top_level.txt +0 -0

snowflake/ml/model/_client/sql/service.py CHANGED Viewed

@@ -1,6 +1,9 @@
+import enum
+import json
 import textwrap
 from typing import Any, Dict, List, Optional, Tuple
+from snowflake import snowpark
 from snowflake.ml._internal.utils import (
     identifier,
     query_result_checker,
@@ -11,6 +14,17 @@ from snowflake.snowpark import dataframe, functions as F, types as spt
 from snowflake.snowpark._internal import utils as snowpark_utils
+class ServiceStatus(enum.Enum):
+    UNKNOWN = "UNKNOWN"  # status is unknown because we have not received enough data from K8s yet.
+    PENDING = "PENDING"  # resource set is being created, can't be used yet
+    READY = "READY"  # resource set has been deployed.
+    DELETING = "DELETING"  # resource set is being deleted
+    FAILED = "FAILED"  # resource set has failed and cannot be used anymore
+    DONE = "DONE"  # resource set has finished running
+    NOT_FOUND = "NOT_FOUND"  # not found or deleted
+    INTERNAL_ERROR = "INTERNAL_ERROR"  # there was an internal service error.
 class ServiceSQLClient(_base._BaseSQLClient):
     def build_model_container(
         self,
@@ -30,20 +44,21 @@ class ServiceSQLClient(_base._BaseSQLClient):
     ) -> None:
         actual_image_repo_database = image_repo_database_name or self._database_name
         actual_image_repo_schema = image_repo_schema_name or self._schema_name
-        fq_model_name = self.fully_qualified_object_name(database_name, schema_name, model_name)
-        fq_image_repo_name = "/" + "/".join(
-            [
-                actual_image_repo_database.identifier(),
-                actual_image_repo_schema.identifier(),
-                image_repo_name.identifier(),
-            ]
+        actual_model_database = database_name or self._database_name
+        actual_model_schema = schema_name or self._schema_name
+        fq_model_name = self.fully_qualified_object_name(actual_model_database, actual_model_schema, model_name)
+        fq_image_repo_name = identifier.get_schema_level_object_identifier(
+            actual_image_repo_database.identifier(),
+            actual_image_repo_schema.identifier(),
+            image_repo_name.identifier(),
         )
-        is_gpu = gpu is not None
+        is_gpu_str = "TRUE" if gpu else "FALSE"
+        force_rebuild_str = "TRUE" if force_rebuild else "FALSE"
         query_result_checker.SqlResultValidator(
             self._session,
             (
                 f"CALL SYSTEM$BUILD_MODEL_CONTAINER('{fq_model_name}', '{version_name}', '{compute_pool_name}',"
-                f" '{fq_image_repo_name}', '{is_gpu}', '{force_rebuild}', '', '{external_access_integration}')"
+                f" '{fq_image_repo_name}', '{is_gpu_str}', '{force_rebuild_str}', '', '{external_access_integration}')"
             ),
             statement_params=statement_params,
         ).has_dimensions(expected_rows=1, expected_cols=1).validate()
@@ -54,12 +69,12 @@ class ServiceSQLClient(_base._BaseSQLClient):
         stage_path: str,
         model_deployment_spec_file_rel_path: str,
         statement_params: Optional[Dict[str, Any]] = None,
-    ) -> None:
-        query_result_checker.SqlResultValidator(
-            self._session,
-            f"CALL SYSTEM$DEPLOY_MODEL('@{stage_path}/{model_deployment_spec_file_rel_path}')",
-            statement_params=statement_params,
-        ).has_dimensions(expected_rows=1, expected_cols=1).validate()
+    ) -> Tuple[str, snowpark.AsyncJob]:
+        async_job = self._session.sql(
+            f"CALL SYSTEM$DEPLOY_MODEL('@{stage_path}/{model_deployment_spec_file_rel_path}')"
+        ).collect(block=False, statement_params=statement_params)
+        assert isinstance(async_job, snowpark.AsyncJob)
+        return async_job.query_id, async_job
     def invoke_function_method(
         self,
@@ -74,12 +89,20 @@ class ServiceSQLClient(_base._BaseSQLClient):
         statement_params: Optional[Dict[str, Any]] = None,
     ) -> dataframe.DataFrame:
         with_statements = []
+        actual_database_name = database_name or self._database_name
+        actual_schema_name = schema_name or self._schema_name
+        function_name = identifier.concat_names([service_name.identifier(), "_", method_name.identifier()])
+        fully_qualified_function_name = identifier.get_schema_level_object_identifier(
+            actual_database_name.identifier(),
+            actual_schema_name.identifier(),
+            function_name,
+        )
         if len(input_df.queries["queries"]) == 1 and len(input_df.queries["post_actions"]) == 0:
             INTERMEDIATE_TABLE_NAME = "SNOWPARK_ML_MODEL_INFERENCE_INPUT"
             with_statements.append(f"{INTERMEDIATE_TABLE_NAME} AS ({input_df.queries['queries'][0]})")
         else:
-            actual_database_name = database_name or self._database_name
-            actual_schema_name = schema_name or self._schema_name
             tmp_table_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
             INTERMEDIATE_TABLE_NAME = identifier.get_schema_level_object_identifier(
                 actual_database_name.identifier(),
@@ -104,7 +127,7 @@ class ServiceSQLClient(_base._BaseSQLClient):
         sql = textwrap.dedent(
             f"""{with_sql}
                 SELECT *,
-                    {service_name.identifier()}_{method_name.identifier()}({args_sql}) AS {INTERMEDIATE_OBJ_NAME}
+                    {fully_qualified_function_name}({args_sql}) AS {INTERMEDIATE_OBJ_NAME}
                 FROM {INTERMEDIATE_TABLE_NAME}"""
         )
@@ -127,3 +150,47 @@ class ServiceSQLClient(_base._BaseSQLClient):
             output_df._statement_params = statement_params  # type: ignore[assignment]
         return output_df
+    def get_service_logs(
+        self,
+        *,
+        service_name: str,
+        instance_id: str = "0",
+        container_name: str,
+        statement_params: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        system_func = "SYSTEM$GET_SERVICE_LOGS"
+        rows = (
+            query_result_checker.SqlResultValidator(
+                self._session,
+                f"CALL {system_func}('{service_name}', '{instance_id}', '{container_name}')",
+                statement_params=statement_params,
+            )
+            .has_dimensions(expected_rows=1, expected_cols=1)
+            .validate()
+        )
+        return str(rows[0][system_func])
+    def get_service_status(
+        self,
+        *,
+        service_name: str,
+        include_message: bool = False,
+        statement_params: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[ServiceStatus, Optional[str]]:
+        system_func = "SYSTEM$GET_SERVICE_STATUS"
+        rows = (
+            query_result_checker.SqlResultValidator(
+                self._session,
+                f"CALL {system_func}('{service_name}')",
+                statement_params=statement_params,
+            )
+            .has_dimensions(expected_rows=1, expected_cols=1)
+            .validate()
+        )
+        metadata = json.loads(rows[0][system_func])[0]
+        if metadata and metadata["status"]:
+            service_status = ServiceStatus(metadata["status"])
+            message = metadata["message"] if include_message else None
+            return service_status, message
+        return ServiceStatus.UNKNOWN, None

snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py CHANGED Viewed

@@ -182,7 +182,7 @@ class ServerImageBuilder(base_image_builder.ImageBuilder):
         with file_utils.open_file(spec_file_path, "w+") as spec_file:
             assert self.artifact_stage_location.startswith("@")
             normed_artifact_stage_path = posixpath.normpath(identifier.remove_prefix(self.artifact_stage_location, "@"))
-            (db, schema, stage, path) = identifier.parse_schema_level_object_identifier(normed_artifact_stage_path)
+            (db, schema, stage, path) = identifier.parse_snowflake_stage_path(normed_artifact_stage_path)
             content = Template(spec_template).safe_substitute(
                 {
                     "base_image": base_image,

snowflake/ml/model/_deploy_client/snowservice/deploy.py CHANGED Viewed

@@ -280,7 +280,7 @@ def _get_or_create_image_repo(session: Session, *, service_func_name: str, image
         conn = session._conn._conn
         # We try to use the same db and schema as the service function locates, as we could retrieve those information
         # if that is a fully qualified one. If not we use the current session one.
-        (_db, _schema, _, _) = identifier.parse_schema_level_object_identifier(service_func_name)
+        (_db, _schema, _) = identifier.parse_schema_level_object_identifier(service_func_name)
         db = _db if _db is not None else conn._database
         schema = _schema if _schema is not None else conn._schema
         assert isinstance(db, str) and isinstance(schema, str)
@@ -343,7 +343,7 @@ class SnowServiceDeployment:
         self.model_zip_stage_path = model_zip_stage_path
         self.options = options
         self.target_method = target_method
-        (db, schema, _, _) = identifier.parse_schema_level_object_identifier(service_func_name)
+        (db, schema, _) = identifier.parse_schema_level_object_identifier(service_func_name)
         self._service_name = identifier.get_schema_level_object_identifier(db, schema, f"service_{model_id}")
         self._job_name = identifier.get_schema_level_object_identifier(db, schema, f"build_{model_id}")
@@ -503,7 +503,7 @@ class SnowServiceDeployment:
                 norm_stage_path = posixpath.normpath(identifier.remove_prefix(self.model_zip_stage_path, "@"))
                 # Ensure model stage path has root prefix as stage mount will it mount it to root.
                 absolute_model_stage_path = os.path.join("/", norm_stage_path)
-                (db, schema, stage, path) = identifier.parse_schema_level_object_identifier(norm_stage_path)
+                (db, schema, stage, path) = identifier.parse_snowflake_stage_path(norm_stage_path)
                 substitutes = {
                     "image": image,
                     "predict_endpoint_name": constants.PREDICT,

snowflake/ml/model/_model_composer/model_composer.py CHANGED Viewed

@@ -92,6 +92,7 @@ class ModelComposer:
         python_version: Optional[str] = None,
         ext_modules: Optional[List[ModuleType]] = None,
         code_paths: Optional[List[str]] = None,
+        model_objective: model_types.ModelObjective = model_types.ModelObjective.UNKNOWN,
         options: Optional[model_types.ModelSaveOption] = None,
     ) -> model_meta.ModelMetadata:
         if not options:
@@ -120,6 +121,7 @@ class ModelComposer:
             python_version=python_version,
             ext_modules=ext_modules,
             code_paths=code_paths,
+            model_objective=model_objective,
             options=options,
         )
         assert self.packager.meta is not None

snowflake/ml/model/_model_composer/model_manifest/model_manifest.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import collections
 import copy
 import pathlib
-import warnings
 from typing import List, Optional, cast
 import yaml
@@ -78,13 +77,9 @@ class ModelManifest:
             )
         dependencies = model_manifest_schema.ModelRuntimeDependenciesDict(conda=runtime_dict["dependencies"]["conda"])
-        if options.get("include_pip_dependencies"):
-            warnings.warn(
-                "`include_pip_dependencies` specified as True: pip dependencies will be included and may not"
-                "be warehouse-compabible. The model may need to be run in SPCS.",
-                category=UserWarning,
-                stacklevel=1,
-            )
+        # We only want to include pip dependencies file if there are any pip requirements.
+        if len(model_meta.env.pip_requirements) > 0:
             dependencies["pip"] = runtime_dict["dependencies"]["pip"]
         manifest_dict = model_manifest_schema.ModelManifestDict(

snowflake/ml/model/_packager/model_handlers/_utils.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import json
+import warnings
 from typing import Any, Callable, Iterable, Optional, Sequence, cast
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
+from absl import logging
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_meta import model_meta
@@ -11,6 +13,17 @@ from snowflake.ml.model._signatures import snowpark_handler
 from snowflake.snowpark import DataFrame as SnowparkDataFrame
+class NumpyEncoder(json.JSONEncoder):
+    def default(self, obj: Any) -> Any:
+        if isinstance(obj, np.integer):
+            return int(obj)
+        if isinstance(obj, np.floating):
+            return float(obj)
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        return super().default(obj)
 def _is_callable(model: model_types.SupportedModelType, method_name: str) -> bool:
     return callable(getattr(model, method_name, None))
@@ -93,23 +106,42 @@ def convert_explanations_to_2D_df(
         return pd.DataFrame(explanations)
     if hasattr(model, "classes_"):
-        classes_list = [cl for cl in model.classes_]  # type:ignore[union-attr]
+        classes_list = [str(cl) for cl in model.classes_]  # type:ignore[union-attr]
         len_classes = len(classes_list)
         if explanations.shape[2] != len_classes:
             raise ValueError(f"Model has {len_classes} classes but explanations have {explanations.shape[2]}")
     else:
-        classes_list = [i for i in range(explanations.shape[2])]
-    exp_2d = []
-    # TODO (SNOW-1549044): Optimize this
-    for row in explanations:
-        col_list = []
-        for column in row:
-            class_explanations = {}
-            for cl, cl_exp in zip(classes_list, column):
-                if isinstance(cl, (int, np.integer)):
-                    cl = int(cl)
-                class_explanations[cl] = cl_exp
-            col_list.append(json.dumps(class_explanations))
-        exp_2d.append(col_list)
+        classes_list = [str(i) for i in range(explanations.shape[2])]
+    def row_to_dict(row: npt.NDArray[Any]) -> npt.NDArray[Any]:
+        """Converts a single row to a dictionary."""
+        # convert to object or numpy creates strings of fixed length
+        return np.asarray(json.dumps(dict(zip(classes_list, row)), cls=NumpyEncoder), dtype=object)
+    exp_2d = np.apply_along_axis(row_to_dict, -1, explanations)
     return pd.DataFrame(exp_2d)
+def validate_model_objective(
+    passed_model_objective: model_types.ModelObjective, inferred_model_objective: model_types.ModelObjective
+) -> model_types.ModelObjective:
+    if (
+        passed_model_objective != model_types.ModelObjective.UNKNOWN
+        and inferred_model_objective != model_types.ModelObjective.UNKNOWN
+    ):
+        if passed_model_objective != inferred_model_objective:
+            warnings.warn(
+                f"Inferred ModelObjective: {inferred_model_objective.name} is used as model objective for this model "
+                f"version and passed argument ModelObjective: {passed_model_objective.name} is ignored",
+                category=UserWarning,
+                stacklevel=1,
+            )
+        return inferred_model_objective
+    elif inferred_model_objective != model_types.ModelObjective.UNKNOWN:
+        logging.info(
+            f"Inferred ModelObjective: {inferred_model_objective.name} is used as model objective for this model "
+            f"version"
+        )
+        return inferred_model_objective
+    return passed_model_objective

snowflake/ml/model/_packager/model_handlers/catboost.py CHANGED Viewed

@@ -34,20 +34,20 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
     DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
     @classmethod
-    def get_model_objective(cls, model: "catboost.CatBoost") -> model_meta_schema.ModelObjective:
+    def get_model_objective_and_output_type(cls, model: "catboost.CatBoost") -> model_types.ModelObjective:
         import catboost
         if isinstance(model, catboost.CatBoostClassifier):
             num_classes = handlers_utils.get_num_classes_if_exists(model)
             if num_classes == 2:
-                return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
-            return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
+                return model_types.ModelObjective.BINARY_CLASSIFICATION
+            return model_types.ModelObjective.MULTI_CLASSIFICATION
         if isinstance(model, catboost.CatBoostRanker):
-            return model_meta_schema.ModelObjective.RANKING
+            return model_types.ModelObjective.RANKING
         if isinstance(model, catboost.CatBoostRegressor):
-            return model_meta_schema.ModelObjective.REGRESSION
+            return model_types.ModelObjective.REGRESSION
         # TODO: Find out model type from the generic Catboost Model
-        return model_meta_schema.ModelObjective.UNKNOWN
+        return model_types.ModelObjective.UNKNOWN
     @classmethod
     def can_handle(cls, model: model_types.SupportedModelType) -> TypeGuard["catboost.CatBoost"]:
@@ -77,6 +77,8 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.CatBoostModelSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", True)
         import catboost
         assert isinstance(model, catboost.CatBoost)
@@ -105,11 +107,14 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
-            model_objective = cls.get_model_objective(model)
-            model_meta.model_objective = model_objective
-            if kwargs.get("enable_explainability", True):
+            inferred_model_objective = cls.get_model_objective_and_output_type(model)
+            model_meta.model_objective = handlers_utils.validate_model_objective(
+                model_meta.model_objective, inferred_model_objective
+            )
+            model_objective = model_meta.model_objective
+            if enable_explainability:
                 output_type = model_signature.DataType.DOUBLE
-                if model_objective == model_meta_schema.ModelObjective.MULTI_CLASSIFICATION:
+                if model_objective == model_types.ModelObjective.MULTI_CLASSIFICATION:
                     output_type = model_signature.DataType.STRING
                 model_meta = handlers_utils.add_explain_method_signature(
                     model_meta=model_meta,
@@ -143,11 +148,8 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
             ],
             check_local_version=True,
         )
-        if kwargs.get("enable_explainability", True):
-            model_meta.env.include_if_absent(
-                [model_env.ModelDependency(requirement="shap", pip_name="shap")],
-                check_local_version=True,
-            )
+        if enable_explainability:
+            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap", pip_name="shap")])
             model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP
         model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)

snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py CHANGED Viewed

@@ -369,7 +369,9 @@ class HuggingFacePipelineHandler(
                     else:
                         # For others, we could offer the whole dataframe as a list.
                         # Some of them may need some conversion
-                        if isinstance(raw_model, transformers.ConversationalPipeline):
+                        if hasattr(transformers, "ConversationalPipeline") and isinstance(
+                            raw_model, transformers.ConversationalPipeline
+                        ):
                             input_data = [
                                 transformers.Conversation(
                                     text=conv_data["user_inputs"][0],
@@ -391,27 +393,33 @@ class HuggingFacePipelineHandler(
                     # Making it not aligned with the auto-inferred signature.
                     # If the output is a dict, we could blindly create a list containing that.
                     # Otherwise, creating pandas DataFrame won't succeed.
-                    if isinstance(temp_res, (dict, transformers.Conversation)) or (
-                        # For some pipeline that is expected to generate a list of dict per input
-                        # When it omit outer list, it becomes list of dict instead of list of list of dict.
-                        # We need to distinguish them from those pipelines that designed to output a dict per input
-                        # So we need to check the pipeline type.
-                        isinstance(
-                            raw_model,
-                            (
-                                transformers.FillMaskPipeline,
-                                transformers.QuestionAnsweringPipeline,
-                            ),
+                    if (
+                        (hasattr(transformers, "Conversation") and isinstance(temp_res, transformers.Conversation))
+                        or isinstance(temp_res, dict)
+                        or (
+                            # For some pipeline that is expected to generate a list of dict per input
+                            # When it omit outer list, it becomes list of dict instead of list of list of dict.
+                            # We need to distinguish them from those pipelines that designed to output a dict per input
+                            # So we need to check the pipeline type.
+                            isinstance(
+                                raw_model,
+                                (
+                                    transformers.FillMaskPipeline,
+                                    transformers.QuestionAnsweringPipeline,
+                                ),
+                            )
+                            and X.shape[0] == 1
+                            and isinstance(temp_res[0], dict)
                         )
-                        and X.shape[0] == 1
-                        and isinstance(temp_res[0], dict)
                     ):
                         temp_res = [temp_res]
                     if len(temp_res) == 0:
                         return pd.DataFrame()
-                    if isinstance(raw_model, transformers.ConversationalPipeline):
+                    if hasattr(transformers, "ConversationalPipeline") and isinstance(
+                        raw_model, transformers.ConversationalPipeline
+                    ):
                         temp_res = [[conv.generated_responses] for conv in temp_res]
                     # To concat those who outputs a list with one input.

snowflake/ml/model/_packager/model_handlers/lightgbm.py CHANGED Viewed

@@ -19,7 +19,11 @@ from typing_extensions import TypeGuard, Unpack
 from snowflake.ml._internal import type_utils
 from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_env import model_env
-from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
+from snowflake.ml.model._packager.model_handlers import (
+    _base,
+    _utils as handlers_utils,
+    model_objective_utils,
+)
 from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import (
     model_blob_meta,
@@ -43,47 +47,6 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
     MODEL_BLOB_FILE_OR_DIR = "model.pkl"
     DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
-    _BINARY_CLASSIFICATION_OBJECTIVES = ["binary"]
-    _MULTI_CLASSIFICATION_OBJECTIVES = ["multiclass", "multiclassova"]
-    _RANKING_OBJECTIVES = ["lambdarank", "rank_xendcg"]
-    _REGRESSION_OBJECTIVES = [
-        "regression",
-        "regression_l1",
-        "huber",
-        "fair",
-        "poisson",
-        "quantile",
-        "tweedie",
-        "mape",
-        "gamma",
-    ]
-    @classmethod
-    def get_model_objective(
-        cls, model: Union["lightgbm.Booster", "lightgbm.LGBMModel"]
-    ) -> model_meta_schema.ModelObjective:
-        import lightgbm
-        # does not account for cross-entropy and custom
-        if isinstance(model, lightgbm.LGBMClassifier):
-            num_classes = handlers_utils.get_num_classes_if_exists(model)
-            if num_classes == 2:
-                return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
-            return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
-        if isinstance(model, lightgbm.LGBMRanker):
-            return model_meta_schema.ModelObjective.RANKING
-        if isinstance(model, lightgbm.LGBMRegressor):
-            return model_meta_schema.ModelObjective.REGRESSION
-        model_objective = model.params["objective"]
-        if model_objective in cls._BINARY_CLASSIFICATION_OBJECTIVES:
-            return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
-        if model_objective in cls._MULTI_CLASSIFICATION_OBJECTIVES:
-            return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
-        if model_objective in cls._RANKING_OBJECTIVES:
-            return model_meta_schema.ModelObjective.RANKING
-        if model_objective in cls._REGRESSION_OBJECTIVES:
-            return model_meta_schema.ModelObjective.REGRESSION
-        return model_meta_schema.ModelObjective.UNKNOWN
     @classmethod
     def can_handle(
@@ -118,6 +81,8 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.LGBMModelSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", True)
         import lightgbm
         assert isinstance(model, lightgbm.Booster) or isinstance(model, lightgbm.LGBMModel)
@@ -146,20 +111,16 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
-            model_objective = cls.get_model_objective(model)
-            model_meta.model_objective = model_objective
-            if kwargs.get("enable_explainability", True):
-                output_type = model_signature.DataType.DOUBLE
-                if model_objective in [
-                    model_meta_schema.ModelObjective.BINARY_CLASSIFICATION,
-                    model_meta_schema.ModelObjective.MULTI_CLASSIFICATION,
-                ]:
-                    output_type = model_signature.DataType.STRING
+            model_objective_and_output = model_objective_utils.get_model_objective_and_output_type(model)
+            model_meta.model_objective = handlers_utils.validate_model_objective(
+                model_meta.model_objective, model_objective_and_output.objective
+            )
+            if enable_explainability:
                 model_meta = handlers_utils.add_explain_method_signature(
                     model_meta=model_meta,
                     explain_method="explain",
                     target_method="predict",
-                    output_return_type=output_type,
+                    output_return_type=model_objective_and_output.output_type,
                 )
                 model_meta.function_properties = {
                     "explain": {model_meta_schema.FunctionProperties.PARTITIONED.value: False}
@@ -189,11 +150,8 @@ class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgb
             ],
             check_local_version=True,
         )
-        if kwargs.get("enable_explainability", True):
-            model_meta.env.include_if_absent(
-                [model_env.ModelDependency(requirement="shap", pip_name="shap")],
-                check_local_version=True,
-            )
+        if enable_explainability:
+            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap", pip_name="shap")])
             model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP
         return None

snowflake/ml/model/_packager/model_handlers/llm.py CHANGED Viewed

@@ -205,7 +205,9 @@ class LLMHandler(_base.BaseModelHandler[llm.LLM]):
                     "token": raw_model.token,
                 }
                 model_dir_path = raw_model.model_id_or_path
-                peft_config = peft.PeftConfig.from_pretrained(model_dir_path)  # type: ignore[attr-defined]
+                peft_config = peft.PeftConfig.from_pretrained(  # type: ignore[no-untyped-call, attr-defined]
+                    model_dir_path
+                )
                 base_model_path = peft_config.base_model_name_or_path
                 tokenizer = transformers.AutoTokenizer.from_pretrained(
                     base_model_path,
@@ -221,7 +223,7 @@ class LLMHandler(_base.BaseModelHandler[llm.LLM]):
                     model_dir_path,
                     device_map="auto",
                     torch_dtype="auto",
-                    **hub_kwargs,
+                    **hub_kwargs,  # type: ignore[arg-type]
                 )
                 hf_model.eval()
                 hf_model = hf_model.merge_and_unload()

snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

snowflake-ml-python 1.6.1__py3-none-any.whl → 1.6.2__py3-none-any.whl