PyPI - snowflake-ml-python - Versions diffs - 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (234)

snowflake/ml/_internal/env_utils.py +77 -32
snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
snowflake/ml/_internal/exceptions/error_codes.py +3 -0
snowflake/ml/_internal/lineage/data_source.py +10 -0
snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
snowflake/ml/_internal/utils/identifier.py +3 -1
snowflake/ml/_internal/utils/sql_identifier.py +2 -6
snowflake/ml/dataset/__init__.py +10 -0
snowflake/ml/dataset/dataset.py +454 -129
snowflake/ml/dataset/dataset_factory.py +53 -0
snowflake/ml/dataset/dataset_metadata.py +103 -0
snowflake/ml/dataset/dataset_reader.py +202 -0
snowflake/ml/feature_store/feature_store.py +531 -332
snowflake/ml/feature_store/feature_view.py +40 -23
snowflake/ml/fileset/embedded_stage_fs.py +146 -0
snowflake/ml/fileset/sfcfs.py +56 -54
snowflake/ml/fileset/snowfs.py +159 -0
snowflake/ml/fileset/stage_fs.py +49 -17
snowflake/ml/model/__init__.py +2 -2
snowflake/ml/model/_api.py +16 -1
snowflake/ml/model/_client/model/model_impl.py +27 -0
snowflake/ml/model/_client/model/model_version_impl.py +137 -50
snowflake/ml/model/_client/ops/model_ops.py +159 -40
snowflake/ml/model/_client/sql/model.py +25 -2
snowflake/ml/model/_client/sql/model_version.py +131 -2
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
snowflake/ml/model/_model_composer/model_composer.py +22 -1
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
snowflake/ml/model/_packager/model_env/model_env.py +41 -0
snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
snowflake/ml/model/_packager/model_packager.py +2 -5
snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
snowflake/ml/model/type_hints.py +21 -2
snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
snowflake/ml/modeling/_internal/model_trainer.py +7 -0
snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
snowflake/ml/modeling/cluster/birch.py +248 -175
snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
snowflake/ml/modeling/cluster/dbscan.py +246 -175
snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
snowflake/ml/modeling/cluster/k_means.py +248 -175
snowflake/ml/modeling/cluster/mean_shift.py +246 -175
snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
snowflake/ml/modeling/cluster/optics.py +246 -175
snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
snowflake/ml/modeling/compose/column_transformer.py +248 -175
snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
snowflake/ml/modeling/covariance/oas.py +246 -175
snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
snowflake/ml/modeling/decomposition/pca.py +248 -175
snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
snowflake/ml/modeling/framework/_utils.py +8 -1
snowflake/ml/modeling/framework/base.py +72 -37
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
snowflake/ml/modeling/impute/knn_imputer.py +248 -175
snowflake/ml/modeling/impute/missing_indicator.py +248 -175
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
snowflake/ml/modeling/linear_model/lars.py +246 -175
snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
snowflake/ml/modeling/linear_model/lasso.py +246 -175
snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
snowflake/ml/modeling/linear_model/perceptron.py +246 -175
snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
snowflake/ml/modeling/linear_model/ridge.py +246 -175
snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
snowflake/ml/modeling/manifold/isomap.py +248 -175
snowflake/ml/modeling/manifold/mds.py +248 -175
snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
snowflake/ml/modeling/manifold/tsne.py +248 -175
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
snowflake/ml/modeling/pipeline/pipeline.py +517 -35
snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
snowflake/ml/modeling/svm/linear_svc.py +246 -175
snowflake/ml/modeling/svm/linear_svr.py +246 -175
snowflake/ml/modeling/svm/nu_svc.py +246 -175
snowflake/ml/modeling/svm/nu_svr.py +246 -175
snowflake/ml/modeling/svm/svc.py +246 -175
snowflake/ml/modeling/svm/svr.py +246 -175
snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
snowflake/ml/registry/model_registry.py +3 -149
snowflake/ml/registry/registry.py +1 -1
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
snowflake/ml/registry/_artifact_manager.py +0 -156
snowflake/ml/registry/artifact.py +0 -46
snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0

snowflake/ml/model/_packager/model_handlers/catboost.py ADDED Viewed

@@ -0,0 +1,206 @@
+import os
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Type, cast, final
+import numpy as np
+import pandas as pd
+from typing_extensions import TypeGuard, Unpack
+from snowflake.ml._internal import type_utils
+from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
+from snowflake.ml.model._packager.model_env import model_env
+from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
+from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
+from snowflake.ml.model._packager.model_meta import (
+    model_blob_meta,
+    model_meta as model_meta_api,
+    model_meta_schema,
+)
+from snowflake.ml.model._signatures import numpy_handler, utils as model_signature_utils
+if TYPE_CHECKING:
+    import catboost
+@final
+class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
+    """Handler for CatBoost based model."""
+    HANDLER_TYPE = "catboost"
+    HANDLER_VERSION = "2024-03-21"
+    _MIN_SNOWPARK_ML_VERSION = "1.3.1"
+    _HANDLER_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelHandlerMigrator]] = {}
+    MODELE_BLOB_FILE_OR_DIR = "model.bin"
+    DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
+    @classmethod
+    def can_handle(cls, model: model_types.SupportedModelType) -> TypeGuard["catboost.CatBoost"]:
+        return (type_utils.LazyType("catboost.CatBoost").isinstance(model)) and any(
+            (hasattr(model, method) and callable(getattr(model, method, None))) for method in cls.DEFAULT_TARGET_METHODS
+        )
+    @classmethod
+    def cast_model(
+        cls,
+        model: model_types.SupportedModelType,
+    ) -> "catboost.CatBoost":
+        import catboost
+        assert isinstance(model, catboost.CatBoost)
+        return model
+    @classmethod
+    def save_model(
+        cls,
+        name: str,
+        model: "catboost.CatBoost",
+        model_meta: model_meta_api.ModelMetadata,
+        model_blobs_dir_path: str,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
+        is_sub_model: Optional[bool] = False,
+        **kwargs: Unpack[model_types.CatBoostModelSaveOptions],
+    ) -> None:
+        import catboost
+        assert isinstance(model, catboost.CatBoost)
+        if not is_sub_model:
+            target_methods = handlers_utils.get_target_methods(
+                model=model,
+                target_methods=kwargs.pop("target_methods", None),
+                default_target_methods=cls.DEFAULT_TARGET_METHODS,
+            )
+            def get_prediction(
+                target_method_name: str, sample_input_data: model_types.SupportedLocalDataType
+            ) -> model_types.SupportedLocalDataType:
+                if not isinstance(sample_input_data, (pd.DataFrame, np.ndarray)):
+                    sample_input_data = model_signature._convert_local_data_to_df(sample_input_data)
+                target_method = getattr(model, target_method_name, None)
+                assert callable(target_method)
+                predictions_df = target_method(sample_input_data)
+                return predictions_df
+            model_meta = handlers_utils.validate_signature(
+                model=model,
+                model_meta=model_meta,
+                target_methods=target_methods,
+                sample_input_data=sample_input_data,
+                get_prediction_fn=get_prediction,
+            )
+        model_blob_path = os.path.join(model_blobs_dir_path, name)
+        os.makedirs(model_blob_path, exist_ok=True)
+        model_save_path = os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR)
+        model.save_model(model_save_path)
+        base_meta = model_blob_meta.ModelBlobMeta(
+            name=name,
+            model_type=cls.HANDLER_TYPE,
+            handler_version=cls.HANDLER_VERSION,
+            path=cls.MODELE_BLOB_FILE_OR_DIR,
+            options=model_meta_schema.CatBoostModelBlobOptions({"catboost_estimator_type": model.__class__.__name__}),
+        )
+        model_meta.models[name] = base_meta
+        model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
+        model_meta.env.include_if_absent(
+            [
+                model_env.ModelDependency(requirement="catboost", pip_name="catboost"),
+            ],
+            check_local_version=True,
+        )
+        model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)
+        return None
+    @classmethod
+    def load_model(
+        cls,
+        name: str,
+        model_meta: model_meta_api.ModelMetadata,
+        model_blobs_dir_path: str,
+        **kwargs: Unpack[model_types.ModelLoadOption],
+    ) -> "catboost.CatBoost":
+        import catboost
+        model_blob_path = os.path.join(model_blobs_dir_path, name)
+        model_blobs_metadata = model_meta.models
+        model_blob_metadata = model_blobs_metadata[name]
+        model_blob_filename = model_blob_metadata.path
+        model_blob_file_path = os.path.join(model_blob_path, model_blob_filename)
+        model_blob_options = cast(model_meta_schema.CatBoostModelBlobOptions, model_blob_metadata.options)
+        if "catboost_estimator_type" not in model_blob_options:
+            raise ValueError("Missing field `catboost_estimator_type` in model blob metadata for type `catboost`")
+        catboost_estimator_type = model_blob_options["catboost_estimator_type"]
+        if not hasattr(catboost, catboost_estimator_type):
+            raise ValueError("Type of CatBoost estimator is not supported.")
+        assert os.path.isfile(model_blob_file_path)  # saved model is a file
+        model = getattr(catboost, catboost_estimator_type)()
+        model.load_model(model_blob_file_path)
+        assert isinstance(model, getattr(catboost, catboost_estimator_type))
+        if kwargs.get("use_gpu", False):
+            assert type(kwargs.get("use_gpu", False)) == bool
+            gpu_params = {"task_type": "GPU"}
+            model.__dict__.update(gpu_params)
+        return model
+    @classmethod
+    def convert_as_custom_model(
+        cls,
+        raw_model: "catboost.CatBoost",
+        model_meta: model_meta_api.ModelMetadata,
+        **kwargs: Unpack[model_types.ModelLoadOption],
+    ) -> custom_model.CustomModel:
+        import catboost
+        from snowflake.ml.model import custom_model
+        def _create_custom_model(
+            raw_model: "catboost.CatBoost",
+            model_meta: model_meta_api.ModelMetadata,
+        ) -> Type[custom_model.CustomModel]:
+            def fn_factory(
+                raw_model: "catboost.CatBoost",
+                signature: model_signature.ModelSignature,
+                target_method: str,
+            ) -> Callable[[custom_model.CustomModel, pd.DataFrame], pd.DataFrame]:
+                @custom_model.inference_api
+                def fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                    res = getattr(raw_model, target_method)(X)
+                    if isinstance(res, list) and len(res) > 0 and isinstance(res[0], np.ndarray):
+                        # In case of multi-output estimators, predict_proba(), decision_function(), etc., functions
+                        # return a list of ndarrays. We need to deal them separately
+                        df = numpy_handler.SeqOfNumpyArrayHandler.convert_to_df(res)
+                    else:
+                        df = pd.DataFrame(res)
+                    return model_signature_utils.rename_pandas_df(df, signature.outputs)
+                return fn
+            type_method_dict: Dict[str, Any] = {"_raw_model": raw_model}
+            for target_method_name, sig in model_meta.signatures.items():
+                type_method_dict[target_method_name] = fn_factory(raw_model, sig, target_method_name)
+            _CatBoostModel = type(
+                "_CatBoostModel",
+                (custom_model.CustomModel,),
+                type_method_dict,
+            )
+            return _CatBoostModel
+        _CatBoostModel = _create_custom_model(raw_model, model_meta)
+        catboost_model = _CatBoostModel(custom_model.ModelContext())
+        return catboost_model

snowflake/ml/model/_packager/model_handlers/lightgbm.py ADDED Viewed

@@ -0,0 +1,218 @@
+import os
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Optional,
+    Type,
+    Union,
+    cast,
+    final,
+)
+import cloudpickle
+import numpy as np
+import pandas as pd
+from typing_extensions import TypeGuard, Unpack
+from snowflake.ml._internal import type_utils
+from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
+from snowflake.ml.model._packager.model_env import model_env
+from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
+from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
+from snowflake.ml.model._packager.model_meta import (
+    model_blob_meta,
+    model_meta as model_meta_api,
+    model_meta_schema,
+)
+from snowflake.ml.model._signatures import numpy_handler, utils as model_signature_utils
+if TYPE_CHECKING:
+    import lightgbm
+@final
+class LGBMModelHandler(_base.BaseModelHandler[Union["lightgbm.Booster", "lightgbm.LGBMModel"]]):
+    """Handler for LightGBM based model."""
+    HANDLER_TYPE = "lightgbm"
+    HANDLER_VERSION = "2024-03-19"
+    _MIN_SNOWPARK_ML_VERSION = "1.3.1"
+    _HANDLER_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelHandlerMigrator]] = {}
+    MODELE_BLOB_FILE_OR_DIR = "model.pkl"
+    DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
+    @classmethod
+    def can_handle(
+        cls, model: model_types.SupportedModelType
+    ) -> TypeGuard[Union["lightgbm.Booster", "lightgbm.LGBMModel"]]:
+        return (
+            type_utils.LazyType("lightgbm.Booster").isinstance(model)
+            or type_utils.LazyType("lightgbm.LGBMModel").isinstance(model)
+        ) and any(
+            (hasattr(model, method) and callable(getattr(model, method, None))) for method in cls.DEFAULT_TARGET_METHODS
+        )
+    @classmethod
+    def cast_model(
+        cls,
+        model: model_types.SupportedModelType,
+    ) -> Union["lightgbm.Booster", "lightgbm.LGBMModel"]:
+        import lightgbm
+        assert isinstance(model, lightgbm.Booster) or isinstance(model, lightgbm.LGBMModel)
+        return model
+    @classmethod
+    def save_model(
+        cls,
+        name: str,
+        model: Union["lightgbm.Booster", "lightgbm.LGBMModel"],
+        model_meta: model_meta_api.ModelMetadata,
+        model_blobs_dir_path: str,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
+        is_sub_model: Optional[bool] = False,
+        **kwargs: Unpack[model_types.LGBMModelSaveOptions],
+    ) -> None:
+        import lightgbm
+        assert isinstance(model, lightgbm.Booster) or isinstance(model, lightgbm.LGBMModel)
+        if not is_sub_model:
+            target_methods = handlers_utils.get_target_methods(
+                model=model,
+                target_methods=kwargs.pop("target_methods", None),
+                default_target_methods=cls.DEFAULT_TARGET_METHODS,
+            )
+            def get_prediction(
+                target_method_name: str, sample_input_data: model_types.SupportedLocalDataType
+            ) -> model_types.SupportedLocalDataType:
+                if not isinstance(sample_input_data, (pd.DataFrame, np.ndarray)):
+                    sample_input_data = model_signature._convert_local_data_to_df(sample_input_data)
+                target_method = getattr(model, target_method_name, None)
+                assert callable(target_method)
+                predictions_df = target_method(sample_input_data)
+                return predictions_df
+            model_meta = handlers_utils.validate_signature(
+                model=model,
+                model_meta=model_meta,
+                target_methods=target_methods,
+                sample_input_data=sample_input_data,
+                get_prediction_fn=get_prediction,
+            )
+        model_blob_path = os.path.join(model_blobs_dir_path, name)
+        os.makedirs(model_blob_path, exist_ok=True)
+        model_save_path = os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR)
+        with open(model_save_path, "wb") as f:
+            cloudpickle.dump(model, f)
+        base_meta = model_blob_meta.ModelBlobMeta(
+            name=name,
+            model_type=cls.HANDLER_TYPE,
+            handler_version=cls.HANDLER_VERSION,
+            path=cls.MODELE_BLOB_FILE_OR_DIR,
+            options=model_meta_schema.LightGBMModelBlobOptions({"lightgbm_estimator_type": model.__class__.__name__}),
+        )
+        model_meta.models[name] = base_meta
+        model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
+        model_meta.env.include_if_absent(
+            [
+                model_env.ModelDependency(requirement="lightgbm", pip_name="lightgbm"),
+                model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn"),
+            ],
+            check_local_version=True,
+        )
+        return None
+    @classmethod
+    def load_model(
+        cls,
+        name: str,
+        model_meta: model_meta_api.ModelMetadata,
+        model_blobs_dir_path: str,
+        **kwargs: Unpack[model_types.ModelLoadOption],
+    ) -> Union["lightgbm.Booster", "lightgbm.LGBMModel"]:
+        import lightgbm
+        model_blob_path = os.path.join(model_blobs_dir_path, name)
+        model_blobs_metadata = model_meta.models
+        model_blob_metadata = model_blobs_metadata[name]
+        model_blob_filename = model_blob_metadata.path
+        model_blob_file_path = os.path.join(model_blob_path, model_blob_filename)
+        model_blob_options = cast(model_meta_schema.LightGBMModelBlobOptions, model_blob_metadata.options)
+        if "lightgbm_estimator_type" not in model_blob_options:
+            raise ValueError("Missing field `lightgbm_estimator_type` in model blob metadata for type `lightgbm`")
+        lightgbm_estimator_type = model_blob_options["lightgbm_estimator_type"]
+        if not hasattr(lightgbm, lightgbm_estimator_type):
+            raise ValueError("Type of LightGBM estimator is not supported.")
+        assert os.path.isfile(model_blob_file_path)  # saved model is a file
+        with open(model_blob_file_path, "rb") as f:
+            model = cloudpickle.load(f)
+        assert isinstance(model, getattr(lightgbm, lightgbm_estimator_type))
+        return model
+    @classmethod
+    def convert_as_custom_model(
+        cls,
+        raw_model: Union["lightgbm.Booster", "lightgbm.XGBModel"],
+        model_meta: model_meta_api.ModelMetadata,
+        **kwargs: Unpack[model_types.ModelLoadOption],
+    ) -> custom_model.CustomModel:
+        import lightgbm
+        from snowflake.ml.model import custom_model
+        def _create_custom_model(
+            raw_model: Union["lightgbm.Booster", "lightgbm.LGBMModel"],
+            model_meta: model_meta_api.ModelMetadata,
+        ) -> Type[custom_model.CustomModel]:
+            def fn_factory(
+                raw_model: Union["lightgbm.Booster", "lightgbm.LGBMModel"],
+                signature: model_signature.ModelSignature,
+                target_method: str,
+            ) -> Callable[[custom_model.CustomModel, pd.DataFrame], pd.DataFrame]:
+                @custom_model.inference_api
+                def fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                    res = getattr(raw_model, target_method)(X)
+                    if isinstance(res, list) and len(res) > 0 and isinstance(res[0], np.ndarray):
+                        # In case of multi-output estimators, predict_proba(), decision_function(), etc., functions
+                        # return a list of ndarrays. We need to deal them separately
+                        df = numpy_handler.SeqOfNumpyArrayHandler.convert_to_df(res)
+                    else:
+                        df = pd.DataFrame(res)
+                    return model_signature_utils.rename_pandas_df(df, signature.outputs)
+                return fn
+            type_method_dict: Dict[str, Any] = {"_raw_model": raw_model}
+            for target_method_name, sig in model_meta.signatures.items():
+                type_method_dict[target_method_name] = fn_factory(raw_model, sig, target_method_name)
+            _LightGBMModel = type(
+                "_LightGBMModel",
+                (custom_model.CustomModel,),
+                type_method_dict,
+            )
+            return _LightGBMModel
+        _LightGBMModel = _create_custom_model(raw_model, model_meta)
+        lightgbm_model = _LightGBMModel(custom_model.ModelContext())
+        return lightgbm_model

snowflake/ml/model/_packager/model_handlers/sklearn.py CHANGED Viewed

@@ -47,6 +47,9 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                 or type_utils.LazyType("sklearn.pipeline.Pipeline").isinstance(model)
             )
             and (not type_utils.LazyType("xgboost.XGBModel").isinstance(model))  # XGBModel is actually a BaseEstimator
+            and (
+                not type_utils.LazyType("lightgbm.LGBMModel").isinstance(model)
+            )  # LGBMModel is actually a BaseEstimator
             and any(
                 (hasattr(model, method) and callable(getattr(model, method, None)))
                 for method in cls.DEFAULT_TARGET_METHODS

snowflake/ml/model/_packager/model_meta/_core_requirements.py CHANGED Viewed

@@ -4,7 +4,7 @@ REQUIREMENTS = [
     "cloudpickle>=2.0.0",
     "numpy>=1.23,<2",
     "packaging>=20.9,<24",
-    "pandas>=1.0.0,<2",
+    "pandas>=1.0.0,<3",
     "pyyaml>=6.0,<7",
     "snowflake-snowpark-python>=1.11.1,<2,!=1.12.0",
     "typing-extensions>=4.1.0,<5"

snowflake/ml/model/_packager/model_meta/model_meta.py CHANGED Viewed

@@ -23,6 +23,7 @@ from snowflake.ml.model._packager.model_meta import (
     model_meta_schema,
 )
 from snowflake.ml.model._packager.model_meta_migrator import migrator_plans
+from snowflake.ml.model._packager.model_runtime import model_runtime
 MODEL_METADATA_FILE = "model.yaml"
 MODEL_CODE_DIR = "code"
@@ -115,7 +116,6 @@ def create_model_metadata(
         python_version=python_version,
         embed_local_ml_library=embed_local_ml_library,
         legacy_save=legacy_save,
-        relax_version=relax_version,
     )
     if embed_local_ml_library:
@@ -156,6 +156,8 @@ def create_model_metadata(
                     cloudpickle.register_pickle_by_value(mod)
                     imported_modules.append(mod)
         yield model_meta
+        if relax_version:
+            model_meta.env.relax_version()
         model_meta.save(model_dir_path)
     finally:
         for mod in imported_modules:
@@ -169,7 +171,6 @@ def _create_env_for_model_metadata(
     python_version: Optional[str] = None,
     embed_local_ml_library: bool = False,
     legacy_save: bool = False,
-    relax_version: bool = False,
 ) -> model_env.ModelEnv:
     env = model_env.ModelEnv()
@@ -197,10 +198,6 @@ def _create_env_for_model_metadata(
             ],
             check_local_version=True,
         )
-    if relax_version:
-        env.relax_version()
     return env
@@ -237,6 +234,7 @@ class ModelMetadata:
         name: str,
         env: model_env.ModelEnv,
         model_type: model_types.SupportedModelHandlerType,
+        runtimes: Optional[Dict[str, model_runtime.ModelRuntime]] = None,
         signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
         metadata: Optional[Dict[str, str]] = None,
         creation_timestamp: Optional[str] = None,
@@ -262,6 +260,8 @@ class ModelMetadata:
         if models:
             self.models = models
+        self._runtimes = runtimes
         self.original_metadata_version = original_metadata_version
     @property
@@ -273,6 +273,19 @@ class ModelMetadata:
         parsed_min_snowpark_ml_version = version.parse(min_snowpark_ml_version)
         self._min_snowpark_ml_version = max(self._min_snowpark_ml_version, parsed_min_snowpark_ml_version)
+    @property
+    def runtimes(self) -> Dict[str, model_runtime.ModelRuntime]:  # Stored runtimes, or defaults built from self.env.
+        if self._runtimes and "cpu" in self._runtimes:  # Stored mapping is used only when it has a "cpu" entry.
+            return self._runtimes
+        runtimes = {  # Fallback mapping; rebuilt on each access and not stored back on self.
+            "cpu": model_runtime.ModelRuntime("cpu", self.env),
+        }
+        if self.env.cuda_version:  # A "gpu" runtime is added only when the env has a truthy cuda_version.
+            runtimes.update(
+                {"gpu": model_runtime.ModelRuntime("gpu", self.env, is_gpu=True, server_availability_source="conda")}
+            )
+        return runtimes
     def save(self, model_dir_path: str) -> None:
         """Save the model metadata
@@ -291,6 +304,10 @@ class ModelMetadata:
             {
                 "creation_timestamp": self.creation_timestamp,
                 "env": self.env.save_as_dict(pathlib.Path(model_dir_path)),
+                "runtimes": {
+                    runtime_name: runtime.save(pathlib.Path(model_dir_path))
+                    for runtime_name, runtime in self.runtimes.items()
+                },
                 "metadata": self.metadata,
                 "model_type": self.model_type,
                 "models": {model_name: blob.to_dict() for model_name, blob in self.models.items()},
@@ -302,11 +319,8 @@ class ModelMetadata:
         )
         with open(model_yaml_path, "w", encoding="utf-8") as out:
-            yaml.safe_dump(
-                model_dict,
-                stream=out,
-                default_flow_style=False,
-            )
+            yaml.SafeDumper.ignore_aliases = lambda *args: True  # type: ignore[method-assign]
+            yaml.safe_dump(model_dict, stream=out, default_flow_style=False)
     @staticmethod
     def _validate_model_metadata(loaded_meta: Any) -> model_meta_schema.ModelMetadataDict:
@@ -330,6 +344,7 @@ class ModelMetadata:
         return model_meta_schema.ModelMetadataDict(
             creation_timestamp=loaded_meta["creation_timestamp"],
             env=loaded_meta["env"],
+            runtimes=loaded_meta.get("runtimes", None),
             metadata=loaded_meta.get("metadata", None),
             model_type=loaded_meta["model_type"],
             models=loaded_meta["models"],
@@ -363,10 +378,21 @@ class ModelMetadata:
         models = {name: model_blob_meta.ModelBlobMeta(**blob_meta) for name, blob_meta in model_dict["models"].items()}
         env = model_env.ModelEnv()
         env.load_from_dict(pathlib.Path(model_dir_path), model_dict["env"])
+        runtimes: Optional[Dict[str, model_runtime.ModelRuntime]]
+        if model_dict.get("runtimes", None):
+            runtimes = {
+                name: model_runtime.ModelRuntime.load(pathlib.Path(model_dir_path), name, env, runtime_dict)
+                for name, runtime_dict in model_dict["runtimes"].items()
+            }
+        else:
+            runtimes = None
         return cls(
             name=model_dict["name"],
             model_type=model_dict["model_type"],
             env=env,
+            runtimes=runtimes,
             signatures=signatures,
             metadata=model_dict.get("metadata", None),
             creation_timestamp=model_dict["creation_timestamp"],

snowflake/ml/model/_packager/model_meta/model_meta_schema.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # This file contains the schema definition of what will be written into model.yaml
 # Changing this file should lead to a change of the schema version.
-from typing import Any, Dict, Optional, TypedDict, Union
+from typing import Any, Dict, List, Optional, TypedDict, Union
 from typing_extensions import NotRequired, Required
@@ -11,6 +11,16 @@ MODEL_METADATA_VERSION = "2023-12-01"
 MODEL_METADATA_MIN_SNOWPARK_ML_VERSION = "1.0.12"
+class ModelRuntimeDependenciesDict(TypedDict):  # Schema of the "dependencies" entry of ModelRuntimeDict.
+    conda: Required[str]  # Required string; presumably a reference to a conda env file - TODO confirm.
+    pip: Required[str]  # Required string; presumably a reference to a pip requirements file - TODO confirm.
+class ModelRuntimeDict(TypedDict):  # Schema of one value in ModelMetadataDict["runtimes"].
+    imports: Required[List[str]]  # Required list of strings; meaning of each entry not shown here - TODO confirm.
+    dependencies: Required[ModelRuntimeDependenciesDict]  # Required nested conda/pip entries.
 class ModelEnvDict(TypedDict):
     conda: Required[str]
     pip: Required[str]
@@ -23,11 +33,19 @@ class BaseModelBlobOptions(TypedDict):
     ...
+class CatBoostModelBlobOptions(BaseModelBlobOptions):  # Blob options schema for CatBoost models.
+    catboost_estimator_type: Required[str]  # Required string; presumably the estimator class name - TODO confirm.
 class HuggingFacePipelineModelBlobOptions(BaseModelBlobOptions):
     task: Required[str]
     batch_size: Required[int]
+class LightGBMModelBlobOptions(BaseModelBlobOptions):  # Blob options schema for LightGBM models.
+    lightgbm_estimator_type: Required[str]  # Required string; presumably the estimator class name - TODO confirm.
 class LLMModelBlobOptions(BaseModelBlobOptions):
     batch_size: Required[int]
@@ -61,6 +79,7 @@ class ModelBlobMetadataDict(TypedDict):
 class ModelMetadataDict(TypedDict):
     creation_timestamp: Required[str]
     env: Required[ModelEnvDict]
+    runtimes: NotRequired[Dict[str, ModelRuntimeDict]]
     metadata: NotRequired[Optional[Dict[str, str]]]
     model_type: Required[type_hints.SupportedModelHandlerType]
     models: Required[Dict[str, ModelBlobMetadataDict]]

snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py CHANGED Viewed

@@ -3,7 +3,9 @@ from typing import Any, Dict, Type
 from snowflake.ml.model._packager.model_meta import model_meta_schema
 from snowflake.ml.model._packager.model_meta_migrator import base_migrator, migrator_v1
-MODEL_META_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelMetaMigrator]] = {"1": migrator_v1.MetaMigrator_v1}
+MODEL_META_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelMetaMigrator]] = {
+    "1": migrator_v1.MetaMigrator_v1,
+}
 def migrate_metadata(loaded_meta: Dict[str, Any]) -> Dict[str, Any]:

snowflake/ml/model/_packager/model_packager.py CHANGED Viewed

@@ -4,7 +4,6 @@ from typing import Dict, List, Optional
 from absl import logging
-from snowflake.ml._internal import env_utils
 from snowflake.ml._internal.exceptions import (
     error_codes,
     exceptions as snowml_exceptions,
@@ -102,8 +101,8 @@ class ModelPackager:
             if signatures is None:
                 logging.info(f"Model signatures are auto inferred as:\n\n{meta.signatures}")
-            self.model = model
-            self.meta = meta
+        self.model = model
+        self.meta = meta
     def load(
         self,
@@ -129,8 +128,6 @@ class ModelPackager:
         model_meta.load_code_path(self.local_dir_path)
-        env_utils.validate_py_runtime_version(self.meta.env.python_version)
         handler = model_handler.load_handler(self.meta.model_type)
         if handler is None:
             raise snowml_exceptions.SnowflakeMLException(

snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} RENAMED Viewed

@@ -3,7 +3,7 @@ REQUIREMENTS = [
     "anyio>=3.5.0,<4",
     "numpy>=1.23,<2",
     "packaging>=20.9,<24",
-    "pandas>=1.0.0,<2",
+    "pandas>=1.0.0,<3",
     "pyyaml>=6.0,<7",
     "snowflake-snowpark-python>=1.11.1,<2,!=1.12.0",
     "typing-extensions>=4.1.0,<5"

snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl

snowflake-ml-python 1.4.0py3-none-any.whl → 1.5.0py3-none-any.whl