PyPI - snowflake-ml-python - Versions diffs - 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203)

snowflake/ml/_internal/env_utils.py +11 -1
snowflake/ml/_internal/utils/identifier.py +3 -1
snowflake/ml/_internal/utils/sql_identifier.py +2 -6
snowflake/ml/feature_store/feature_store.py +151 -78
snowflake/ml/feature_store/feature_view.py +12 -24
snowflake/ml/fileset/sfcfs.py +56 -50
snowflake/ml/fileset/stage_fs.py +48 -13
snowflake/ml/model/_client/model/model_version_impl.py +2 -50
snowflake/ml/model/_client/ops/model_ops.py +78 -29
snowflake/ml/model/_client/sql/model.py +23 -2
snowflake/ml/model/_client/sql/model_version.py +22 -1
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
snowflake/ml/model/_packager/model_packager.py +2 -2
snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
snowflake/ml/model/type_hints.py +21 -2
snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
snowflake/ml/modeling/cluster/birch.py +195 -123
snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
snowflake/ml/modeling/cluster/dbscan.py +195 -123
snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
snowflake/ml/modeling/cluster/k_means.py +195 -123
snowflake/ml/modeling/cluster/mean_shift.py +195 -123
snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
snowflake/ml/modeling/cluster/optics.py +195 -123
snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
snowflake/ml/modeling/compose/column_transformer.py +195 -123
snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
snowflake/ml/modeling/covariance/oas.py +195 -123
snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
snowflake/ml/modeling/decomposition/pca.py +195 -123
snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
snowflake/ml/modeling/framework/_utils.py +8 -1
snowflake/ml/modeling/framework/base.py +9 -1
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
snowflake/ml/modeling/impute/knn_imputer.py +195 -123
snowflake/ml/modeling/impute/missing_indicator.py +195 -123
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
snowflake/ml/modeling/linear_model/lars.py +195 -123
snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
snowflake/ml/modeling/linear_model/lasso.py +195 -123
snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
snowflake/ml/modeling/linear_model/perceptron.py +195 -123
snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
snowflake/ml/modeling/linear_model/ridge.py +195 -123
snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
snowflake/ml/modeling/manifold/isomap.py +195 -123
snowflake/ml/modeling/manifold/mds.py +195 -123
snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
snowflake/ml/modeling/manifold/tsne.py +195 -123
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
snowflake/ml/modeling/pipeline/pipeline.py +4 -4
snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
snowflake/ml/modeling/svm/linear_svc.py +195 -123
snowflake/ml/modeling/svm/linear_svr.py +195 -123
snowflake/ml/modeling/svm/nu_svc.py +195 -123
snowflake/ml/modeling/svm/nu_svr.py +195 -123
snowflake/ml/modeling/svm/svc.py +195 -123
snowflake/ml/modeling/svm/svr.py +195 -123
snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
snowflake/ml/registry/registry.py +1 -1
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
{snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0

snowflake/ml/model/_packager/model_meta/model_meta.py CHANGED Viewed

@@ -23,6 +23,7 @@ from snowflake.ml.model._packager.model_meta import (
     model_meta_schema,
 )
 from snowflake.ml.model._packager.model_meta_migrator import migrator_plans
+from snowflake.ml.model._packager.model_runtime import model_runtime
 MODEL_METADATA_FILE = "model.yaml"
 MODEL_CODE_DIR = "code"
@@ -115,7 +116,6 @@ def create_model_metadata(
         python_version=python_version,
         embed_local_ml_library=embed_local_ml_library,
         legacy_save=legacy_save,
-        relax_version=relax_version,
     )
     if embed_local_ml_library:
@@ -156,6 +156,8 @@ def create_model_metadata(
                     cloudpickle.register_pickle_by_value(mod)
                     imported_modules.append(mod)
         yield model_meta
+        if relax_version:
+            model_meta.env.relax_version()
         model_meta.save(model_dir_path)
     finally:
         for mod in imported_modules:
@@ -169,7 +171,6 @@ def _create_env_for_model_metadata(
     python_version: Optional[str] = None,
     embed_local_ml_library: bool = False,
     legacy_save: bool = False,
-    relax_version: bool = False,
 ) -> model_env.ModelEnv:
     env = model_env.ModelEnv()
@@ -197,10 +198,6 @@ def _create_env_for_model_metadata(
             ],
             check_local_version=True,
         )
-    if relax_version:
-        env.relax_version()
     return env
@@ -237,6 +234,7 @@ class ModelMetadata:
         name: str,
         env: model_env.ModelEnv,
         model_type: model_types.SupportedModelHandlerType,
+        runtimes: Optional[Dict[str, model_runtime.ModelRuntime]] = None,
         signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
         metadata: Optional[Dict[str, str]] = None,
         creation_timestamp: Optional[str] = None,
@@ -262,6 +260,8 @@ class ModelMetadata:
         if models:
             self.models = models
+        self._runtimes = runtimes
         self.original_metadata_version = original_metadata_version
     @property
@@ -273,6 +273,19 @@ class ModelMetadata:
         parsed_min_snowpark_ml_version = version.parse(min_snowpark_ml_version)
         self._min_snowpark_ml_version = max(self._min_snowpark_ml_version, parsed_min_snowpark_ml_version)
+    @property
+    def runtimes(self) -> Dict[str, model_runtime.ModelRuntime]:
+        if self._runtimes and "cpu" in self._runtimes:
+            return self._runtimes
+        runtimes = {
+            "cpu": model_runtime.ModelRuntime("cpu", self.env),
+        }
+        if self.env.cuda_version:
+            runtimes.update(
+                {"gpu": model_runtime.ModelRuntime("gpu", self.env, is_gpu=True, server_availability_source="conda")}
+            )
+        return runtimes
     def save(self, model_dir_path: str) -> None:
         """Save the model metadata
@@ -291,6 +304,10 @@ class ModelMetadata:
             {
                 "creation_timestamp": self.creation_timestamp,
                 "env": self.env.save_as_dict(pathlib.Path(model_dir_path)),
+                "runtimes": {
+                    runtime_name: runtime.save(pathlib.Path(model_dir_path))
+                    for runtime_name, runtime in self.runtimes.items()
+                },
                 "metadata": self.metadata,
                 "model_type": self.model_type,
                 "models": {model_name: blob.to_dict() for model_name, blob in self.models.items()},
@@ -302,6 +319,7 @@ class ModelMetadata:
         )
         with open(model_yaml_path, "w", encoding="utf-8") as out:
+            yaml.SafeDumper.ignore_aliases = lambda *args: True  # type: ignore[method-assign]
             yaml.safe_dump(
                 model_dict,
                 stream=out,
@@ -330,6 +348,7 @@ class ModelMetadata:
         return model_meta_schema.ModelMetadataDict(
             creation_timestamp=loaded_meta["creation_timestamp"],
             env=loaded_meta["env"],
+            runtimes=loaded_meta.get("runtimes", None),
             metadata=loaded_meta.get("metadata", None),
             model_type=loaded_meta["model_type"],
             models=loaded_meta["models"],
@@ -363,10 +382,21 @@ class ModelMetadata:
         models = {name: model_blob_meta.ModelBlobMeta(**blob_meta) for name, blob_meta in model_dict["models"].items()}
         env = model_env.ModelEnv()
         env.load_from_dict(pathlib.Path(model_dir_path), model_dict["env"])
+        runtimes: Optional[Dict[str, model_runtime.ModelRuntime]]
+        if model_dict.get("runtimes", None):
+            runtimes = {
+                name: model_runtime.ModelRuntime.load(pathlib.Path(model_dir_path), name, env, runtime_dict)
+                for name, runtime_dict in model_dict["runtimes"].items()
+            }
+        else:
+            runtimes = None
         return cls(
             name=model_dict["name"],
             model_type=model_dict["model_type"],
             env=env,
+            runtimes=runtimes,
             signatures=signatures,
             metadata=model_dict.get("metadata", None),
             creation_timestamp=model_dict["creation_timestamp"],

snowflake/ml/model/_packager/model_meta/model_meta_schema.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # This files contains schema definition of what will be written into model.yml
 # Changing this file should lead to a change of the schema version.
-from typing import Any, Dict, Optional, TypedDict, Union
+from typing import Any, Dict, List, Optional, TypedDict, Union
 from typing_extensions import NotRequired, Required
@@ -11,6 +11,16 @@ MODEL_METADATA_VERSION = "2023-12-01"
 MODEL_METADATA_MIN_SNOWPARK_ML_VERSION = "1.0.12"
+class ModelRuntimeDependenciesDict(TypedDict):
+    conda: Required[str]
+    pip: Required[str]
+class ModelRuntimeDict(TypedDict):
+    imports: Required[List[str]]
+    dependencies: Required[ModelRuntimeDependenciesDict]
 class ModelEnvDict(TypedDict):
     conda: Required[str]
     pip: Required[str]
@@ -23,11 +33,19 @@ class BaseModelBlobOptions(TypedDict):
     ...
+class CatBoostModelBlobOptions(BaseModelBlobOptions):
+    catboost_estimator_type: Required[str]
 class HuggingFacePipelineModelBlobOptions(BaseModelBlobOptions):
     task: Required[str]
     batch_size: Required[int]
+class LightGBMModelBlobOptions(BaseModelBlobOptions):
+    lightgbm_estimator_type: Required[str]
 class LLMModelBlobOptions(BaseModelBlobOptions):
     batch_size: Required[int]
@@ -61,6 +79,7 @@ class ModelBlobMetadataDict(TypedDict):
 class ModelMetadataDict(TypedDict):
     creation_timestamp: Required[str]
     env: Required[ModelEnvDict]
+    runtimes: NotRequired[Dict[str, ModelRuntimeDict]]
     metadata: NotRequired[Optional[Dict[str, str]]]
     model_type: Required[type_hints.SupportedModelHandlerType]
     models: Required[Dict[str, ModelBlobMetadataDict]]

snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py CHANGED Viewed

@@ -3,7 +3,9 @@ from typing import Any, Dict, Type
 from snowflake.ml.model._packager.model_meta import model_meta_schema
 from snowflake.ml.model._packager.model_meta_migrator import base_migrator, migrator_v1
-MODEL_META_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelMetaMigrator]] = {"1": migrator_v1.MetaMigrator_v1}
+MODEL_META_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelMetaMigrator]] = {
+    "1": migrator_v1.MetaMigrator_v1,
+}
 def migrate_metadata(loaded_meta: Dict[str, Any]) -> Dict[str, Any]:

snowflake/ml/model/_packager/model_packager.py CHANGED Viewed

@@ -102,8 +102,8 @@ class ModelPackager:
             if signatures is None:
                 logging.info(f"Model signatures are auto inferred as:\n\n{meta.signatures}")
-            self.model = model
-            self.meta = meta
+        self.model = model
+        self.meta = meta
     def load(
         self,

snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} RENAMED Viewed

@@ -3,7 +3,7 @@ REQUIREMENTS = [
     "anyio>=3.5.0,<4",
     "numpy>=1.23,<2",
     "packaging>=20.9,<24",
-    "pandas>=1.0.0,<2",
+    "pandas>=1.0.0,<3",
     "pyyaml>=6.0,<7",
     "snowflake-snowpark-python>=1.11.1,<2,!=1.12.0",
     "typing-extensions>=4.1.0,<5"

snowflake/ml/model/_packager/model_runtime/model_runtime.py ADDED Viewed

@@ -0,0 +1,137 @@
+import copy
+import pathlib
+import warnings
+from typing import List, Literal, Optional
+from packaging import requirements
+from snowflake.ml._internal import env as snowml_env, env_utils, file_utils
+from snowflake.ml.model._packager.model_env import model_env
+from snowflake.ml.model._packager.model_meta import model_meta_schema
+from snowflake.ml.model._packager.model_runtime import (
+    _snowml_inference_alternative_requirements,
+)
+_SNOWML_INFERENCE_ALTERNATIVE_DEPENDENCIES = [
+    str(env_utils.get_package_spec_with_supported_ops_only(requirements.Requirement(r)))
+    for r in _snowml_inference_alternative_requirements.REQUIREMENTS
+]
+class ModelRuntime:
+    """Class to represent runtime in a model, which controls the runtime and version, imports and dependencies.
+    Attributes:
+        runtime_env: ModelEnv object representing the actual environment when deploying. The environment is based on
+            the environment from the packaged model with additional dependencies required to deploy.
+        imports: List of files to be imported in the created functions. At least packed model should be imported.
+            If the required Snowpark ML library is not available in the server-side, we will automatically pack the
+            local version as well as "snowflake-ml-python.zip" and added into the imports.
+    """
+    RUNTIME_DIR_REL_PATH = "runtimes"
+    def __init__(
+        self,
+        name: str,
+        env: model_env.ModelEnv,
+        imports: Optional[List[pathlib.PurePosixPath]] = None,
+        is_gpu: bool = False,
+        server_availability_source: Literal["snowflake", "conda"] = "snowflake",
+        loading_from_file: bool = False,
+    ) -> None:
+        self.name = name
+        self.runtime_env = copy.deepcopy(env)
+        self.imports = imports or []
+        if loading_from_file:
+            return
+        snowml_pkg_spec = f"{env_utils.SNOWPARK_ML_PKG_NAME}=={self.runtime_env.snowpark_ml_version}"
+        if self.runtime_env._snowpark_ml_version.local:
+            self.embed_local_ml_library = True
+        else:
+            if server_availability_source == "snowflake":
+                snowml_server_availability = (
+                    len(
+                        env_utils.get_matched_package_versions_in_information_schema_with_active_session(
+                            reqs=[requirements.Requirement(snowml_pkg_spec)],
+                            python_version=snowml_env.PYTHON_VERSION,
+                        ).get(env_utils.SNOWPARK_ML_PKG_NAME, [])
+                    )
+                    >= 1
+                )
+            else:
+                snowml_server_availability = (
+                    len(
+                        env_utils.get_matched_package_versions_in_snowflake_conda_channel(
+                            req=requirements.Requirement(snowml_pkg_spec),
+                            python_version=snowml_env.PYTHON_VERSION,
+                        )
+                    )
+                    >= 1
+                )
+            self.embed_local_ml_library = not snowml_server_availability
+        additional_package = (
+            _SNOWML_INFERENCE_ALTERNATIVE_DEPENDENCIES if self.embed_local_ml_library else [snowml_pkg_spec]
+        )
+        self.runtime_env.include_if_absent(
+            [
+                model_env.ModelDependency(requirement=dep, pip_name=requirements.Requirement(dep).name)
+                for dep in additional_package
+            ],
+        )
+        if is_gpu:
+            self.runtime_env.generate_env_for_cuda()
+    @property
+    def runtime_rel_path(self) -> pathlib.PurePosixPath:
+        return pathlib.PurePosixPath(ModelRuntime.RUNTIME_DIR_REL_PATH) / self.name
+    def save(self, packager_path: pathlib.Path) -> model_meta_schema.ModelRuntimeDict:
+        runtime_base_path = packager_path / self.runtime_rel_path
+        runtime_base_path.mkdir(parents=True, exist_ok=True)
+        if getattr(self, "embed_local_ml_library", False):
+            snowpark_ml_lib_path = runtime_base_path / "snowflake-ml-python.zip"
+            file_utils.zip_python_package(str(snowpark_ml_lib_path), "snowflake.ml")
+            snowpark_ml_lib_rel_path = pathlib.PurePosixPath(snowpark_ml_lib_path.relative_to(packager_path).as_posix())
+            self.imports.append(snowpark_ml_lib_rel_path)
+        self.runtime_env.conda_env_rel_path = self.runtime_rel_path / self.runtime_env.conda_env_rel_path
+        self.runtime_env.pip_requirements_rel_path = self.runtime_rel_path / self.runtime_env.pip_requirements_rel_path
+        env_dict = self.runtime_env.save_as_dict(packager_path)
+        return model_meta_schema.ModelRuntimeDict(
+            imports=list(map(str, self.imports)),
+            dependencies=model_meta_schema.ModelRuntimeDependenciesDict(
+                conda=env_dict["conda"],
+                pip=env_dict["pip"],
+            ),
+        )
+    @staticmethod
+    def load(
+        packager_path: pathlib.Path,
+        name: str,
+        meta_env: model_env.ModelEnv,
+        loaded_dict: model_meta_schema.ModelRuntimeDict,
+    ) -> "ModelRuntime":
+        env = model_env.ModelEnv()
+        env.python_version = meta_env.python_version
+        env.cuda_version = meta_env.cuda_version
+        env.snowpark_ml_version = meta_env.snowpark_ml_version
+        conda_env_rel_path = pathlib.PurePosixPath(loaded_dict["dependencies"]["conda"])
+        pip_requirements_rel_path = pathlib.PurePosixPath(loaded_dict["dependencies"]["pip"])
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            env.load_from_conda_file(packager_path / conda_env_rel_path)
+            env.load_from_pip_file(packager_path / pip_requirements_rel_path)
+        return ModelRuntime(
+            name=name, env=env, imports=list(map(pathlib.PurePosixPath, loaded_dict["imports"])), loading_from_file=True
+        )

snowflake/ml/model/type_hints.py CHANGED Viewed

@@ -19,6 +19,8 @@ from snowflake.ml.model import deploy_platforms
 from snowflake.ml.model._signatures import core
 if TYPE_CHECKING:
+    import catboost
+    import lightgbm
     import mlflow
     import numpy as np
     import pandas as pd
@@ -33,7 +35,6 @@ if TYPE_CHECKING:
     import snowflake.ml.model.custom_model
     import snowflake.ml.model.models.huggingface_pipeline
     import snowflake.ml.model.models.llm
-    import snowflake.ml.model.models.sentence_transformers
     import snowflake.snowpark
     from snowflake.ml.modeling.framework import base  # noqa: F401
@@ -69,6 +70,9 @@ _DataType = TypeVar("_DataType", bound=SupportedDataType)
 CustomModelType = TypeVar("CustomModelType", bound="snowflake.ml.model.custom_model.CustomModel")
 SupportedRequireSignatureModelType = Union[
+    "catboost.CatBoost",
+    "lightgbm.LGBMModel",
+    "lightgbm.Booster",
     "snowflake.ml.model.custom_model.CustomModel",
     "sklearn.base.BaseEstimator",
     "sklearn.pipeline.Pipeline",
@@ -85,7 +89,6 @@ SupportedNoSignatureRequirementsModelType = Union[
     "transformers.Pipeline",
     "sentence_transformers.SentenceTransformer",
     "snowflake.ml.model.models.huggingface_pipeline.HuggingFacePipelineModel",
-    "snowflake.ml.model.models.sentence_transformers.SentenceTransformer",
     "snowflake.ml.model.models.llm.LLM",
 ]
@@ -98,11 +101,14 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 | Type                            | Handler File | Handler             |
 |---------------------------------|--------------|---------------------|
+| catboost.CatBoost       | catboost.py   | _CatBoostModelHandler    |
 | snowflake.ml.model.custom_model.CustomModel | custom.py    | _CustomModelHandler |
 | sklearn.base.BaseEstimator      | sklearn.py   | _SKLModelHandler    |
 | sklearn.pipeline.Pipeline       | sklearn.py   | _SKLModelHandler    |
 | xgboost.XGBModel       | xgboost.py   | _XGBModelHandler    |
 | xgboost.Booster        | xgboost.py   | _XGBModelHandler    |
+| lightgbm.LGBMModel       | lightgbm.py   | _LGBMModelHandler    |
+| lightgbm.Booster        | lightgbm.py   | _LGBMModelHandler    |
 | snowflake.ml.framework.base.BaseEstimator      | snowmlmodel.py   | _SnowMLModelHandler    |
 | torch.nn.Module      | pytroch.py   | _PyTorchHandler    |
 | torch.jit.ScriptModule      | torchscript.py   | _TorchScriptHandler    |
@@ -114,8 +120,10 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 """
 SupportedModelHandlerType = Literal[
+    "catboost",
     "custom",
     "huggingface_pipeline",
+    "lightgbm",
     "mlflow",
     "pytorch",
     "sentence_transformers",
@@ -225,6 +233,11 @@ class BaseModelSaveOption(TypedDict):
     method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
+class CatBoostModelSaveOptions(BaseModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
+    cuda_version: NotRequired[str]
 class CustomModelSaveOption(BaseModelSaveOption):
     cuda_version: NotRequired[str]
@@ -238,6 +251,10 @@ class XGBModelSaveOptions(BaseModelSaveOption):
     cuda_version: NotRequired[str]
+class LGBMModelSaveOptions(BaseModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
 class SNOWModelSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
@@ -279,7 +296,9 @@ class LLMSaveOptions(BaseModelSaveOption):
 ModelSaveOption = Union[
     BaseModelSaveOption,
+    CatBoostModelSaveOptions,
     CustomModelSaveOption,
+    LGBMModelSaveOptions,
     SKLModelSaveOptions,
     XGBModelSaveOptions,
     SNOWModelSaveOptions,

snowflake/ml/modeling/_internal/estimator_utils.py CHANGED Viewed

@@ -195,21 +195,26 @@ def handle_inference_result(
     shape = transformed_numpy_array.shape
     if len(shape) > 1:
         if shape[1] != len(output_cols):
-            # HeterogeneousEnsemble's transform method produce results with variying shapes
-            # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes).
-            # It is hard to predict the response shape without using fragile introspection logic.
-            # So, to avoid that we are packing the results into a dataframe of shape (n_samples, 1) with
-            # each element being a list.
-            if len(output_cols) != 1:
-                raise TypeError(
-                    "expected_output_cols must be same length as transformed array or should be of length 1."
-                    f"Currently expected_output_cols shape is {len(output_cols)}, "
-                    f"transformed array shape is {shape}. "
-                )
+            # Within UDF, it is not feasible to change the output cols because we need to
+            # query the output cols after UDF by the expected output cols
             if not within_udf:
+                # The following lines are to generate the output cols to match the length of
+                # transformed_numpy_array
                 actual_output_cols = []
                 for i in range(shape[1]):
                     actual_output_cols.append(f"{output_cols[0]}_{i}")
                 output_cols = actual_output_cols
+            else:
+                # HeterogeneousEnsemble's transform method produces results with varying shapes
+                # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes).
+                # It is hard to predict the response shape without using fragile introspection logic.
+                # So, to avoid that we are packing the results into a dataframe of shape (n_samples, 1) with
+                # each element being a list.
+                if len(output_cols) != 1:
+                    raise TypeError(
+                        "expected_output_cols must be same length as transformed array or should be of length 1."
+                        f"Currently expected_output_cols shape is {len(output_cols)}, "
+                        f"transformed array shape is {shape}. "
+                    )
     return transformed_numpy_array, output_cols

snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py CHANGED Viewed

@@ -99,7 +99,10 @@ class PandasTransformHandlers:
                 original_exception=ValueError(
                     "The feature names should match with those that were passed during fit.\n"
                     f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
+                    f"Features specified with `input_cols` in estimator "
+                    f"{self.estimator.__class__.__name__} in the input dataframe: {input_cols}\n"
+                    f"In your input dataset for current method '{inference_method}', the features are:"
+                    f" {features_in_dataset}."
                 ),
             )
         input_df = dataset[columns_to_select]

snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py CHANGED Viewed

@@ -955,22 +955,21 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                     X, y, indices, params_to_evaluate, base_estimator, fit_and_score_kwargs = _load_data_into_udf()
                     self.X = X
                     self.y = y
-                    self.indices = indices
+                    self.test_indices = indices
                     self.params_to_evaluate = params_to_evaluate
                     self.base_estimator = base_estimator
                     self.fit_and_score_kwargs = fit_and_score_kwargs
                     self.fit_score_params: List[Any] = []
+                    self.cached_train_test_indices = []
+                    # Calculate the full index here to avoid duplicate calculation (which consumes a lot of memory)
+                    full_index = np.arange(DATA_LENGTH)
+                    for i in range(n_splits):
+                        self.cached_train_test_indices.extend(
+                            [[np.setdiff1d(full_index, self.test_indices[i]), self.test_indices[i]]]
+                        )
                 def process(self, idx: int, params_idx: int, cv_idx: int) -> None:
-                    # 1. Calculate the parameter list
-                    parameters = self.params_to_evaluate[params_idx]
-                    # 2. Calculate the cross validator indices
-                    # cross validator's indices: we stored test indices only (to save space);
-                    # use the full index to re-construct each train index back.
-                    full_index = np.array([i for i in range(DATA_LENGTH)])
-                    test_index = self.indices[cv_idx]
-                    train_index = np.setdiff1d(full_index, test_index)
-                    self.fit_score_params.extend([[idx, (params_idx, parameters), (cv_idx, (train_index, test_index))]])
+                    self.fit_score_params.extend([[idx, params_idx, cv_idx]])
                 def end_partition(self) -> Iterator[Tuple[int, str]]:
                     from sklearn.base import clone
@@ -984,14 +983,14 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
                             clone(self.base_estimator),
                             self.X,
                             self.y,
-                            train=train,
-                            test=test,
-                            parameters=parameters,
+                            train=self.cached_train_test_indices[split_idx][0],
+                            test=self.cached_train_test_indices[split_idx][1],
+                            parameters=self.params_to_evaluate[cand_idx],
                             split_progress=(split_idx, n_splits),
                             candidate_progress=(cand_idx, n_candidates),
                             **self.fit_and_score_kwargs,  # load sample weight here
                         )
-                        for _, (cand_idx, parameters), (split_idx, (train, test)) in self.fit_score_params
+                        for _, cand_idx, split_idx in self.fit_score_params
                     )
                     binary_cv_results = None

snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py CHANGED Viewed

@@ -136,7 +136,7 @@ class SnowparkTransformHandlers:
                 estimator.n_jobs = 1
             inference_res = getattr(estimator, inference_method)(input_df, *args, **kwargs)
-            transformed_numpy_array, output_cols = handle_inference_result(
+            transformed_numpy_array, _ = handle_inference_result(
                 inference_res=inference_res,
                 output_cols=expected_output_cols,
                 inference_method=inference_method,
@@ -144,13 +144,13 @@ class SnowparkTransformHandlers:
             )
             if len(transformed_numpy_array.shape) > 1:
-                if transformed_numpy_array.shape[1] != len(output_cols):
+                if transformed_numpy_array.shape[1] != len(expected_output_cols):
                     series = pd.Series(transformed_numpy_array.tolist())
-                    transformed_pandas_df = pd.DataFrame(series, columns=output_cols)
+                    transformed_pandas_df = pd.DataFrame(series, columns=expected_output_cols)
                 else:
-                    transformed_pandas_df = pd.DataFrame(transformed_numpy_array.tolist(), columns=output_cols)
+                    transformed_pandas_df = pd.DataFrame(transformed_numpy_array.tolist(), columns=expected_output_cols)
             else:
-                transformed_pandas_df = pd.DataFrame(transformed_numpy_array, columns=output_cols)
+                transformed_pandas_df = pd.DataFrame(transformed_numpy_array, columns=expected_output_cols)
             return transformed_pandas_df.to_dict("records")  # type: ignore[no-any-return]

snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

snowflake-ml-python 1.4.0py3-none-any.whl → 1.4.1py3-none-any.whl