PyPI - snowflake-ml-python - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

snowflake-ml-python 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (225)

snowflake/cortex/_complete.py +1 -1
snowflake/cortex/_extract_answer.py +1 -1
snowflake/cortex/_sentiment.py +1 -1
snowflake/cortex/_summarize.py +1 -1
snowflake/cortex/_translate.py +1 -1
snowflake/ml/_internal/env_utils.py +68 -6
snowflake/ml/_internal/file_utils.py +34 -4
snowflake/ml/_internal/telemetry.py +79 -91
snowflake/ml/_internal/utils/identifier.py +78 -72
snowflake/ml/_internal/utils/retryable_http.py +16 -4
snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
snowflake/ml/dataset/dataset.py +1 -1
snowflake/ml/model/_api.py +21 -14
snowflake/ml/model/_client/model/model_impl.py +176 -0
snowflake/ml/model/_client/model/model_method_info.py +19 -0
snowflake/ml/model/_client/model/model_version_impl.py +291 -0
snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
snowflake/ml/model/_client/ops/model_ops.py +308 -0
snowflake/ml/model/_client/sql/model.py +75 -0
snowflake/ml/model/_client/sql/model_version.py +213 -0
snowflake/ml/model/_client/sql/stage.py +40 -0
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
snowflake/ml/model/_model_composer/model_composer.py +31 -9
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
snowflake/ml/model/model_signature.py +108 -53
snowflake/ml/model/type_hints.py +1 -0
snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
snowflake/ml/modeling/_internal/model_specifications.py +146 -0
snowflake/ml/modeling/_internal/model_trainer.py +13 -0
snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +108 -135
snowflake/ml/modeling/cluster/affinity_propagation.py +106 -135
snowflake/ml/modeling/cluster/agglomerative_clustering.py +106 -135
snowflake/ml/modeling/cluster/birch.py +106 -135
snowflake/ml/modeling/cluster/bisecting_k_means.py +106 -135
snowflake/ml/modeling/cluster/dbscan.py +106 -135
snowflake/ml/modeling/cluster/feature_agglomeration.py +106 -135
snowflake/ml/modeling/cluster/k_means.py +105 -135
snowflake/ml/modeling/cluster/mean_shift.py +106 -135
snowflake/ml/modeling/cluster/mini_batch_k_means.py +105 -135
snowflake/ml/modeling/cluster/optics.py +106 -135
snowflake/ml/modeling/cluster/spectral_biclustering.py +106 -135
snowflake/ml/modeling/cluster/spectral_clustering.py +106 -135
snowflake/ml/modeling/cluster/spectral_coclustering.py +106 -135
snowflake/ml/modeling/compose/column_transformer.py +106 -135
snowflake/ml/modeling/compose/transformed_target_regressor.py +108 -135
snowflake/ml/modeling/covariance/elliptic_envelope.py +106 -135
snowflake/ml/modeling/covariance/empirical_covariance.py +99 -128
snowflake/ml/modeling/covariance/graphical_lasso.py +106 -135
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +106 -135
snowflake/ml/modeling/covariance/ledoit_wolf.py +104 -133
snowflake/ml/modeling/covariance/min_cov_det.py +106 -135
snowflake/ml/modeling/covariance/oas.py +99 -128
snowflake/ml/modeling/covariance/shrunk_covariance.py +103 -132
snowflake/ml/modeling/decomposition/dictionary_learning.py +106 -135
snowflake/ml/modeling/decomposition/factor_analysis.py +106 -135
snowflake/ml/modeling/decomposition/fast_ica.py +106 -135
snowflake/ml/modeling/decomposition/incremental_pca.py +106 -135
snowflake/ml/modeling/decomposition/kernel_pca.py +106 -135
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +106 -135
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +106 -135
snowflake/ml/modeling/decomposition/pca.py +106 -135
snowflake/ml/modeling/decomposition/sparse_pca.py +106 -135
snowflake/ml/modeling/decomposition/truncated_svd.py +106 -135
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +108 -135
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +108 -135
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +108 -135
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +108 -135
snowflake/ml/modeling/ensemble/bagging_classifier.py +108 -135
snowflake/ml/modeling/ensemble/bagging_regressor.py +108 -135
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +108 -135
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +108 -135
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +108 -135
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +108 -135
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +108 -135
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +108 -135
snowflake/ml/modeling/ensemble/isolation_forest.py +106 -135
snowflake/ml/modeling/ensemble/random_forest_classifier.py +108 -135
snowflake/ml/modeling/ensemble/random_forest_regressor.py +108 -135
snowflake/ml/modeling/ensemble/stacking_regressor.py +108 -135
snowflake/ml/modeling/ensemble/voting_classifier.py +108 -135
snowflake/ml/modeling/ensemble/voting_regressor.py +108 -135
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +101 -128
snowflake/ml/modeling/feature_selection/select_fdr.py +99 -126
snowflake/ml/modeling/feature_selection/select_fpr.py +99 -126
snowflake/ml/modeling/feature_selection/select_fwe.py +99 -126
snowflake/ml/modeling/feature_selection/select_k_best.py +100 -127
snowflake/ml/modeling/feature_selection/select_percentile.py +99 -126
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +106 -135
snowflake/ml/modeling/feature_selection/variance_threshold.py +95 -124
snowflake/ml/modeling/framework/base.py +83 -1
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +108 -135
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +108 -135
snowflake/ml/modeling/impute/iterative_imputer.py +106 -135
snowflake/ml/modeling/impute/knn_imputer.py +106 -135
snowflake/ml/modeling/impute/missing_indicator.py +106 -135
snowflake/ml/modeling/impute/simple_imputer.py +9 -1
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +96 -125
snowflake/ml/modeling/kernel_approximation/nystroem.py +106 -135
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +106 -135
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +105 -134
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +103 -132
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +108 -135
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +90 -118
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +90 -118
snowflake/ml/modeling/linear_model/ard_regression.py +108 -135
snowflake/ml/modeling/linear_model/bayesian_ridge.py +108 -135
snowflake/ml/modeling/linear_model/elastic_net.py +108 -135
snowflake/ml/modeling/linear_model/elastic_net_cv.py +108 -135
snowflake/ml/modeling/linear_model/gamma_regressor.py +108 -135
snowflake/ml/modeling/linear_model/huber_regressor.py +108 -135
snowflake/ml/modeling/linear_model/lars.py +108 -135
snowflake/ml/modeling/linear_model/lars_cv.py +108 -135
snowflake/ml/modeling/linear_model/lasso.py +108 -135
snowflake/ml/modeling/linear_model/lasso_cv.py +108 -135
snowflake/ml/modeling/linear_model/lasso_lars.py +108 -135
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +108 -135
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +108 -135
snowflake/ml/modeling/linear_model/linear_regression.py +108 -135
snowflake/ml/modeling/linear_model/logistic_regression.py +108 -135
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_lasso.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +108 -135
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +108 -135
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +108 -135
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +107 -135
snowflake/ml/modeling/linear_model/perceptron.py +107 -135
snowflake/ml/modeling/linear_model/poisson_regressor.py +108 -135
snowflake/ml/modeling/linear_model/ransac_regressor.py +108 -135
snowflake/ml/modeling/linear_model/ridge.py +108 -135
snowflake/ml/modeling/linear_model/ridge_classifier.py +108 -135
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +108 -135
snowflake/ml/modeling/linear_model/ridge_cv.py +108 -135
snowflake/ml/modeling/linear_model/sgd_classifier.py +108 -135
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +106 -135
snowflake/ml/modeling/linear_model/sgd_regressor.py +108 -135
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +108 -135
snowflake/ml/modeling/linear_model/tweedie_regressor.py +108 -135
snowflake/ml/modeling/manifold/isomap.py +106 -135
snowflake/ml/modeling/manifold/mds.py +106 -135
snowflake/ml/modeling/manifold/spectral_embedding.py +106 -135
snowflake/ml/modeling/manifold/tsne.py +106 -135
snowflake/ml/modeling/metrics/classification.py +196 -55
snowflake/ml/modeling/metrics/correlation.py +4 -2
snowflake/ml/modeling/metrics/covariance.py +7 -4
snowflake/ml/modeling/metrics/ranking.py +32 -16
snowflake/ml/modeling/metrics/regression.py +60 -32
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +106 -135
snowflake/ml/modeling/mixture/gaussian_mixture.py +106 -135
snowflake/ml/modeling/model_selection/grid_search_cv.py +91 -148
snowflake/ml/modeling/model_selection/randomized_search_cv.py +93 -154
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +105 -132
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +108 -135
snowflake/ml/modeling/multiclass/output_code_classifier.py +108 -135
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +108 -135
snowflake/ml/modeling/naive_bayes/categorical_nb.py +108 -135
snowflake/ml/modeling/naive_bayes/complement_nb.py +108 -135
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +98 -125
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +107 -134
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +108 -135
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +108 -135
snowflake/ml/modeling/neighbors/kernel_density.py +106 -135
snowflake/ml/modeling/neighbors/local_outlier_factor.py +106 -135
snowflake/ml/modeling/neighbors/nearest_centroid.py +108 -135
snowflake/ml/modeling/neighbors/nearest_neighbors.py +106 -135
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +108 -135
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +108 -135
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +108 -135
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +106 -135
snowflake/ml/modeling/neural_network/mlp_classifier.py +108 -135
snowflake/ml/modeling/neural_network/mlp_regressor.py +108 -135
snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
snowflake/ml/modeling/preprocessing/binarizer.py +25 -8
snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +9 -4
snowflake/ml/modeling/preprocessing/label_encoder.py +31 -11
snowflake/ml/modeling/preprocessing/max_abs_scaler.py +27 -9
snowflake/ml/modeling/preprocessing/min_max_scaler.py +42 -14
snowflake/ml/modeling/preprocessing/normalizer.py +9 -4
snowflake/ml/modeling/preprocessing/one_hot_encoder.py +26 -10
snowflake/ml/modeling/preprocessing/ordinal_encoder.py +37 -13
snowflake/ml/modeling/preprocessing/polynomial_features.py +106 -135
snowflake/ml/modeling/preprocessing/robust_scaler.py +39 -13
snowflake/ml/modeling/preprocessing/standard_scaler.py +36 -12
snowflake/ml/modeling/semi_supervised/label_propagation.py +108 -135
snowflake/ml/modeling/semi_supervised/label_spreading.py +108 -135
snowflake/ml/modeling/svm/linear_svc.py +108 -135
snowflake/ml/modeling/svm/linear_svr.py +108 -135
snowflake/ml/modeling/svm/nu_svc.py +108 -135
snowflake/ml/modeling/svm/nu_svr.py +108 -135
snowflake/ml/modeling/svm/svc.py +108 -135
snowflake/ml/modeling/svm/svr.py +108 -135
snowflake/ml/modeling/tree/decision_tree_classifier.py +108 -135
snowflake/ml/modeling/tree/decision_tree_regressor.py +108 -135
snowflake/ml/modeling/tree/extra_tree_classifier.py +108 -135
snowflake/ml/modeling/tree/extra_tree_regressor.py +108 -135
snowflake/ml/modeling/xgboost/xgb_classifier.py +108 -136
snowflake/ml/modeling/xgboost/xgb_regressor.py +108 -136
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +108 -136
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +108 -136
snowflake/ml/registry/model_registry.py +2 -0
snowflake/ml/registry/registry.py +215 -0
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +34 -1
snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
snowflake_ml_python-1.1.0.dist-info/RECORD +0 -331
{snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0

snowflake/ml/modeling/model_selection/grid_search_cv.py CHANGED

@@ -2,13 +2,13 @@
 # This code is auto-generated using the sklearn_wrapper_template.py_template template.
 # Do not modify the auto-generated code(except automatic reformatting by precommit hooks).
 #
-from typing import Dict, Iterable, List, Optional, Set, Union
+from typing import Any, Dict, Iterable, List, Optional, Set, Union
 from uuid import uuid4
+import cloudpickle as cp
 import numpy as np
 import pandas as pd
 import sklearn.model_selection
-from sklearn.model_selection import ParameterGrid
 from sklearn.utils.metaestimators import available_if
 from snowflake.ml._internal import telemetry
@@ -25,13 +25,12 @@ from snowflake.ml.model.model_signature import (
 from snowflake.ml.modeling._internal.estimator_protocols import CVHandlers
 from snowflake.ml.modeling._internal.estimator_utils import (
     gather_dependencies,
-    is_single_node,
     original_estimator_has_callable,
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
+from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.snowpark_handlers import (
-    SklearnModelSelectionWrapperProvider,
     SnowparkHandlers as HandlersImpl,
 )
 from snowflake.ml.modeling.framework.base import BaseTransformer
@@ -53,19 +52,54 @@ class GridSearchCV(BaseTransformer):
     Parameters
     ----------
-    estimator : estimator object
+    estimator: estimator object
         This is assumed to implement the scikit-learn estimator interface.
         Either estimator needs to provide a ``score`` function,
         or ``scoring`` must be passed.
-    param_grid : dict or list of dictionaries
+    param_grid: dict or list of dictionaries
         Dictionary with parameters names (`str`) as keys and lists of
         parameter settings to try as values, or a list of such
         dictionaries, in which case the grids spanned by each dictionary
         in the list are explored. This enables searching over any sequence
         of parameter settings.
-    scoring : str, callable, list, tuple or dict, default=None
+    input_cols: Optional[Union[str, List[str]]]
+        A string or list of strings representing column names that contain features.
+        If this parameter is not specified, all columns in the input DataFrame except
+        the columns specified by label_cols and sample-weight_col parameters are
+        considered input columns.
+    label_cols: Optional[Union[str, List[str]]]
+        A string or list of strings representing column names that contain labels.
+        This is a required param for estimators, as there is no way to infer these
+        columns. If this parameter is not specified, then object is fitted without
+        labels(Like a transformer).
+    output_cols: Optional[Union[str, List[str]]]
+        A string or list of strings representing column names that will store the
+        output of predict and transform operations. The length of output_cols mus
+        match the expected number of output columns from the specific estimator or
+        transformer class used.
+        If this parameter is not specified, output column names are derived by
+        adding an OUTPUT_ prefix to the label column names. These inferred output
+        column names work for estimator's predict() method, but output_cols must
+        be set explicitly for transformers.
+    passthrough_cols: A string or a list of strings indicating column names to be excluded from any
+        operations (such as train, transform, or inference). These specified column(s)
+        will remain untouched throughout the process. This option is helpful in scenarios
+        requiring automatic input_cols inference, but need to avoid using specific
+        columns, like index columns, during training or inference.
+    sample_weight_col: Optional[str]
+        A string representing the column name containing the examples’ weights.
+        This argument is only required when working with weighted datasets.
+    drop_input_cols: Optional[bool], default=False
+        If set, the response of predict(), transform() methods will not contain input columns.
+    scoring: str, callable, list, tuple or dict, default=None
         Strategy to evaluate the performance of the cross-validated model on
         the test set.
@@ -83,13 +117,13 @@ class GridSearchCV(BaseTransformer):
         See :ref:`multimetric_grid_search` for an example.
-    n_jobs : int, default=None
+    n_jobs: int, default=None
         Number of jobs to run in parallel.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
-    refit : bool, str, or callable, default=True
+    refit: bool, str, or callable, default=True
         Refit an estimator using the best found parameters on the whole
         dataset.
@@ -120,7 +154,7 @@ class GridSearchCV(BaseTransformer):
         to see how to design a custom selection strategy using a callable
         via `refit`.
-    cv : int, cross-validation generator or an iterable, default=None
+    cv: int, cross-validation generator or an iterable, default=None
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -137,7 +171,7 @@ class GridSearchCV(BaseTransformer):
         Refer :ref:`User Guide <cross_validation>` for the various
         cross-validation strategies that can be used here.
-    verbose : int
+    verbose: int
         Controls the verbosity: the higher, the more messages.
         - >1 : the computation time for each fold and parameter candidate is
@@ -146,7 +180,7 @@ class GridSearchCV(BaseTransformer):
         - >3 : the fold and candidate parameter indexes are also displayed
           together with the starting time of the computation.
-    pre_dispatch : int, or str, default='2*n_jobs'
+    pre_dispatch: int, or str, default='2*n_jobs'
         Controls the number of jobs that get dispatched during parallel
         execution. Reducing this number can be useful to avoid an
         explosion of memory consumption when more jobs get dispatched
@@ -163,13 +197,13 @@ class GridSearchCV(BaseTransformer):
             - A str, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
-    error_score : 'raise' or numeric, default=np.nan
+    error_score: 'raise' or numeric, default=np.nan
         Value to assign to the score if an error occurs in estimator fitting.
         If set to 'raise', the error is raised. If a numeric value is given,
         FitFailedWarning is raised. This parameter does not affect the refit
         step, which will always raise the error.
-    return_train_score : bool, default=False
+    return_train_score: bool, default=False
         If ``False``, the ``cv_results_`` attribute will not include training
         scores.
         Computing training scores is used to get insights on how different
@@ -177,35 +211,6 @@ class GridSearchCV(BaseTransformer):
         However computing the scores on the training set can be computationally
         expensive and is not strictly required to select the parameters that
         yield the best generalization performance.
-    input_cols : Optional[Union[str, List[str]]]
-        A string or list of strings representing column names that contain features.
-        If this parameter is not specified, all columns in the input DataFrame except
-        the columns specified by label_cols and sample-weight_col parameters are
-        considered input columns.
-    label_cols : Optional[Union[str, List[str]]]
-        A string or list of strings representing column names that contain labels.
-        This is a required param for estimators, as there is no way to infer these
-        columns. If this parameter is not specified, then object is fitted without
-        labels(Like a transformer).
-    output_cols: Optional[Union[str, List[str]]]
-        A string or list of strings representing column names that will store the
-        output of predict and transform operations. The length of output_cols mus
-        match the expected number of output columns from the specific estimator or
-        transformer class used.
-        If this parameter is not specified, output column names are derived by
-        adding an OUTPUT_ prefix to the label column names. These inferred output
-        column names work for estimator's predict() method, but output_cols must
-        be set explicitly for transformers.
-    sample_weight_col: Optional[str]
-        A string representing the column name containing the examples’ weights.
-        This argument is only required when working with weighted datasets.
-    drop_input_cols: Optional[bool], default=False
-        If set, the response of predict(), transform() methods will not contain input columns.
     """
     _ENABLE_DISTRIBUTED = True
@@ -225,11 +230,16 @@ class GridSearchCV(BaseTransformer):
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,
+        passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
         drop_input_cols: Optional[bool] = False,
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        deps: Set[str] = set(SklearnModelSelectionWrapperProvider().dependencies)
+        deps: Set[str] = {
+            f"numpy=={np.__version__}",
+            f"scikit-learn=={sklearn.__version__}",
+            f"cloudpickle=={cp.__version__}",
+        }
         deps = deps | gather_dependencies(estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
@@ -246,7 +256,7 @@ class GridSearchCV(BaseTransformer):
             "return_train_score": (return_train_score, False, False),
         }
         cleaned_up_init_args = validate_sklearn_args(args=init_args, klass=sklearn.model_selection.GridSearchCV)
-        self._sklearn_object = sklearn.model_selection.GridSearchCV(
+        self._sklearn_object: Any = sklearn.model_selection.GridSearchCV(
             **cleaned_up_init_args,
         )
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
@@ -255,10 +265,10 @@ class GridSearchCV(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
+        self.set_passthrough_cols(passthrough_cols)
         self._handlers: CVHandlers = HandlersImpl(
             class_name=self.__class__.__name__,
             subproject=_SUBPROJECT,
-            wrapper_provider=SklearnModelSelectionWrapperProvider(),
         )
     def _get_rand_id(self) -> str:
@@ -270,21 +280,6 @@ class GridSearchCV(BaseTransformer):
         """
         return str(uuid4()).replace("-", "_").upper()
-    def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
-        """
-        Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
-        Args:
-            dataset: Input dataset.
-        """
-        if not self.input_cols:
-            cols = [c for c in dataset.columns if c not in self.get_label_cols() and c != self.sample_weight_col]
-            self.set_input_cols(input_cols=cols)
-        if not self.output_cols:
-            cols = [identifier.concat_names(ids=["OUTPUT_", c]) for c in self.label_cols]
-            self.set_output_cols(output_cols=cols)
     def _get_active_columns(self) -> List[str]:
         """ "Get the list of columns that are relevant to the transformer."""
         selected_cols = (
@@ -301,10 +296,6 @@ class GridSearchCV(BaseTransformer):
         For more details on this function, see [sklearn.model_selection.GridSearchCV.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV.fit)
-        Raises:
-            TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
         Args:
             dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
                 Snowpark or Pandas DataFrame.
@@ -313,70 +304,37 @@ class GridSearchCV(BaseTransformer):
             self
         """
         self._infer_input_output_cols(dataset)
-        if isinstance(dataset, pd.DataFrame):
-            self._estimator = self._handlers.fit_pandas(
-                dataset, self._sklearn_object, self.input_cols, self.label_cols, self.sample_weight_col
-            )
-        elif isinstance(dataset, DataFrame):
-            self._fit_snowpark(dataset)
-        else:
-            raise TypeError(
-                f"Unexpected dataset type: {type(dataset)}."
-                "Supported dataset types: snowpark.DataFrame, pandas.DataFrame."
+        if self._sklearn_object.n_jobs is None:
+            self._sklearn_object.n_jobs = -1
+        if isinstance(dataset, DataFrame):
+            session = dataset._session
+            assert session is not None  # keep mypy happy
+            # Validate that key package version in user workspace are supported in snowflake conda channel
+            # If customer doesn't have package in conda channel, replace the ones have the closest versions
+            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT
             )
-        self._is_fitted = True
-        self._get_model_signatures(dataset)
-        return self
-    def _fit_snowpark(self, dataset: DataFrame) -> None:
-        session = dataset._session
-        assert session is not None  # keep mypy happy
-        # Validate that key package version in user workspace are supported in snowflake conda channel
-        # If customer doesn't have package in conda channel, replace the ones have the closest versions
-        self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT
-        )
+            # Specify input columns so column pruning will be enforced
+            selected_cols = self._get_active_columns()
+            if len(selected_cols) > 0:
+                dataset = dataset.select(selected_cols)
-        selected_cols = self._get_active_columns()
-        if len(selected_cols) > 0:
-            dataset = dataset.select(selected_cols)
+            self._snowpark_cols = dataset.select(self.input_cols).columns
-        assert self._sklearn_object is not None
-        is_distributed = not is_single_node(session) and self._ENABLE_DISTRIBUTED is True
-        if is_distributed:
-            # Set the default value of the `n_jobs` attribute for the estimator.
-            # If minus one is set, it will not be abided by in the UDTF, so we set that to the default value as well.
-            if hasattr(self._sklearn_object.estimator, "n_jobs") and self._sklearn_object.estimator.n_jobs in [
-                None,
-                -1,
-            ]:
-                self._sklearn_object.estimator.n_jobs = DEFAULT_UDTF_NJOBS
-            self._sklearn_object = self._handlers.fit_search_snowpark(
-                param_grid=ParameterGrid(self._sklearn_object.param_grid),
-                dataset=dataset,
-                session=session,
-                estimator=self._sklearn_object,
-                dependencies=self._get_dependencies(),
-                udf_imports=["sklearn"],
-                input_cols=self.input_cols,
-                label_cols=self.label_cols,
-                sample_weight_col=self.sample_weight_col,
-            )
-        else:
-            # Fall back with stored procedure implementation
-            # set the parallel factor to default to minus one, to fully accelerate the cores in single node
-            if self._sklearn_object.n_jobs is None:
-                self._sklearn_object.n_jobs = -1
-            self._sklearn_object = self._handlers.fit_snowpark(
-                dataset,
-                session,
-                self._sklearn_object,
-                ["snowflake-snowpark-python"] + self._get_dependencies(),
-                self.input_cols,
-                self.label_cols,
-                self.sample_weight_col,
-            )
+        model_trainer = ModelTrainerBuilder.build(
+            estimator=self._sklearn_object,
+            dataset=dataset,
+            input_cols=self.input_cols,
+            label_cols=self.label_cols,
+            sample_weight_col=self.sample_weight_col,
+            autogenerated=False,
+            subproject=_SUBPROJECT,
+        )
+        self._sklearn_object = model_trainer.train()
+        self._is_fitted = True
+        self._get_model_signatures(dataset)
+        return self
     def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
         if self._drop_input_cols:
@@ -433,7 +391,7 @@ class GridSearchCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
-        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
+        quoted_input_cols = identifier.get_inferred_names(unquoted_input_cols)
         estimator = self._sklearn_object
@@ -530,10 +488,6 @@ class GridSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
         """Call predict on the estimator with the best found parameters
         For more details on this function, see [sklearn.model_selection.GridSearchCV.predict]
@@ -576,10 +530,6 @@ class GridSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
         """Call transform on the estimator with the best found parameters
         For more details on this function, see [sklearn.model_selection.GridSearchCV.transform]
@@ -643,10 +593,6 @@ class GridSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def predict_proba(
         self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_proba_"
     ) -> Union[DataFrame, pd.DataFrame]:
@@ -684,10 +630,6 @@ class GridSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def predict_log_proba(
         self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_log_proba_"
     ) -> Union[DataFrame, pd.DataFrame]:
@@ -726,10 +668,6 @@ class GridSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def decision_function(
         self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "decision_function_"
     ) -> Union[DataFrame, pd.DataFrame]:
@@ -766,6 +704,8 @@ class GridSearchCV(BaseTransformer):
     @available_if(original_estimator_has_callable("score"))  # type: ignore[misc]
     def score(self, dataset: Union[DataFrame, pd.DataFrame]) -> float:
         """
+        If implemented by the original estimator, return the score for the dataset.
         Args:
             dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
                 Snowpark or Pandas DataFrame.
@@ -818,9 +758,9 @@ class GridSearchCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = _infer_signature(dataset[self.label_cols], "output")
+                outputs = list(_infer_signature(dataset[self.label_cols], "output"))
                 # rename the output columns
-                outputs = model_signature_utils.rename_features(outputs, self.output_cols)
+                outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
                     inputs, ([] if self._drop_input_cols else inputs) + outputs
                 )
@@ -857,6 +797,9 @@ class GridSearchCV(BaseTransformer):
         return self._model_signature_dict
     def to_sklearn(self) -> sklearn.model_selection.GridSearchCV:
+        """
+        Get sklearn.model_selection.GridSearchCV object.
+        """
         assert self._sklearn_object is not None
         return self._sklearn_object

snowflake-ml-python 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

snowflake-ml-python 1.1.0py3-none-any.whl → 1.1.2py3-none-any.whl