PyPI - snowflake-ml-python - Versions diffs - 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl - Mend

snowflake-ml-python: 1.1.0 (py3-none-any.whl) → 1.1.2 (py3-none-any.whl)

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (225)

snowflake/cortex/_complete.py +1 -1
snowflake/cortex/_extract_answer.py +1 -1
snowflake/cortex/_sentiment.py +1 -1
snowflake/cortex/_summarize.py +1 -1
snowflake/cortex/_translate.py +1 -1
snowflake/ml/_internal/env_utils.py +68 -6
snowflake/ml/_internal/file_utils.py +34 -4
snowflake/ml/_internal/telemetry.py +79 -91
snowflake/ml/_internal/utils/identifier.py +78 -72
snowflake/ml/_internal/utils/retryable_http.py +16 -4
snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
snowflake/ml/dataset/dataset.py +1 -1
snowflake/ml/model/_api.py +21 -14
snowflake/ml/model/_client/model/model_impl.py +176 -0
snowflake/ml/model/_client/model/model_method_info.py +19 -0
snowflake/ml/model/_client/model/model_version_impl.py +291 -0
snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
snowflake/ml/model/_client/ops/model_ops.py +308 -0
snowflake/ml/model/_client/sql/model.py +75 -0
snowflake/ml/model/_client/sql/model_version.py +213 -0
snowflake/ml/model/_client/sql/stage.py +40 -0
snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
snowflake/ml/model/_model_composer/model_composer.py +31 -9
snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
snowflake/ml/model/model_signature.py +108 -53
snowflake/ml/model/type_hints.py +1 -0
snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
snowflake/ml/modeling/_internal/model_specifications.py +146 -0
snowflake/ml/modeling/_internal/model_trainer.py +13 -0
snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +108 -135
snowflake/ml/modeling/cluster/affinity_propagation.py +106 -135
snowflake/ml/modeling/cluster/agglomerative_clustering.py +106 -135
snowflake/ml/modeling/cluster/birch.py +106 -135
snowflake/ml/modeling/cluster/bisecting_k_means.py +106 -135
snowflake/ml/modeling/cluster/dbscan.py +106 -135
snowflake/ml/modeling/cluster/feature_agglomeration.py +106 -135
snowflake/ml/modeling/cluster/k_means.py +105 -135
snowflake/ml/modeling/cluster/mean_shift.py +106 -135
snowflake/ml/modeling/cluster/mini_batch_k_means.py +105 -135
snowflake/ml/modeling/cluster/optics.py +106 -135
snowflake/ml/modeling/cluster/spectral_biclustering.py +106 -135
snowflake/ml/modeling/cluster/spectral_clustering.py +106 -135
snowflake/ml/modeling/cluster/spectral_coclustering.py +106 -135
snowflake/ml/modeling/compose/column_transformer.py +106 -135
snowflake/ml/modeling/compose/transformed_target_regressor.py +108 -135
snowflake/ml/modeling/covariance/elliptic_envelope.py +106 -135
snowflake/ml/modeling/covariance/empirical_covariance.py +99 -128
snowflake/ml/modeling/covariance/graphical_lasso.py +106 -135
snowflake/ml/modeling/covariance/graphical_lasso_cv.py +106 -135
snowflake/ml/modeling/covariance/ledoit_wolf.py +104 -133
snowflake/ml/modeling/covariance/min_cov_det.py +106 -135
snowflake/ml/modeling/covariance/oas.py +99 -128
snowflake/ml/modeling/covariance/shrunk_covariance.py +103 -132
snowflake/ml/modeling/decomposition/dictionary_learning.py +106 -135
snowflake/ml/modeling/decomposition/factor_analysis.py +106 -135
snowflake/ml/modeling/decomposition/fast_ica.py +106 -135
snowflake/ml/modeling/decomposition/incremental_pca.py +106 -135
snowflake/ml/modeling/decomposition/kernel_pca.py +106 -135
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +106 -135
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +106 -135
snowflake/ml/modeling/decomposition/pca.py +106 -135
snowflake/ml/modeling/decomposition/sparse_pca.py +106 -135
snowflake/ml/modeling/decomposition/truncated_svd.py +106 -135
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +108 -135
snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +108 -135
snowflake/ml/modeling/ensemble/ada_boost_classifier.py +108 -135
snowflake/ml/modeling/ensemble/ada_boost_regressor.py +108 -135
snowflake/ml/modeling/ensemble/bagging_classifier.py +108 -135
snowflake/ml/modeling/ensemble/bagging_regressor.py +108 -135
snowflake/ml/modeling/ensemble/extra_trees_classifier.py +108 -135
snowflake/ml/modeling/ensemble/extra_trees_regressor.py +108 -135
snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +108 -135
snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +108 -135
snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +108 -135
snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +108 -135
snowflake/ml/modeling/ensemble/isolation_forest.py +106 -135
snowflake/ml/modeling/ensemble/random_forest_classifier.py +108 -135
snowflake/ml/modeling/ensemble/random_forest_regressor.py +108 -135
snowflake/ml/modeling/ensemble/stacking_regressor.py +108 -135
snowflake/ml/modeling/ensemble/voting_classifier.py +108 -135
snowflake/ml/modeling/ensemble/voting_regressor.py +108 -135
snowflake/ml/modeling/feature_selection/generic_univariate_select.py +101 -128
snowflake/ml/modeling/feature_selection/select_fdr.py +99 -126
snowflake/ml/modeling/feature_selection/select_fpr.py +99 -126
snowflake/ml/modeling/feature_selection/select_fwe.py +99 -126
snowflake/ml/modeling/feature_selection/select_k_best.py +100 -127
snowflake/ml/modeling/feature_selection/select_percentile.py +99 -126
snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +106 -135
snowflake/ml/modeling/feature_selection/variance_threshold.py +95 -124
snowflake/ml/modeling/framework/base.py +83 -1
snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +108 -135
snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +108 -135
snowflake/ml/modeling/impute/iterative_imputer.py +106 -135
snowflake/ml/modeling/impute/knn_imputer.py +106 -135
snowflake/ml/modeling/impute/missing_indicator.py +106 -135
snowflake/ml/modeling/impute/simple_imputer.py +9 -1
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +96 -125
snowflake/ml/modeling/kernel_approximation/nystroem.py +106 -135
snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +106 -135
snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +105 -134
snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +103 -132
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +108 -135
snowflake/ml/modeling/lightgbm/lgbm_classifier.py +90 -118
snowflake/ml/modeling/lightgbm/lgbm_regressor.py +90 -118
snowflake/ml/modeling/linear_model/ard_regression.py +108 -135
snowflake/ml/modeling/linear_model/bayesian_ridge.py +108 -135
snowflake/ml/modeling/linear_model/elastic_net.py +108 -135
snowflake/ml/modeling/linear_model/elastic_net_cv.py +108 -135
snowflake/ml/modeling/linear_model/gamma_regressor.py +108 -135
snowflake/ml/modeling/linear_model/huber_regressor.py +108 -135
snowflake/ml/modeling/linear_model/lars.py +108 -135
snowflake/ml/modeling/linear_model/lars_cv.py +108 -135
snowflake/ml/modeling/linear_model/lasso.py +108 -135
snowflake/ml/modeling/linear_model/lasso_cv.py +108 -135
snowflake/ml/modeling/linear_model/lasso_lars.py +108 -135
snowflake/ml/modeling/linear_model/lasso_lars_cv.py +108 -135
snowflake/ml/modeling/linear_model/lasso_lars_ic.py +108 -135
snowflake/ml/modeling/linear_model/linear_regression.py +108 -135
snowflake/ml/modeling/linear_model/logistic_regression.py +108 -135
snowflake/ml/modeling/linear_model/logistic_regression_cv.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_lasso.py +108 -135
snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +108 -135
snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +108 -135
snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +108 -135
snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +107 -135
snowflake/ml/modeling/linear_model/perceptron.py +107 -135
snowflake/ml/modeling/linear_model/poisson_regressor.py +108 -135
snowflake/ml/modeling/linear_model/ransac_regressor.py +108 -135
snowflake/ml/modeling/linear_model/ridge.py +108 -135
snowflake/ml/modeling/linear_model/ridge_classifier.py +108 -135
snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +108 -135
snowflake/ml/modeling/linear_model/ridge_cv.py +108 -135
snowflake/ml/modeling/linear_model/sgd_classifier.py +108 -135
snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +106 -135
snowflake/ml/modeling/linear_model/sgd_regressor.py +108 -135
snowflake/ml/modeling/linear_model/theil_sen_regressor.py +108 -135
snowflake/ml/modeling/linear_model/tweedie_regressor.py +108 -135
snowflake/ml/modeling/manifold/isomap.py +106 -135
snowflake/ml/modeling/manifold/mds.py +106 -135
snowflake/ml/modeling/manifold/spectral_embedding.py +106 -135
snowflake/ml/modeling/manifold/tsne.py +106 -135
snowflake/ml/modeling/metrics/classification.py +196 -55
snowflake/ml/modeling/metrics/correlation.py +4 -2
snowflake/ml/modeling/metrics/covariance.py +7 -4
snowflake/ml/modeling/metrics/ranking.py +32 -16
snowflake/ml/modeling/metrics/regression.py +60 -32
snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +106 -135
snowflake/ml/modeling/mixture/gaussian_mixture.py +106 -135
snowflake/ml/modeling/model_selection/grid_search_cv.py +91 -148
snowflake/ml/modeling/model_selection/randomized_search_cv.py +93 -154
snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +105 -132
snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +108 -135
snowflake/ml/modeling/multiclass/output_code_classifier.py +108 -135
snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +108 -135
snowflake/ml/modeling/naive_bayes/categorical_nb.py +108 -135
snowflake/ml/modeling/naive_bayes/complement_nb.py +108 -135
snowflake/ml/modeling/naive_bayes/gaussian_nb.py +98 -125
snowflake/ml/modeling/naive_bayes/multinomial_nb.py +107 -134
snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +108 -135
snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +108 -135
snowflake/ml/modeling/neighbors/kernel_density.py +106 -135
snowflake/ml/modeling/neighbors/local_outlier_factor.py +106 -135
snowflake/ml/modeling/neighbors/nearest_centroid.py +108 -135
snowflake/ml/modeling/neighbors/nearest_neighbors.py +106 -135
snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +108 -135
snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +108 -135
snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +108 -135
snowflake/ml/modeling/neural_network/bernoulli_rbm.py +106 -135
snowflake/ml/modeling/neural_network/mlp_classifier.py +108 -135
snowflake/ml/modeling/neural_network/mlp_regressor.py +108 -135
snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
snowflake/ml/modeling/preprocessing/binarizer.py +25 -8
snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +9 -4
snowflake/ml/modeling/preprocessing/label_encoder.py +31 -11
snowflake/ml/modeling/preprocessing/max_abs_scaler.py +27 -9
snowflake/ml/modeling/preprocessing/min_max_scaler.py +42 -14
snowflake/ml/modeling/preprocessing/normalizer.py +9 -4
snowflake/ml/modeling/preprocessing/one_hot_encoder.py +26 -10
snowflake/ml/modeling/preprocessing/ordinal_encoder.py +37 -13
snowflake/ml/modeling/preprocessing/polynomial_features.py +106 -135
snowflake/ml/modeling/preprocessing/robust_scaler.py +39 -13
snowflake/ml/modeling/preprocessing/standard_scaler.py +36 -12
snowflake/ml/modeling/semi_supervised/label_propagation.py +108 -135
snowflake/ml/modeling/semi_supervised/label_spreading.py +108 -135
snowflake/ml/modeling/svm/linear_svc.py +108 -135
snowflake/ml/modeling/svm/linear_svr.py +108 -135
snowflake/ml/modeling/svm/nu_svc.py +108 -135
snowflake/ml/modeling/svm/nu_svr.py +108 -135
snowflake/ml/modeling/svm/svc.py +108 -135
snowflake/ml/modeling/svm/svr.py +108 -135
snowflake/ml/modeling/tree/decision_tree_classifier.py +108 -135
snowflake/ml/modeling/tree/decision_tree_regressor.py +108 -135
snowflake/ml/modeling/tree/extra_tree_classifier.py +108 -135
snowflake/ml/modeling/tree/extra_tree_regressor.py +108 -135
snowflake/ml/modeling/xgboost/xgb_classifier.py +108 -136
snowflake/ml/modeling/xgboost/xgb_regressor.py +108 -136
snowflake/ml/modeling/xgboost/xgbrf_classifier.py +108 -136
snowflake/ml/modeling/xgboost/xgbrf_regressor.py +108 -136
snowflake/ml/registry/model_registry.py +2 -0
snowflake/ml/registry/registry.py +215 -0
snowflake/ml/version.py +1 -1
{snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +34 -1
snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
snowflake_ml_python-1.1.0.dist-info/RECORD +0 -331
{snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0

snowflake/ml/modeling/model_selection/randomized_search_cv.py CHANGED

@@ -1,11 +1,11 @@
-from typing import Dict, Iterable, List, Optional, Set, Union
+from typing import Any, Dict, Iterable, List, Optional, Set, Union
 from uuid import uuid4
+import cloudpickle as cp
 import numpy as np
 import pandas as pd
 import sklearn
 import sklearn.model_selection
-from sklearn.model_selection import ParameterSampler
 from sklearn.utils.metaestimators import available_if
 from snowflake.ml._internal import telemetry
@@ -22,13 +22,12 @@ from snowflake.ml.model.model_signature import (
 from snowflake.ml.modeling._internal.estimator_protocols import CVHandlers
 from snowflake.ml.modeling._internal.estimator_utils import (
     gather_dependencies,
-    is_single_node,
     original_estimator_has_callable,
     transform_snowml_obj_to_sklearn_obj,
     validate_sklearn_args,
 )
+from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
 from snowflake.ml.modeling._internal.snowpark_handlers import (
-    SklearnModelSelectionWrapperProvider,
     SnowparkHandlers as HandlersImpl,
 )
 from snowflake.ml.modeling.framework.base import BaseTransformer
@@ -50,13 +49,13 @@ class RandomizedSearchCV(BaseTransformer):
     Parameters
     ----------
-    estimator : estimator object
+    estimator: estimator object
         An object of that type is instantiated for each grid point.
         This is assumed to implement the scikit-learn estimator interface.
         Either estimator needs to provide a ``score`` function,
         or ``scoring`` must be passed.
-    param_distributions : dict or list of dicts
+    param_distributions: dict or list of dicts
         Dictionary with parameters names (`str`) as keys and distributions
         or lists of parameters to try. Distributions must provide a ``rvs``
         method for sampling (such as those from scipy.stats.distributions).
@@ -64,11 +63,46 @@ class RandomizedSearchCV(BaseTransformer):
         If a list of dicts is given, first a dict is sampled uniformly, and
         then a parameter is sampled using that dict as above.
-    n_iter : int, default=10
+    input_cols: Optional[Union[str, List[str]]]
+        A string or list of strings representing column names that contain features.
+        If this parameter is not specified, all columns in the input DataFrame except
+        the columns specified by label_cols and sample-weight_col parameters are
+        considered input columns.
+    label_cols: Optional[Union[str, List[str]]]
+        A string or list of strings representing column names that contain labels.
+        This is a required param for estimators, as there is no way to infer these
+        columns. If this parameter is not specified, then object is fitted without
+        labels(Like a transformer).
+    output_cols: Optional[Union[str, List[str]]]
+        A string or list of strings representing column names that will store the
+        output of predict and transform operations. The length of output_cols mus
+        match the expected number of output columns from the specific estimator or
+        transformer class used.
+        If this parameter is not specified, output column names are derived by
+        adding an OUTPUT_ prefix to the label column names. These inferred output
+        column names work for estimator's predict() method, but output_cols must
+        be set explicitly for transformers.
+    passthrough_cols: A string or a list of strings indicating column names to be excluded from any
+        operations (such as train, transform, or inference). These specified column(s)
+        will remain untouched throughout the process. This option is helpful in scenarios
+        requiring automatic input_cols inference, but need to avoid using specific
+        columns, like index columns, during training or inference.
+    sample_weight_col: Optional[str]
+        A string representing the column name containing the examples’ weights.
+        This argument is only required when working with weighted datasets.
+    drop_input_cols: Optional[bool], default=False
+        If set, the response of predict(), transform() methods will not contain input columns.
+    n_iter: int, default=10
         Number of parameter settings that are sampled. n_iter trades
         off runtime vs quality of the solution.
-    scoring : str, callable, list, tuple or dict, default=None
+    scoring: str, callable, list, tuple or dict, default=None
         Strategy to evaluate the performance of the cross-validated model on
         the test set.
@@ -88,13 +122,13 @@ class RandomizedSearchCV(BaseTransformer):
         If None, the estimator's score method is used.
-    n_jobs : int, default=None
+    n_jobs: int, default=None
         Number of jobs to run in parallel.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
-    refit : bool, str, or callable, default=True
+    refit: bool, str, or callable, default=True
         Refit an estimator using the best found parameters on the whole
         dataset.
@@ -121,7 +155,7 @@ class RandomizedSearchCV(BaseTransformer):
         See ``scoring`` parameter to know more about multiple metric
         evaluation.
-    cv : int, cross-validation generator or an iterable, default=None
+    cv: int, cross-validation generator or an iterable, default=None
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -138,7 +172,7 @@ class RandomizedSearchCV(BaseTransformer):
         Refer :ref:`User Guide <cross_validation>` for the various
         cross-validation strategies that can be used here.
-    verbose : int
+    verbose: int
         Controls the verbosity: the higher, the more messages.
         - >1 : the computation time for each fold and parameter candidate is
@@ -147,7 +181,7 @@ class RandomizedSearchCV(BaseTransformer):
         - >3 : the fold and candidate parameter indexes are also displayed
           together with the starting time of the computation.
-    pre_dispatch : int, or str, default='2*n_jobs'
+    pre_dispatch: int, or str, default='2*n_jobs'
         Controls the number of jobs that get dispatched during parallel
         execution. Reducing this number can be useful to avoid an
         explosion of memory consumption when more jobs get dispatched
@@ -164,20 +198,20 @@ class RandomizedSearchCV(BaseTransformer):
             - A str, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
-    random_state : int, RandomState instance or None, default=None
+    random_state: int, RandomState instance or None, default=None
         Pseudo random number generator state used for random uniform sampling
         from lists of possible values instead of scipy.stats distributions.
         Pass an int for reproducible output across multiple
         function calls.
         See :term:`Glossary <random_state>`.
-    error_score : 'raise' or numeric, default=np.nan
+    error_score: 'raise' or numeric, default=np.nan
         Value to assign to the score if an error occurs in estimator fitting.
         If set to 'raise', the error is raised. If a numeric value is given,
         FitFailedWarning is raised. This parameter does not affect the refit
         step, which will always raise the error.
-    return_train_score : bool, default=False
+    return_train_score: bool, default=False
         If ``False``, the ``cv_results_`` attribute will not include training
         scores.
         Computing training scores is used to get insights on how different
@@ -185,35 +219,6 @@ class RandomizedSearchCV(BaseTransformer):
         However computing the scores on the training set can be computationally
         expensive and is not strictly required to select the parameters that
         yield the best generalization performance.
-    input_cols : Optional[Union[str, List[str]]]
-        A string or list of strings representing column names that contain features.
-        If this parameter is not specified, all columns in the input DataFrame except
-        the columns specified by label_cols and sample-weight_col parameters are
-        considered input columns.
-    label_cols : Optional[Union[str, List[str]]]
-        A string or list of strings representing column names that contain labels.
-        This is a required param for estimators, as there is no way to infer these
-        columns. If this parameter is not specified, then object is fitted without
-        labels(Like a transformer).
-    output_cols: Optional[Union[str, List[str]]]
-        A string or list of strings representing column names that will store the
-        output of predict and transform operations. The length of output_cols mus
-        match the expected number of output columns from the specific estimator or
-        transformer class used.
-        If this parameter is not specified, output column names are derived by
-        adding an OUTPUT_ prefix to the label column names. These inferred output
-        column names work for estimator's predict() method, but output_cols must
-        be set explicitly for transformers.
-    sample_weight_col: Optional[str]
-        A string representing the column name containing the examples’ weights.
-        This argument is only required when working with weighted datasets.
-    drop_input_cols: Optional[bool], default=False
-        If set, the response of predict(), transform() methods will not contain input columns.
     """
     _ENABLE_DISTRIBUTED = True
@@ -235,11 +240,16 @@ class RandomizedSearchCV(BaseTransformer):
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,
+        passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
         drop_input_cols: Optional[bool] = False,
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        deps: Set[str] = set(SklearnModelSelectionWrapperProvider().dependencies)
+        deps: Set[str] = {
+            f"numpy=={np.__version__}",
+            f"scikit-learn=={sklearn.__version__}",
+            f"cloudpickle=={cp.__version__}",
+        }
         deps = deps | gather_dependencies(estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
@@ -258,7 +268,7 @@ class RandomizedSearchCV(BaseTransformer):
             "return_train_score": (return_train_score, False, False),
         }
         cleaned_up_init_args = validate_sklearn_args(args=init_args, klass=sklearn.model_selection.RandomizedSearchCV)
-        self._sklearn_object = sklearn.model_selection.RandomizedSearchCV(
+        self._sklearn_object: Any = sklearn.model_selection.RandomizedSearchCV(
             **cleaned_up_init_args,
         )
         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
@@ -267,10 +277,10 @@ class RandomizedSearchCV(BaseTransformer):
         self.set_label_cols(label_cols)
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
+        self.set_passthrough_cols(passthrough_cols)
         self._handlers: CVHandlers = HandlersImpl(
             class_name=self.__class__.__name__,
             subproject=_SUBPROJECT,
-            wrapper_provider=SklearnModelSelectionWrapperProvider(),
         )
     def _get_rand_id(self) -> str:
@@ -282,21 +292,6 @@ class RandomizedSearchCV(BaseTransformer):
         """
         return str(uuid4()).replace("-", "_").upper()
-    def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
-        """
-        Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
-        Args:
-            dataset: Input dataset.
-        """
-        if not self.input_cols:
-            cols = [c for c in dataset.columns if c not in self.get_label_cols() and c != self.sample_weight_col]
-            self.set_input_cols(input_cols=cols)
-        if not self.output_cols:
-            cols = [identifier.concat_names(ids=["OUTPUT_", c]) for c in self.label_cols]
-            self.set_output_cols(output_cols=cols)
     def _get_active_columns(self) -> List[str]:
         """ "Get the list of columns that are relevant to the transformer."""
         selected_cols = (
@@ -313,10 +308,6 @@ class RandomizedSearchCV(BaseTransformer):
         For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV.fit)
-        Raises:
-            TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
         Args:
             dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
                 Snowpark or Pandas DataFrame.
@@ -325,74 +316,37 @@ class RandomizedSearchCV(BaseTransformer):
             self
         """
         self._infer_input_output_cols(dataset)
-        if isinstance(dataset, pd.DataFrame):
-            self._estimator = self._handlers.fit_pandas(
-                dataset, self._sklearn_object, self.input_cols, self.label_cols, self.sample_weight_col
-            )
-        elif isinstance(dataset, DataFrame):
-            self._fit_snowpark(dataset)
-        else:
-            raise TypeError(
-                f"Unexpected dataset type: {type(dataset)}."
-                "Supported dataset types: snowpark.DataFrame, pandas.DataFrame."
+        if hasattr(self._sklearn_object, "n_jobs") and self._sklearn_object.n_jobs is None:
+            self._sklearn_object.n_jobs = -1
+        if isinstance(dataset, DataFrame):
+            session = dataset._session
+            assert session is not None  # keep mypy happy
+            # Validate that key package version in user workspace are supported in snowflake conda channel
+            # If customer doesn't have package in conda channel, replace the ones have the closest versions
+            self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
+                pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT
             )
-        self._is_fitted = True
-        self._get_model_signatures(dataset)
-        return self
-    def _fit_snowpark(self, dataset: DataFrame) -> None:
-        session = dataset._session
-        assert session is not None  # keep mypy happy
-        # Validate that key package version in user workspace are supported in snowflake conda channel
-        # If customer doesn't have package in conda channel, replace the ones have the closest versions
-        self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT
-        )
+            # Specify input columns so column pruning will be enforced
+            selected_cols = self._get_active_columns()
+            if len(selected_cols) > 0:
+                dataset = dataset.select(selected_cols)
-        selected_cols = self._get_active_columns()
-        if len(selected_cols) > 0:
-            dataset = dataset.select(selected_cols)
+            self._snowpark_cols = dataset.select(self.input_cols).columns
-        assert self._sklearn_object is not None
-        is_distributed = not is_single_node(session) and self._ENABLE_DISTRIBUTED is True
-        if is_distributed:
-            # Set the default value of the `n_jobs` attribute for the estimator.
-            # If minus one is set, it will not be abided by in the UDTF, so we set that to the default value as well.
-            if hasattr(self._sklearn_object.estimator, "n_jobs") and self._sklearn_object.estimator.n_jobs in [
-                None,
-                -1,
-            ]:
-                self._sklearn_object.estimator.n_jobs = DEFAULT_UDTF_NJOBS
-            self._sklearn_object = self._handlers.fit_search_snowpark(
-                param_grid=ParameterSampler(
-                    self._sklearn_object.param_distributions,
-                    n_iter=self._sklearn_object.n_iter,
-                    random_state=self._sklearn_object.random_state,
-                ),
-                dataset=dataset,
-                session=session,
-                estimator=self._sklearn_object,
-                dependencies=self._get_dependencies(),
-                udf_imports=["sklearn"],
-                input_cols=self.input_cols,
-                label_cols=self.label_cols,
-                sample_weight_col=self.sample_weight_col,
-            )
-        else:
-            # Fall back with stored procedure implementation
-            # set the parallel factor to default to minus one, to fully accelerate the cores in single node
-            if self._sklearn_object.n_jobs is None:
-                self._sklearn_object.n_jobs = -1
-            self._sklearn_object = self._handlers.fit_snowpark(
-                dataset,
-                session,
-                self._sklearn_object,
-                ["snowflake-snowpark-python"] + self._get_dependencies(),
-                self.input_cols,
-                self.label_cols,
-                self.sample_weight_col,
-            )
+        model_trainer = ModelTrainerBuilder.build(
+            estimator=self._sklearn_object,
+            dataset=dataset,
+            input_cols=self.input_cols,
+            label_cols=self.label_cols,
+            sample_weight_col=self.sample_weight_col,
+            autogenerated=False,
+            subproject=_SUBPROJECT,
+        )
+        self._sklearn_object = model_trainer.train()
+        self._is_fitted = True
+        self._get_model_signatures(dataset)
+        return self
     def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
         if self._drop_input_cols:
@@ -449,7 +403,7 @@ class RandomizedSearchCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
-        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
+        quoted_input_cols = identifier.get_inferred_names(unquoted_input_cols)
         estimator = self._sklearn_object
@@ -546,10 +500,6 @@ class RandomizedSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
         """Call predict on the estimator with the best found parameters
         For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.predict]
@@ -591,10 +541,6 @@ class RandomizedSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
         """Call transform on the estimator with the best found parameters
         For more details on this function, see [sklearn.model_selection.RandomizedSearchCV.transform]
@@ -658,10 +604,6 @@ class RandomizedSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def predict_proba(
         self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_proba_"
     ) -> Union[DataFrame, pd.DataFrame]:
@@ -699,10 +641,6 @@ class RandomizedSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def predict_log_proba(
         self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_log_proba_"
     ) -> Union[DataFrame, pd.DataFrame]:
@@ -741,10 +679,6 @@ class RandomizedSearchCV(BaseTransformer):
         project=_PROJECT,
         subproject=_SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-    )
     def decision_function(
         self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "decision_function_"
     ) -> Union[DataFrame, pd.DataFrame]:
@@ -781,6 +715,8 @@ class RandomizedSearchCV(BaseTransformer):
     @available_if(original_estimator_has_callable("score"))  # type: ignore[misc]
     def score(self, dataset: Union[DataFrame, pd.DataFrame]) -> float:
         """
+        If implemented by the original estimator, return the score for the dataset.
         Args:
             dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
                 Snowpark or Pandas DataFrame.
@@ -833,9 +769,9 @@ class RandomizedSearchCV(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = _infer_signature(dataset[self.label_cols], "output")
+                outputs = list(_infer_signature(dataset[self.label_cols], "output"))
                 # rename the output columns
-                outputs = model_signature_utils.rename_features(outputs, self.output_cols)
+                outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
                     inputs, ([] if self._drop_input_cols else inputs) + outputs
                 )
@@ -872,6 +808,9 @@ class RandomizedSearchCV(BaseTransformer):
         return self._model_signature_dict
     def to_sklearn(self) -> sklearn.model_selection.RandomizedSearchCV:
+        """
+        Get sklearn.model_selection.RandomizedSearchCV object.
+        """
         assert self._sklearn_object is not None
         return self._sklearn_object

snowflake-ml-python 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

snowflake-ml-python 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl