snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/telemetry.py +4 -2
- snowflake/ml/_internal/utils/import_utils.py +31 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +13 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +8 -0
- snowflake/ml/data/data_connector.py +1 -1
- snowflake/ml/data/torch_utils.py +33 -14
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +5 -3
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +7 -5
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +4 -2
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +3 -1
- snowflake/ml/feature_store/examples/example_helper.py +6 -3
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +4 -2
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +4 -2
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +3 -1
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +3 -1
- snowflake/ml/feature_store/feature_store.py +1 -2
- snowflake/ml/feature_store/feature_view.py +5 -1
- snowflake/ml/model/_client/model/model_version_impl.py +144 -10
- snowflake/ml/model/_client/ops/model_ops.py +25 -6
- snowflake/ml/model/_client/ops/service_ops.py +33 -28
- snowflake/ml/model/_client/service/model_deployment_spec.py +19 -8
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
- snowflake/ml/model/_client/sql/model.py +14 -0
- snowflake/ml/model/_model_composer/model_composer.py +2 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/model_method.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +2 -0
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +10 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +3 -6
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -1
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -65
- snowflake/ml/model/_packager/model_handlers/xgboost.py +10 -40
- snowflake/ml/model/_packager/model_packager.py +0 -11
- snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} +13 -25
- snowflake/ml/model/_signatures/pandas_handler.py +16 -0
- snowflake/ml/model/custom_model.py +47 -7
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +8 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +13 -0
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +7 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +16 -5
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -8
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +17 -19
- snowflake/ml/modeling/cluster/dbscan.py +5 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +7 -19
- snowflake/ml/modeling/cluster/k_means.py +14 -19
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +3 -3
- snowflake/ml/modeling/cluster/optics.py +6 -6
- snowflake/ml/modeling/cluster/spectral_clustering.py +4 -3
- snowflake/ml/modeling/compose/column_transformer.py +15 -5
- snowflake/ml/modeling/compose/transformed_target_regressor.py +7 -6
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +2 -2
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +2 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -12
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -12
- snowflake/ml/modeling/decomposition/pca.py +28 -15
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -0
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -12
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -11
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -8
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -8
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +21 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +18 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +2 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +2 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +21 -8
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +21 -11
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +21 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +18 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +2 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +2 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +5 -10
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +5 -11
- snowflake/ml/modeling/linear_model/elastic_net.py +3 -0
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +0 -10
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -11
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +0 -10
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -11
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +0 -10
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -22
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +30 -24
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +4 -13
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +4 -4
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +3 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +3 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +14 -6
- snowflake/ml/modeling/linear_model/ridge_cv.py +17 -11
- snowflake/ml/modeling/linear_model/sgd_classifier.py +2 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +12 -3
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +3 -3
- snowflake/ml/modeling/manifold/tsne.py +10 -4
- snowflake/ml/modeling/metrics/classification.py +12 -16
- snowflake/ml/modeling/metrics/ranking.py +3 -3
- snowflake/ml/modeling/metrics/regression.py +3 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +3 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +10 -4
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +5 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +2 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +7 -14
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +7 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +3 -0
- snowflake/ml/modeling/pipeline/pipeline.py +16 -14
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +8 -4
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -7
- snowflake/ml/modeling/svm/linear_svc.py +25 -16
- snowflake/ml/modeling/svm/linear_svr.py +23 -17
- snowflake/ml/modeling/svm/nu_svc.py +5 -3
- snowflake/ml/modeling/svm/nu_svr.py +3 -1
- snowflake/ml/modeling/svm/svc.py +9 -5
- snowflake/ml/modeling/svm/svr.py +3 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +21 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +18 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -9
- snowflake/ml/modeling/tree/extra_tree_regressor.py +18 -2
- snowflake/ml/monitoring/_client/{monitor_sql_client.py → model_monitor_sql_client.py} +1 -1
- snowflake/ml/monitoring/{_client → _manager}/model_monitor_manager.py +9 -8
- snowflake/ml/monitoring/{_client/model_monitor.py → model_monitor.py} +3 -3
- snowflake/ml/registry/_manager/model_manager.py +15 -1
- snowflake/ml/registry/registry.py +15 -8
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/METADATA +75 -9
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/RECORD +149 -149
- /snowflake/ml/monitoring/{_client/model_monitor_version.py → model_monitor_version.py} +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/cluster/feature_agglomeration.py

@@ -113,28 +113,18 @@ class FeatureAgglomeration(BaseTransformer):
         The number of clusters to find. It must be ``None`` if
         ``distance_threshold`` is not ``None``.
 
-
-        The metric to use when calculating distance between instances in a
-        feature array. If metric is a string or callable, it must be one of
-        the options allowed by :func:`sklearn.metrics.pairwise_distances` for
-        its metric parameter.
-        If linkage is "ward", only "euclidean" is accepted.
-        If "precomputed", a distance matrix (instead of a similarity matrix)
-        is needed as input for the fit method.
-
-    metric: str or callable, default=None
+    metric: str or callable, default="euclidean"
         Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
-        "manhattan", "cosine", or "precomputed". If
-        "euclidean" is
-
-        the fit method.
+        "manhattan", "cosine", or "precomputed". If linkage is "ward", only
+        "euclidean" is accepted. If "precomputed", a distance matrix is needed
+        as input for the fit method.
 
     memory: str or object with the joblib.Memory interface, default=None
         Used to cache the output of the computation of the tree.
         By default, no caching is done. If a string is given, it is the
         path to the caching directory.
 
-    connectivity: array-like or callable, default=None
+    connectivity: array-like, sparse matrix, or callable, default=None
         Connectivity matrix. Defines for each feature the neighboring
         features following a given structure of the data.
         This can be a connectivity matrix itself or a callable that transforms

@@ -187,8 +177,7 @@ class FeatureAgglomeration(BaseTransformer):
         self,
         *,
         n_clusters=2,
-
-        metric=None,
+        metric="euclidean",
         memory=None,
         connectivity=None,
         compute_full_tree="auto",

@@ -218,8 +207,7 @@ class FeatureAgglomeration(BaseTransformer):
         self._deps = list(deps)
 
         init_args = {'n_clusters':(n_clusters, 2, False),
-                     '
-                     'metric':(metric, None, False),
+                     'metric':(metric, "euclidean", False),
                      'memory':(memory, None, False),
                      'connectivity':(connectivity, None, False),
                      'compute_full_tree':(compute_full_tree, "auto", False),
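Net effect of the FeatureAgglomeration hunks: the deprecated affinity handling is gone and `metric` now defaults to `"euclidean"` instead of `None`. Below is a minimal sketch of the new default, assuming snowflake-ml-python 1.7.0 in a local pandas workflow; the toy data and the input/output column wiring are illustrative, not taken from the package.

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.cluster import FeatureAgglomeration

    df = pd.DataFrame(np.random.rand(20, 4), columns=["F1", "F2", "F3", "F4"])

    agglo = FeatureAgglomeration(
        n_clusters=2,
        metric="euclidean",  # explicit here, but identical to the new default
        input_cols=["F1", "F2", "F3", "F4"],
        output_cols=["OUT1", "OUT2"],  # one output column per cluster
    )
    agglo.fit(df)
    reduced = agglo.transform(df)

Passing `metric=None` (the old default) is no longer meaningful once the underlying scikit-learn drops the `affinity` alias.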
snowflake/ml/modeling/cluster/k_means.py

@@ -113,26 +113,24 @@ class KMeans(BaseTransformer):
         The number of clusters to form as well as the number of
         centroids to generate.
 
+        For an example of how to choose an optimal value for `n_clusters` refer to
+        :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`.
+
     init: {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'
         Method for initialization:
 
-        'k-means++': selects initial cluster centroids using sampling
-
-
-        implemented is "greedy k-means++". It differs from the vanilla k-means++
-        by making several trials at each sampling step and choosing the best centroid
-        among them.
+        * 'k-means++': selects initial cluster centroids using sampling based on an empirical probability distribution of the points' contribution to the overall inertia. This technique speeds up convergence. The algorithm implemented is "greedy k-means++". It differs from the vanilla k-means++ by making several trials at each sampling step and choosing the best centroid among them.
+
+        * 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids.
 
-
-        for the initial centroids.
+        * If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers.
 
-        If
-        and gives the initial centers.
+        * If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization.
 
-
-
+        For an example of how to use the different `init` strategy, see the example
+        entitled :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`.
 
-    n_init: 'auto' or int, default=
+    n_init: 'auto' or int, default='auto'
         Number of times the k-means algorithm is run with different centroid
         seeds. The final results is the best output of `n_init` consecutive runs
         in terms of inertia. Several runs are recommended for sparse

@@ -169,15 +167,12 @@ class KMeans(BaseTransformer):
         copy_x is False. If the original data is sparse, but not in CSR format,
         a copy will be made even if copy_x is False.
 
-    algorithm: {"lloyd", "elkan"
+    algorithm: {"lloyd", "elkan"}, default="lloyd"
         K-means algorithm to use. The classical EM-style algorithm is `"lloyd"`.
         The `"elkan"` variation can be more efficient on some datasets with
         well-defined clusters, by using the triangle inequality. However it's
         more memory intensive due to the allocation of an extra array of shape
         `(n_samples, n_clusters)`.
-
-        `"auto"` and `"full"` are deprecated and they will be removed in
-        Scikit-Learn 1.3. They are both aliases for `"lloyd"`.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -185,7 +180,7 @@ class KMeans(BaseTransformer):
         *,
         n_clusters=8,
         init="k-means++",
-        n_init="
+        n_init="auto",
         max_iter=300,
         tol=0.0001,
         verbose=0,

@@ -215,7 +210,7 @@ class KMeans(BaseTransformer):
 
         init_args = {'n_clusters':(n_clusters, 8, False),
                      'init':(init, "k-means++", False),
-                     'n_init':(n_init, "
+                     'n_init':(n_init, "auto", False),
                      'max_iter':(max_iter, 300, False),
                      'tol':(tol, 0.0001, False),
                      'verbose':(verbose, 0, False),
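Net effect of the KMeans hunks: `n_init` now defaults to `'auto'`, and the deprecated `"auto"`/`"full"` aliases for `algorithm` are dropped from the docstring. The MiniBatchKMeans hunks that follow apply the same `n_init` default. A minimal sketch under those assumptions; the DataFrame and column names are illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.cluster import KMeans

    df = pd.DataFrame(np.random.rand(100, 2), columns=["X1", "X2"])

    kmeans = KMeans(
        n_clusters=8,       # unchanged default
        n_init="auto",      # new default; no deprecation placeholder anymore
        algorithm="lloyd",  # "auto"/"full" aliases are no longer documented
        input_cols=["X1", "X2"],
        output_cols=["CLUSTER"],
    )
    kmeans.fit(df)
    labels = kmeans.predict(df)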
snowflake/ml/modeling/cluster/mini_batch_k_means.py

@@ -180,7 +180,7 @@ class MiniBatchKMeans(BaseTransformer):
         If `None`, the heuristic is `init_size = 3 * batch_size` if
         `3 * batch_size < n_clusters`, else `init_size = 3 * n_clusters`.
 
-    n_init: 'auto' or int, default=
+    n_init: 'auto' or int, default="auto"
         Number of random initializations that are tried.
         In contrast to KMeans, the algorithm is only run once, using the best of
         the `n_init` initializations as measured by inertia. Several runs are

@@ -213,7 +213,7 @@ class MiniBatchKMeans(BaseTransformer):
         tol=0.0,
         max_no_improvement=10,
         init_size=None,
-        n_init="
+        n_init="auto",
         reassignment_ratio=0.01,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -246,7 +246,7 @@ class MiniBatchKMeans(BaseTransformer):
                      'tol':(tol, 0.0, False),
                      'max_no_improvement':(max_no_improvement, 10, False),
                      'init_size':(init_size, None, False),
-                     'n_init':(n_init, "
+                     'n_init':(n_init, "auto", False),
                      'reassignment_ratio':(reassignment_ratio, 0.01, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
snowflake/ml/modeling/cluster/optics.py

@@ -189,8 +189,8 @@ class OPTICS(BaseTransformer):
     algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
         Algorithm used to compute the nearest neighbors:
 
-        - 'ball_tree' will use :class
-        - 'kd_tree' will use :class
+        - 'ball_tree' will use :class:`~sklearn.neighbors.BallTree`.
+        - 'kd_tree' will use :class:`~sklearn.neighbors.KDTree`.
         - 'brute' will use a brute-force search.
         - 'auto' (default) will attempt to decide the most appropriate
           algorithm based on the values passed to :meth:`fit` method.

@@ -199,10 +199,10 @@ class OPTICS(BaseTransformer):
         this parameter, using brute force.
 
     leaf_size: int, default=30
-        Leaf size passed to :class
-        affect the speed of the
-
-        nature of the problem.
+        Leaf size passed to :class:`~sklearn.neighbors.BallTree` or
+        :class:`~sklearn.neighbors.KDTree`. This can affect the speed of the
+        construction and query, as well as the memory required to store the
+        tree. The optimal value depends on the nature of the problem.
 
     memory: str or object with the joblib.Memory interface, default=None
         Used to cache the output of the computation of the tree.
snowflake/ml/modeling/cluster/spectral_clustering.py

@@ -137,7 +137,8 @@ class SpectralClustering(BaseTransformer):
 
     gamma: float, default=1.0
         Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.
-        Ignored for ``affinity='nearest_neighbors'
+        Ignored for ``affinity='nearest_neighbors'``, ``affinity='precomputed'``
+        or ``affinity='precomputed_nearest_neighbors'``.
 
     affinity: str or callable, default='rbf'
         How to construct the affinity matrix.

@@ -151,7 +152,7 @@ class SpectralClustering(BaseTransformer):
           of precomputed distances, and construct a binary affinity matrix
           from the ``n_neighbors`` nearest neighbors of each instance.
         - one of the kernels supported by
-          :func:`~sklearn.metrics.pairwise_kernels`.
+          :func:`~sklearn.metrics.pairwise.pairwise_kernels`.
 
         Only kernels that produce similarity scores (non-negative values that
         increase with similarity) should be used. This property is not checked

@@ -162,7 +163,7 @@ class SpectralClustering(BaseTransformer):
         the nearest neighbors method. Ignored for ``affinity='rbf'``.
 
     eigen_tol: float, default="auto"
-        Stopping criterion for
+        Stopping criterion for eigen decomposition of the Laplacian matrix.
         If `eigen_tol="auto"` then the passed tolerance will depend on the
         `eigen_solver`:
 
snowflake/ml/modeling/compose/column_transformer.py

@@ -171,10 +171,18 @@ class ColumnTransformer(BaseTransformer):
         printed as it is completed.
 
     verbose_feature_names_out: bool, default=True
-        If True, :meth:`get_feature_names_out` will prefix
-        with the name of the transformer that generated that
-
-
+        If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix
+        all feature names with the name of the transformer that generated that
+        feature.
+        If False, :meth:`ColumnTransformer.get_feature_names_out` will not
+        prefix any feature names and will error if feature names are not
+        unique.
+
+    force_int_remainder_cols: bool, default=True
+        Force the columns of the last entry of `transformers_`, which
+        corresponds to the "remainder" transformer, to always be stored as
+        indices (int) rather than column names (str). See description of the
+        `transformers_` attribute for details.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -187,6 +195,7 @@ class ColumnTransformer(BaseTransformer):
         transformer_weights=None,
         verbose=False,
         verbose_feature_names_out=True,
+        force_int_remainder_cols=True,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -214,7 +223,8 @@ class ColumnTransformer(BaseTransformer):
                      'n_jobs':(n_jobs, None, False),
                      'transformer_weights':(transformer_weights, None, False),
                      'verbose':(verbose, False, False),
-                     'verbose_feature_names_out':(verbose_feature_names_out, True, False),}
+                     'verbose_feature_names_out':(verbose_feature_names_out, True, False),
+                     'force_int_remainder_cols':(force_int_remainder_cols, True, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.compose.ColumnTransformer
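The ColumnTransformer hunks add a `force_int_remainder_cols` argument (default `True`), mirroring the same addition in scikit-learn. A hedged sketch of passing it through the wrapper; the transformer list and column names here are illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; the inner
    # transformer and columns are illustrative.
    from sklearn.preprocessing import StandardScaler
    from snowflake.ml.modeling.compose import ColumnTransformer

    ct = ColumnTransformer(
        transformers=[("scale", StandardScaler(), ["AGE", "INCOME"])],
        remainder="passthrough",
        # New in this release: keep the "remainder" entry of transformers_
        # as integer indices (True, the default) rather than column names.
        force_int_remainder_cols=True,
    )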
snowflake/ml/modeling/compose/transformed_target_regressor.py

@@ -125,15 +125,16 @@ class TransformedTargetRegressor(BaseTransformer):
 
     func: function, default=None
         Function to apply to `y` before passing to :meth:`fit`. Cannot be set
-        at the same time as `transformer`.
-
-
+        at the same time as `transformer`. If `func is None`, the function used will be
+        the identity function. If `func` is set, `inverse_func` also needs to be
+        provided. The function needs to return a 2-dimensional array.
 
     inverse_func: function, default=None
         Function to apply to the prediction of the regressor. Cannot be set at
-        the same time as `transformer`. The function
-
-
+        the same time as `transformer`. The inverse function is used to return
+        predictions to the same space of the original training labels. If
+        `inverse_func` is set, `func` also needs to be provided. The inverse
+        function needs to return a 2-dimensional array.
 
     check_inverse: bool, default=True
         Whether to check that `transform` followed by `inverse_transform`
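The new docstring text spells out the `func`/`inverse_func` contract: they must be provided together, and each must return a 2-dimensional array. A minimal sketch under that contract, assuming the wrapper accepts plain NumPy ufuncs like its scikit-learn counterpart; the nested regressor is illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0.
    import numpy as np
    from snowflake.ml.modeling.compose import TransformedTargetRegressor
    from snowflake.ml.modeling.linear_model import LinearRegression

    ttr = TransformedTargetRegressor(
        regressor=LinearRegression(),
        func=np.log1p,          # applied to y before fit; shape-preserving
        inverse_func=np.expm1,  # maps predictions back to the label space
    )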
snowflake/ml/modeling/covariance/elliptic_envelope.py

@@ -124,7 +124,7 @@ class EllipticEnvelope(BaseTransformer):
     support_fraction: float, default=None
         The proportion of points to be included in the support of the raw
         MCD estimate. If None, the minimum value of support_fraction will
-        be used within the algorithm: `
+        be used within the algorithm: `(n_samples + n_features + 1) / 2 * n_samples`.
         Range is (0, 1).
 
     contamination: float, default=0.1
snowflake/ml/modeling/covariance/graphical_lasso_cv.py

@@ -129,7 +129,7 @@ class GraphicalLassoCV(BaseTransformer):
         - :term:`CV splitter`,
         - An iterable yielding (train, test) splits as arrays of indices.
 
-        For integer/None inputs :class
+        For integer/None inputs :class:`~sklearn.model_selection.KFold` is used.
 
         Refer :ref:`User Guide <cross_validation>` for the various
         cross-validation strategies that can be used here.
snowflake/ml/modeling/covariance/min_cov_det.py

@@ -125,8 +125,8 @@ class MinCovDet(BaseTransformer):
         The proportion of points to be included in the support of the raw
         MCD estimate. Default is None, which implies that the minimum
         value of support_fraction will be used within the algorithm:
-        `(
-        (0, 1].
+        `(n_samples + n_features + 1) / 2 * n_samples`. The parameter must be
+        in the range (0, 1].
 
     random_state: int, RandomState instance or None, default=None
         Determines the pseudo random number generator for shuffling the data.
snowflake/ml/modeling/covariance/oas.py

@@ -58,7 +58,7 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.covariance".replace("skl
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
 class OAS(BaseTransformer):
-    r"""Oracle Approximating Shrinkage Estimator
+    r"""Oracle Approximating Shrinkage Estimator
     For more details on this class, see [sklearn.covariance.OAS]
     (https://scikit-learn.org/stable/modules/generated/sklearn.covariance.OAS.html)
 
snowflake/ml/modeling/decomposition/kernel_pca.py

@@ -58,7 +58,7 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.decomposition".replace("
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
 class KernelPCA(BaseTransformer):
-    r"""Kernel Principal component analysis (KPCA)
+    r"""Kernel Principal component analysis (KPCA)
     For more details on this class, see [sklearn.decomposition.KernelPCA]
     (https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html)
 

@@ -119,7 +119,7 @@ class KernelPCA(BaseTransformer):
         Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other
         kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.
 
-    degree:
+    degree: float, default=3
         Degree for poly kernels. Ignored by other kernels.
 
     coef0: float, default=1
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py

@@ -115,13 +115,9 @@ class MiniBatchDictionaryLearning(BaseTransformer):
     alpha: float, default=1
         Sparsity controlling parameter.
 
-
-        Total number of iterations over data batches to perform.
-
-    max_iter: int, default=None
+    max_iter: int, default=1_000
         Maximum number of iterations over the complete dataset before
         stopping independently of any early stopping criterion heuristics.
-        If ``max_iter`` is not None, ``n_iter`` is ignored.
 
     fit_algorithm: {'lars', 'cd'}, default='lars'
         The algorithm used:

@@ -204,15 +200,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
 
     tol: float, default=1e-3
         Control early stopping based on the norm of the differences in the
-        dictionary between 2 steps.
+        dictionary between 2 steps.
 
         To disable early stopping based on changes in the dictionary, set
         `tol` to 0.0.
 
     max_no_improvement: int, default=10
         Control early stopping based on the consecutive number of mini batches
-        that does not yield an improvement on the smoothed cost function.
-        `max_iter` is not None.
+        that does not yield an improvement on the smoothed cost function.
 
         To disable convergence detection based on cost function, set
         `max_no_improvement` to None.

@@ -223,8 +218,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
         *,
         n_components=None,
         alpha=1,
-
-        max_iter=None,
+        max_iter=1000,
         fit_algorithm="lars",
         n_jobs=None,
         batch_size=256,

@@ -265,8 +259,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
 
         init_args = {'n_components':(n_components, None, False),
                      'alpha':(alpha, 1, False),
-                     '
-                     'max_iter':(max_iter, None, False),
+                     'max_iter':(max_iter, 1000, False),
                      'fit_algorithm':(fit_algorithm, "lars", False),
                      'n_jobs':(n_jobs, None, False),
                      'batch_size':(batch_size, 256, False),
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py

@@ -121,13 +121,9 @@ class MiniBatchSparsePCA(BaseTransformer):
         Amount of ridge shrinkage to apply in order to improve
         conditioning when calling the transform method.
 
-
-        Number of iterations to perform for each mini batch.
-
-    max_iter: int, default=None
+    max_iter: int, default=1_000
         Maximum number of iterations over the complete dataset before
         stopping independently of any early stopping criterion heuristics.
-        If `max_iter` is not `None`, `n_iter` is ignored.
 
     callback: callable, default=None
         Callable that gets invoked every five iterations.

@@ -163,15 +159,14 @@ class MiniBatchSparsePCA(BaseTransformer):
 
     tol: float, default=1e-3
         Control early stopping based on the norm of the differences in the
-        dictionary between 2 steps.
+        dictionary between 2 steps.
 
         To disable early stopping based on changes in the dictionary, set
         `tol` to 0.0.
 
     max_no_improvement: int or None, default=10
         Control early stopping based on the consecutive number of mini batches
-        that does not yield an improvement on the smoothed cost function.
-        `max_iter` is not None.
+        that does not yield an improvement on the smoothed cost function.
 
         To disable convergence detection based on cost function, set
         `max_no_improvement` to `None`.

@@ -183,8 +178,7 @@ class MiniBatchSparsePCA(BaseTransformer):
         n_components=None,
         alpha=1,
         ridge_alpha=0.01,
-
-        max_iter=None,
+        max_iter=1000,
         callback=None,
         batch_size=3,
         verbose=False,

@@ -218,8 +212,7 @@ class MiniBatchSparsePCA(BaseTransformer):
         init_args = {'n_components':(n_components, None, False),
                      'alpha':(alpha, 1, False),
                      'ridge_alpha':(ridge_alpha, 0.01, False),
-                     '
-                     'max_iter':(max_iter, None, False),
+                     'max_iter':(max_iter, 1000, False),
                      'callback':(callback, None, False),
                      'batch_size':(batch_size, 3, False),
                      'verbose':(verbose, False, False),
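Both mini-batch decomposition wrappers drop the old `n_iter` plumbing and make `max_iter` default to 1000. A minimal sketch against MiniBatchSparsePCA (the same applies to MiniBatchDictionaryLearning); data and output column names are illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.decomposition import MiniBatchSparsePCA

    df = pd.DataFrame(np.random.rand(50, 6), columns=[f"F{i}" for i in range(6)])

    mbspca = MiniBatchSparsePCA(
        n_components=3,
        max_iter=1000,  # new default; the removed n_iter argument is gone
        input_cols=list(df.columns),
        output_cols=[f"PC{i}" for i in range(3)],
    )
    mbspca.fit(df)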
snowflake/ml/modeling/decomposition/pca.py

@@ -145,23 +145,36 @@ class PCA(BaseTransformer):
         improve the predictive accuracy of the downstream estimators by
         making their data respect some hard-wired assumptions.
 
-    svd_solver: {'auto', 'full', 'arpack', 'randomized'},
-
-        The solver is selected by a default policy based on `X.shape` and
-        `n_components`: if the input data
-
-
-
-
-
-
+    svd_solver: {'auto', 'full', 'covariance_eigh', 'arpack', 'randomized'}, default='auto'
+        "auto" :
+            The solver is selected by a default 'auto' policy is based on `X.shape` and
+            `n_components`: if the input data has fewer than 1000 features and
+            more than 10 times as many samples, then the "covariance_eigh"
+            solver is used. Otherwise, if the input data is larger than 500x500
+            and the number of components to extract is lower than 80% of the
+            smallest dimension of the data, then the more efficient
+            "randomized" method is selected. Otherwise the exact "full" SVD is
+            computed and optionally truncated afterwards.
+        "full" :
+            Run exact full SVD calling the standard LAPACK solver via
             `scipy.linalg.svd` and select the components by postprocessing
-
-
+        "covariance_eigh" :
+            Precompute the covariance matrix (on centered data), run a
+            classical eigenvalue decomposition on the covariance matrix
+            typically using LAPACK and select the components by postprocessing.
+            This solver is very efficient for n_samples >> n_features and small
+            n_features. It is, however, not tractable otherwise for large
+            n_features (large memory footprint required to materialize the
+            covariance matrix). Also note that compared to the "full" solver,
+            this solver effectively doubles the condition number and is
+            therefore less numerical stable (e.g. on input data with a large
+            range of singular values).
+        "arpack" :
+            Run SVD truncated to `n_components` calling ARPACK solver via
             `scipy.sparse.linalg.svds`. It requires strictly
-        0 < n_components < min(X.shape)
-
-
+            `0 < n_components < min(X.shape)`
+        "randomized" :
+            Run randomized SVD by the method of Halko et al.
 
     tol: float, default=0.0
         Tolerance for singular values computed by svd_solver == 'arpack'.
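The PCA hunk documents the new `'covariance_eigh'` solver. A minimal sketch, assuming the runtime ships a scikit-learn new enough to know this solver; the data shape is chosen to match the documented sweet spot of many samples and few features, and the column wiring is illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0 with a scikit-learn
    # backend that supports svd_solver="covariance_eigh"; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.decomposition import PCA

    # covariance_eigh is most efficient when n_samples >> n_features.
    df = pd.DataFrame(np.random.rand(10_000, 8), columns=[f"F{i}" for i in range(8)])

    pca = PCA(
        n_components=4,
        svd_solver="covariance_eigh",
        input_cols=list(df.columns),
        output_cols=[f"PC{i}" for i in range(4)],
    )
    pca.fit(df)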
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py

@@ -128,6 +128,9 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         This should be left to None if `covariance_estimator` is used.
         Note that shrinkage works only with 'lsqr' and 'eigen' solvers.
 
+        For a usage example, see
+        :ref:`sphx_glr_auto_examples_classification_plot_lda.py`.
+
     priors: array-like of shape (n_classes,), default=None
         The class prior probabilities. By default, the class proportions are
         inferred from the training data.

@@ -138,6 +141,9 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         min(n_classes - 1, n_features). This parameter only affects the
         `transform` method.
 
+        For a usage example, see
+        :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`.
+
     store_covariance: bool, default=False
         If True, explicitly compute the weighted within-class covariance
         matrix when solver is 'svd'. The matrix is always computed
snowflake/ml/modeling/ensemble/ada_boost_classifier.py

@@ -140,13 +140,6 @@ class AdaBoostClassifier(BaseTransformer):
         Thus, it is only used when `estimator` exposes a `random_state`.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-
-    base_estimator: object, default=None
-        The base estimator from which the boosted ensemble is built.
-        Support for sample weighting is required, as well as proper
-        ``classes_`` and ``n_classes_`` attributes. If ``None``, then
-        the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`
-        initialized with `max_depth=1`.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -157,7 +150,6 @@ class AdaBoostClassifier(BaseTransformer):
         learning_rate=1.0,
         algorithm="SAMME.R",
         random_state=None,
-        base_estimator="deprecated",
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -177,16 +169,13 @@ class AdaBoostClassifier(BaseTransformer):
         self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimator)
-        deps = deps | gather_dependencies(base_estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
-        base_estimator = transform_snowml_obj_to_sklearn_obj(base_estimator)
         init_args = {'estimator':(estimator, None, False),
                      'n_estimators':(n_estimators, 50, False),
                      'learning_rate':(learning_rate, 1.0, False),
                      'algorithm':(algorithm, "SAMME.R", False),
-                     'random_state':(random_state, None, False),
-                     'base_estimator':(base_estimator, "deprecated", False),}
+                     'random_state':(random_state, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.ensemble.AdaBoostClassifier
snowflake/ml/modeling/ensemble/ada_boost_regressor.py

@@ -138,12 +138,6 @@ class AdaBoostRegressor(BaseTransformer):
         `estimator` at each boosting iteration.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-
-    base_estimator: object, default=None
-        The base estimator from which the boosted ensemble is built.
-        If ``None``, then the base estimator is
-        :class:`~sklearn.tree.DecisionTreeRegressor` initialized with
-        `max_depth=3`.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -154,7 +148,6 @@ class AdaBoostRegressor(BaseTransformer):
         learning_rate=1.0,
         loss="linear",
         random_state=None,
-        base_estimator="deprecated",
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -174,16 +167,13 @@ class AdaBoostRegressor(BaseTransformer):
         self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimator)
-        deps = deps | gather_dependencies(base_estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
-        base_estimator = transform_snowml_obj_to_sklearn_obj(base_estimator)
         init_args = {'estimator':(estimator, None, False),
                      'n_estimators':(n_estimators, 50, False),
                      'learning_rate':(learning_rate, 1.0, False),
                      'loss':(loss, "linear", False),
-                     'random_state':(random_state, None, False),
-                     'base_estimator':(base_estimator, "deprecated", False),}
+                     'random_state':(random_state, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.ensemble.AdaBoostRegressor
snowflake/ml/modeling/ensemble/bagging_classifier.py

@@ -164,9 +164,6 @@ class BaggingClassifier(BaseTransformer):
 
     verbose: int, default=0
         Controls the verbosity when fitting and predicting.
-
-    base_estimator: object, default="deprecated"
-        Use `estimator` instead.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -183,7 +180,6 @@ class BaggingClassifier(BaseTransformer):
         n_jobs=None,
         random_state=None,
         verbose=0,
-        base_estimator="deprecated",
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -203,10 +199,8 @@ class BaggingClassifier(BaseTransformer):
         self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimator)
-        deps = deps | gather_dependencies(base_estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
-        base_estimator = transform_snowml_obj_to_sklearn_obj(base_estimator)
         init_args = {'estimator':(estimator, None, False),
                      'n_estimators':(n_estimators, 10, False),
                      'max_samples':(max_samples, 1.0, False),

@@ -217,8 +211,7 @@ class BaggingClassifier(BaseTransformer):
                      'warm_start':(warm_start, False, False),
                      'n_jobs':(n_jobs, None, False),
                      'random_state':(random_state, None, False),
-                     'verbose':(verbose, 0, False),
-                     'base_estimator':(base_estimator, "deprecated", False),}
+                     'verbose':(verbose, 0, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.ensemble.BaggingClassifier
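Across AdaBoostClassifier, AdaBoostRegressor and BaggingClassifier, the deprecated `base_estimator` keyword is removed outright, so callers must pass the sub-estimator as `estimator`. A minimal migration sketch; the sub-estimator choice here is illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0.
    from sklearn.tree import DecisionTreeClassifier
    from snowflake.ml.modeling.ensemble import AdaBoostClassifier

    # 1.6.4 and earlier still accepted base_estimator=...; in 1.7.0 that
    # keyword is gone from the signature and should raise a TypeError.
    clf = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1),
        n_estimators=50,
    )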
|