snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/telemetry.py +4 -2
- snowflake/ml/_internal/utils/import_utils.py +31 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +13 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +8 -0
- snowflake/ml/data/data_connector.py +1 -1
- snowflake/ml/data/torch_utils.py +33 -14
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +5 -3
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +7 -5
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +4 -2
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +3 -1
- snowflake/ml/feature_store/examples/example_helper.py +6 -3
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +4 -2
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +4 -2
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +3 -1
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +3 -1
- snowflake/ml/feature_store/feature_store.py +1 -2
- snowflake/ml/feature_store/feature_view.py +5 -1
- snowflake/ml/model/_client/model/model_version_impl.py +144 -10
- snowflake/ml/model/_client/ops/model_ops.py +25 -6
- snowflake/ml/model/_client/ops/service_ops.py +33 -28
- snowflake/ml/model/_client/service/model_deployment_spec.py +19 -8
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
- snowflake/ml/model/_client/sql/model.py +14 -0
- snowflake/ml/model/_model_composer/model_composer.py +2 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/model_method.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +2 -0
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +10 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +3 -6
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -1
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -65
- snowflake/ml/model/_packager/model_handlers/xgboost.py +10 -40
- snowflake/ml/model/_packager/model_packager.py +0 -11
- snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} +13 -25
- snowflake/ml/model/_signatures/pandas_handler.py +16 -0
- snowflake/ml/model/custom_model.py +47 -7
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +8 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +13 -0
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +7 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +16 -5
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -8
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +17 -19
- snowflake/ml/modeling/cluster/dbscan.py +5 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +7 -19
- snowflake/ml/modeling/cluster/k_means.py +14 -19
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +3 -3
- snowflake/ml/modeling/cluster/optics.py +6 -6
- snowflake/ml/modeling/cluster/spectral_clustering.py +4 -3
- snowflake/ml/modeling/compose/column_transformer.py +15 -5
- snowflake/ml/modeling/compose/transformed_target_regressor.py +7 -6
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +2 -2
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +2 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -12
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -12
- snowflake/ml/modeling/decomposition/pca.py +28 -15
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -0
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -12
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -11
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -8
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -8
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +21 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +18 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +2 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +2 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +21 -8
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +21 -11
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +21 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +18 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +2 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +2 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +5 -10
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +5 -11
- snowflake/ml/modeling/linear_model/elastic_net.py +3 -0
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +0 -10
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -11
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +0 -10
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -11
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +0 -10
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -22
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +30 -24
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +4 -13
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +4 -4
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +3 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +3 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +14 -6
- snowflake/ml/modeling/linear_model/ridge_cv.py +17 -11
- snowflake/ml/modeling/linear_model/sgd_classifier.py +2 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +12 -3
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +3 -3
- snowflake/ml/modeling/manifold/tsne.py +10 -4
- snowflake/ml/modeling/metrics/classification.py +12 -16
- snowflake/ml/modeling/metrics/ranking.py +3 -3
- snowflake/ml/modeling/metrics/regression.py +3 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +3 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +10 -4
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +5 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +2 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +7 -14
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +7 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +3 -0
- snowflake/ml/modeling/pipeline/pipeline.py +16 -14
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +8 -4
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -7
- snowflake/ml/modeling/svm/linear_svc.py +25 -16
- snowflake/ml/modeling/svm/linear_svr.py +23 -17
- snowflake/ml/modeling/svm/nu_svc.py +5 -3
- snowflake/ml/modeling/svm/nu_svr.py +3 -1
- snowflake/ml/modeling/svm/svc.py +9 -5
- snowflake/ml/modeling/svm/svr.py +3 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +21 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +18 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -9
- snowflake/ml/modeling/tree/extra_tree_regressor.py +18 -2
- snowflake/ml/monitoring/_client/{monitor_sql_client.py → model_monitor_sql_client.py} +1 -1
- snowflake/ml/monitoring/{_client → _manager}/model_monitor_manager.py +9 -8
- snowflake/ml/monitoring/{_client/model_monitor.py → model_monitor.py} +3 -3
- snowflake/ml/registry/_manager/model_manager.py +15 -1
- snowflake/ml/registry/registry.py +15 -8
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/METADATA +75 -9
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/RECORD +149 -149
- /snowflake/ml/monitoring/{_client/model_monitor_version.py → model_monitor_version.py} +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/cluster/feature_agglomeration.py

@@ -113,28 +113,18 @@ class FeatureAgglomeration(BaseTransformer):
         The number of clusters to find. It must be ``None`` if
         ``distance_threshold`` is not ``None``.
 
-
-        The metric to use when calculating distance between instances in a
-        feature array. If metric is a string or callable, it must be one of
-        the options allowed by :func:`sklearn.metrics.pairwise_distances` for
-        its metric parameter.
-        If linkage is "ward", only "euclidean" is accepted.
-        If "precomputed", a distance matrix (instead of a similarity matrix)
-        is needed as input for the fit method.
-
-    metric: str or callable, default=None
+    metric: str or callable, default="euclidean"
         Metric used to compute the linkage. Can be "euclidean", "l1", "l2",
-        "manhattan", "cosine", or "precomputed". If
-        "euclidean" is
-
-        the fit method.
+        "manhattan", "cosine", or "precomputed". If linkage is "ward", only
+        "euclidean" is accepted. If "precomputed", a distance matrix is needed
+        as input for the fit method.
 
     memory: str or object with the joblib.Memory interface, default=None
         Used to cache the output of the computation of the tree.
         By default, no caching is done. If a string is given, it is the
         path to the caching directory.
 
-    connectivity: array-like or callable, default=None
+    connectivity: array-like, sparse matrix, or callable, default=None
         Connectivity matrix. Defines for each feature the neighboring
         features following a given structure of the data.
         This can be a connectivity matrix itself or a callable that transforms

@@ -187,8 +177,7 @@ class FeatureAgglomeration(BaseTransformer):
         self,
         *,
         n_clusters=2,
-
-        metric=None,
+        metric="euclidean",
         memory=None,
         connectivity=None,
         compute_full_tree="auto",

@@ -218,8 +207,7 @@ class FeatureAgglomeration(BaseTransformer):
         self._deps = list(deps)
 
         init_args = {'n_clusters':(n_clusters, 2, False),
-                     '
-                     'metric':(metric, None, False),
+                     'metric':(metric, "euclidean", False),
                      'memory':(memory, None, False),
                      'connectivity':(connectivity, None, False),
                      'compute_full_tree':(compute_full_tree, "auto", False),
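Net effect of the FeatureAgglomeration hunks: the deprecated affinity handling is gone and `metric` now defaults to `"euclidean"` instead of `None`. Below is a minimal sketch of the new default, assuming snowflake-ml-python 1.7.0 in a local pandas workflow; the toy data and the input/output column wiring are illustrative, not taken from the package.

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.cluster import FeatureAgglomeration

    df = pd.DataFrame(np.random.rand(20, 4), columns=["F1", "F2", "F3", "F4"])

    agglo = FeatureAgglomeration(
        n_clusters=2,
        metric="euclidean",  # explicit here, but identical to the new default
        input_cols=["F1", "F2", "F3", "F4"],
        output_cols=["OUT1", "OUT2"],  # one output column per cluster
    )
    agglo.fit(df)
    reduced = agglo.transform(df)

Passing `metric=None` (the old default) is no longer meaningful once the underlying scikit-learn drops the `affinity` alias.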
snowflake/ml/modeling/cluster/k_means.py

@@ -113,26 +113,24 @@ class KMeans(BaseTransformer):
         The number of clusters to form as well as the number of
         centroids to generate.
 
+        For an example of how to choose an optimal value for `n_clusters` refer to
+        :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`.
+
     init: {'k-means++', 'random'}, callable or array-like of shape (n_clusters, n_features), default='k-means++'
         Method for initialization:
 
-        'k-means++': selects initial cluster centroids using sampling
-
-
-        implemented is "greedy k-means++". It differs from the vanilla k-means++
-        by making several trials at each sampling step and choosing the best centroid
-        among them.
+        * 'k-means++': selects initial cluster centroids using sampling based on an empirical probability distribution of the points' contribution to the overall inertia. This technique speeds up convergence. The algorithm implemented is "greedy k-means++". It differs from the vanilla k-means++ by making several trials at each sampling step and choosing the best centroid among them.
+
+        * 'random': choose `n_clusters` observations (rows) at random from data for the initial centroids.
 
-
-        for the initial centroids.
+        * If an array is passed, it should be of shape (n_clusters, n_features) and gives the initial centers.
 
-        If
-        and gives the initial centers.
+        * If a callable is passed, it should take arguments X, n_clusters and a random state and return an initialization.
 
-
-
+        For an example of how to use the different `init` strategy, see the example
+        entitled :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`.
 
-    n_init: 'auto' or int, default=
+    n_init: 'auto' or int, default='auto'
         Number of times the k-means algorithm is run with different centroid
         seeds. The final results is the best output of `n_init` consecutive runs
         in terms of inertia. Several runs are recommended for sparse

@@ -169,15 +167,12 @@ class KMeans(BaseTransformer):
         copy_x is False. If the original data is sparse, but not in CSR format,
         a copy will be made even if copy_x is False.
 
-    algorithm: {"lloyd", "elkan"
+    algorithm: {"lloyd", "elkan"}, default="lloyd"
         K-means algorithm to use. The classical EM-style algorithm is `"lloyd"`.
         The `"elkan"` variation can be more efficient on some datasets with
         well-defined clusters, by using the triangle inequality. However it's
         more memory intensive due to the allocation of an extra array of shape
         `(n_samples, n_clusters)`.
-
-        `"auto"` and `"full"` are deprecated and they will be removed in
-        Scikit-Learn 1.3. They are both aliases for `"lloyd"`.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -185,7 +180,7 @@ class KMeans(BaseTransformer):
         *,
         n_clusters=8,
         init="k-means++",
-        n_init="
+        n_init="auto",
         max_iter=300,
         tol=0.0001,
         verbose=0,

@@ -215,7 +210,7 @@ class KMeans(BaseTransformer):
 
         init_args = {'n_clusters':(n_clusters, 8, False),
                      'init':(init, "k-means++", False),
-                     'n_init':(n_init, "
+                     'n_init':(n_init, "auto", False),
                      'max_iter':(max_iter, 300, False),
                      'tol':(tol, 0.0001, False),
                      'verbose':(verbose, 0, False),
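Net effect of the KMeans hunks: `n_init` now defaults to `'auto'`, and the deprecated `"auto"`/`"full"` aliases for `algorithm` are dropped from the docstring. The MiniBatchKMeans hunks that follow apply the same `n_init` default. A minimal sketch under those assumptions; the DataFrame and column names are illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.cluster import KMeans

    df = pd.DataFrame(np.random.rand(100, 2), columns=["X1", "X2"])

    kmeans = KMeans(
        n_clusters=8,       # unchanged default
        n_init="auto",      # new default; no deprecation placeholder anymore
        algorithm="lloyd",  # "auto"/"full" aliases are no longer documented
        input_cols=["X1", "X2"],
        output_cols=["CLUSTER"],
    )
    kmeans.fit(df)
    labels = kmeans.predict(df)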
snowflake/ml/modeling/cluster/mini_batch_k_means.py

@@ -180,7 +180,7 @@ class MiniBatchKMeans(BaseTransformer):
         If `None`, the heuristic is `init_size = 3 * batch_size` if
         `3 * batch_size < n_clusters`, else `init_size = 3 * n_clusters`.
 
-    n_init: 'auto' or int, default=
+    n_init: 'auto' or int, default="auto"
         Number of random initializations that are tried.
         In contrast to KMeans, the algorithm is only run once, using the best of
         the `n_init` initializations as measured by inertia. Several runs are

@@ -213,7 +213,7 @@ class MiniBatchKMeans(BaseTransformer):
         tol=0.0,
         max_no_improvement=10,
         init_size=None,
-        n_init="
+        n_init="auto",
         reassignment_ratio=0.01,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -246,7 +246,7 @@ class MiniBatchKMeans(BaseTransformer):
                      'tol':(tol, 0.0, False),
                      'max_no_improvement':(max_no_improvement, 10, False),
                      'init_size':(init_size, None, False),
-                     'n_init':(n_init, "
+                     'n_init':(n_init, "auto", False),
                      'reassignment_ratio':(reassignment_ratio, 0.01, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
snowflake/ml/modeling/cluster/optics.py

@@ -189,8 +189,8 @@ class OPTICS(BaseTransformer):
     algorithm: {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
         Algorithm used to compute the nearest neighbors:
 
-        - 'ball_tree' will use :class
-        - 'kd_tree' will use :class
+        - 'ball_tree' will use :class:`~sklearn.neighbors.BallTree`.
+        - 'kd_tree' will use :class:`~sklearn.neighbors.KDTree`.
         - 'brute' will use a brute-force search.
         - 'auto' (default) will attempt to decide the most appropriate
           algorithm based on the values passed to :meth:`fit` method.

@@ -199,10 +199,10 @@ class OPTICS(BaseTransformer):
         this parameter, using brute force.
 
     leaf_size: int, default=30
-        Leaf size passed to :class
-        affect the speed of the
-
-        nature of the problem.
+        Leaf size passed to :class:`~sklearn.neighbors.BallTree` or
+        :class:`~sklearn.neighbors.KDTree`. This can affect the speed of the
+        construction and query, as well as the memory required to store the
+        tree. The optimal value depends on the nature of the problem.
 
     memory: str or object with the joblib.Memory interface, default=None
         Used to cache the output of the computation of the tree.
snowflake/ml/modeling/cluster/spectral_clustering.py

@@ -137,7 +137,8 @@ class SpectralClustering(BaseTransformer):
 
     gamma: float, default=1.0
         Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.
-        Ignored for ``affinity='nearest_neighbors'
+        Ignored for ``affinity='nearest_neighbors'``, ``affinity='precomputed'``
+        or ``affinity='precomputed_nearest_neighbors'``.
 
     affinity: str or callable, default='rbf'
         How to construct the affinity matrix.

@@ -151,7 +152,7 @@ class SpectralClustering(BaseTransformer):
           of precomputed distances, and construct a binary affinity matrix
           from the ``n_neighbors`` nearest neighbors of each instance.
         - one of the kernels supported by
-          :func:`~sklearn.metrics.pairwise_kernels`.
+          :func:`~sklearn.metrics.pairwise.pairwise_kernels`.
 
         Only kernels that produce similarity scores (non-negative values that
         increase with similarity) should be used. This property is not checked

@@ -162,7 +163,7 @@ class SpectralClustering(BaseTransformer):
         the nearest neighbors method. Ignored for ``affinity='rbf'``.
 
     eigen_tol: float, default="auto"
-        Stopping criterion for
+        Stopping criterion for eigen decomposition of the Laplacian matrix.
         If `eigen_tol="auto"` then the passed tolerance will depend on the
         `eigen_solver`:
 
snowflake/ml/modeling/compose/column_transformer.py

@@ -171,10 +171,18 @@ class ColumnTransformer(BaseTransformer):
         printed as it is completed.
 
     verbose_feature_names_out: bool, default=True
-        If True, :meth:`get_feature_names_out` will prefix
-        with the name of the transformer that generated that
-
-
+        If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix
+        all feature names with the name of the transformer that generated that
+        feature.
+        If False, :meth:`ColumnTransformer.get_feature_names_out` will not
+        prefix any feature names and will error if feature names are not
+        unique.
+
+    force_int_remainder_cols: bool, default=True
+        Force the columns of the last entry of `transformers_`, which
+        corresponds to the "remainder" transformer, to always be stored as
+        indices (int) rather than column names (str). See description of the
+        `transformers_` attribute for details.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -187,6 +195,7 @@ class ColumnTransformer(BaseTransformer):
         transformer_weights=None,
         verbose=False,
         verbose_feature_names_out=True,
+        force_int_remainder_cols=True,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -214,7 +223,8 @@ class ColumnTransformer(BaseTransformer):
                      'n_jobs':(n_jobs, None, False),
                      'transformer_weights':(transformer_weights, None, False),
                      'verbose':(verbose, False, False),
-                     'verbose_feature_names_out':(verbose_feature_names_out, True, False),}
+                     'verbose_feature_names_out':(verbose_feature_names_out, True, False),
+                     'force_int_remainder_cols':(force_int_remainder_cols, True, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.compose.ColumnTransformer
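The ColumnTransformer hunks add a `force_int_remainder_cols` argument (default `True`), mirroring the same addition in scikit-learn. A hedged sketch of passing it through the wrapper; the transformer list and column names here are illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; the inner
    # transformer and columns are illustrative.
    from sklearn.preprocessing import StandardScaler
    from snowflake.ml.modeling.compose import ColumnTransformer

    ct = ColumnTransformer(
        transformers=[("scale", StandardScaler(), ["AGE", "INCOME"])],
        remainder="passthrough",
        # New in this release: keep the "remainder" entry of transformers_
        # as integer indices (True, the default) rather than column names.
        force_int_remainder_cols=True,
    )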
snowflake/ml/modeling/compose/transformed_target_regressor.py

@@ -125,15 +125,16 @@ class TransformedTargetRegressor(BaseTransformer):
 
     func: function, default=None
         Function to apply to `y` before passing to :meth:`fit`. Cannot be set
-        at the same time as `transformer`.
-
-
+        at the same time as `transformer`. If `func is None`, the function used will be
+        the identity function. If `func` is set, `inverse_func` also needs to be
+        provided. The function needs to return a 2-dimensional array.
 
     inverse_func: function, default=None
         Function to apply to the prediction of the regressor. Cannot be set at
-        the same time as `transformer`. The function
-
-
+        the same time as `transformer`. The inverse function is used to return
+        predictions to the same space of the original training labels. If
+        `inverse_func` is set, `func` also needs to be provided. The inverse
+        function needs to return a 2-dimensional array.
 
     check_inverse: bool, default=True
         Whether to check that `transform` followed by `inverse_transform`
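The new docstring text spells out the `func`/`inverse_func` contract: they must be provided together, and each must return a 2-dimensional array. A minimal sketch under that contract, assuming the wrapper accepts plain NumPy ufuncs like its scikit-learn counterpart; the nested regressor is illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0.
    import numpy as np
    from snowflake.ml.modeling.compose import TransformedTargetRegressor
    from snowflake.ml.modeling.linear_model import LinearRegression

    ttr = TransformedTargetRegressor(
        regressor=LinearRegression(),
        func=np.log1p,          # applied to y before fit; shape-preserving
        inverse_func=np.expm1,  # maps predictions back to the label space
    )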
snowflake/ml/modeling/covariance/elliptic_envelope.py

@@ -124,7 +124,7 @@ class EllipticEnvelope(BaseTransformer):
     support_fraction: float, default=None
         The proportion of points to be included in the support of the raw
         MCD estimate. If None, the minimum value of support_fraction will
-        be used within the algorithm: `
+        be used within the algorithm: `(n_samples + n_features + 1) / 2 * n_samples`.
         Range is (0, 1).
 
     contamination: float, default=0.1
snowflake/ml/modeling/covariance/graphical_lasso_cv.py

@@ -129,7 +129,7 @@ class GraphicalLassoCV(BaseTransformer):
         - :term:`CV splitter`,
         - An iterable yielding (train, test) splits as arrays of indices.
 
-        For integer/None inputs :class
+        For integer/None inputs :class:`~sklearn.model_selection.KFold` is used.
 
         Refer :ref:`User Guide <cross_validation>` for the various
         cross-validation strategies that can be used here.
snowflake/ml/modeling/covariance/min_cov_det.py

@@ -125,8 +125,8 @@ class MinCovDet(BaseTransformer):
         The proportion of points to be included in the support of the raw
         MCD estimate. Default is None, which implies that the minimum
         value of support_fraction will be used within the algorithm:
-        `(
-        (0, 1].
+        `(n_samples + n_features + 1) / 2 * n_samples`. The parameter must be
+        in the range (0, 1].
 
     random_state: int, RandomState instance or None, default=None
         Determines the pseudo random number generator for shuffling the data.
snowflake/ml/modeling/covariance/oas.py

@@ -58,7 +58,7 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.covariance".replace("skl
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
 class OAS(BaseTransformer):
-    r"""Oracle Approximating Shrinkage Estimator
+    r"""Oracle Approximating Shrinkage Estimator
     For more details on this class, see [sklearn.covariance.OAS]
     (https://scikit-learn.org/stable/modules/generated/sklearn.covariance.OAS.html)
 
snowflake/ml/modeling/decomposition/kernel_pca.py

@@ -58,7 +58,7 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.decomposition".replace("
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
 class KernelPCA(BaseTransformer):
-    r"""Kernel Principal component analysis (KPCA)
+    r"""Kernel Principal component analysis (KPCA)
     For more details on this class, see [sklearn.decomposition.KernelPCA]
     (https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html)
 

@@ -119,7 +119,7 @@ class KernelPCA(BaseTransformer):
         Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other
         kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.
 
-    degree:
+    degree: float, default=3
         Degree for poly kernels. Ignored by other kernels.
 
     coef0: float, default=1
snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py

@@ -115,13 +115,9 @@ class MiniBatchDictionaryLearning(BaseTransformer):
     alpha: float, default=1
         Sparsity controlling parameter.
 
-
-        Total number of iterations over data batches to perform.
-
-    max_iter: int, default=None
+    max_iter: int, default=1_000
         Maximum number of iterations over the complete dataset before
         stopping independently of any early stopping criterion heuristics.
-        If ``max_iter`` is not None, ``n_iter`` is ignored.
 
     fit_algorithm: {'lars', 'cd'}, default='lars'
         The algorithm used:

@@ -204,15 +200,14 @@ class MiniBatchDictionaryLearning(BaseTransformer):
 
     tol: float, default=1e-3
         Control early stopping based on the norm of the differences in the
-        dictionary between 2 steps.
+        dictionary between 2 steps.
 
         To disable early stopping based on changes in the dictionary, set
         `tol` to 0.0.
 
     max_no_improvement: int, default=10
         Control early stopping based on the consecutive number of mini batches
-        that does not yield an improvement on the smoothed cost function.
-        `max_iter` is not None.
+        that does not yield an improvement on the smoothed cost function.
 
         To disable convergence detection based on cost function, set
         `max_no_improvement` to None.

@@ -223,8 +218,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
         *,
         n_components=None,
         alpha=1,
-
-        max_iter=None,
+        max_iter=1000,
         fit_algorithm="lars",
         n_jobs=None,
         batch_size=256,

@@ -265,8 +259,7 @@ class MiniBatchDictionaryLearning(BaseTransformer):
 
         init_args = {'n_components':(n_components, None, False),
                      'alpha':(alpha, 1, False),
-                     '
-                     'max_iter':(max_iter, None, False),
+                     'max_iter':(max_iter, 1000, False),
                      'fit_algorithm':(fit_algorithm, "lars", False),
                      'n_jobs':(n_jobs, None, False),
                      'batch_size':(batch_size, 256, False),
snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py

@@ -121,13 +121,9 @@ class MiniBatchSparsePCA(BaseTransformer):
         Amount of ridge shrinkage to apply in order to improve
         conditioning when calling the transform method.
 
-
-        Number of iterations to perform for each mini batch.
-
-    max_iter: int, default=None
+    max_iter: int, default=1_000
         Maximum number of iterations over the complete dataset before
         stopping independently of any early stopping criterion heuristics.
-        If `max_iter` is not `None`, `n_iter` is ignored.
 
     callback: callable, default=None
         Callable that gets invoked every five iterations.

@@ -163,15 +159,14 @@ class MiniBatchSparsePCA(BaseTransformer):
 
     tol: float, default=1e-3
         Control early stopping based on the norm of the differences in the
-        dictionary between 2 steps.
+        dictionary between 2 steps.
 
         To disable early stopping based on changes in the dictionary, set
         `tol` to 0.0.
 
     max_no_improvement: int or None, default=10
         Control early stopping based on the consecutive number of mini batches
-        that does not yield an improvement on the smoothed cost function.
-        `max_iter` is not None.
+        that does not yield an improvement on the smoothed cost function.
 
         To disable convergence detection based on cost function, set
         `max_no_improvement` to `None`.

@@ -183,8 +178,7 @@ class MiniBatchSparsePCA(BaseTransformer):
         n_components=None,
         alpha=1,
         ridge_alpha=0.01,
-
-        max_iter=None,
+        max_iter=1000,
         callback=None,
         batch_size=3,
         verbose=False,

@@ -218,8 +212,7 @@ class MiniBatchSparsePCA(BaseTransformer):
         init_args = {'n_components':(n_components, None, False),
                      'alpha':(alpha, 1, False),
                      'ridge_alpha':(ridge_alpha, 0.01, False),
-                     '
-                     'max_iter':(max_iter, None, False),
+                     'max_iter':(max_iter, 1000, False),
                      'callback':(callback, None, False),
                      'batch_size':(batch_size, 3, False),
                      'verbose':(verbose, False, False),
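Both mini-batch decomposition wrappers drop the old `n_iter` plumbing and make `max_iter` default to 1000. A minimal sketch against MiniBatchSparsePCA (the same applies to MiniBatchDictionaryLearning); data and output column names are illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.decomposition import MiniBatchSparsePCA

    df = pd.DataFrame(np.random.rand(50, 6), columns=[f"F{i}" for i in range(6)])

    mbspca = MiniBatchSparsePCA(
        n_components=3,
        max_iter=1000,  # new default; the removed n_iter argument is gone
        input_cols=list(df.columns),
        output_cols=[f"PC{i}" for i in range(3)],
    )
    mbspca.fit(df)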
snowflake/ml/modeling/decomposition/pca.py

@@ -145,23 +145,36 @@ class PCA(BaseTransformer):
         improve the predictive accuracy of the downstream estimators by
         making their data respect some hard-wired assumptions.
 
-    svd_solver: {'auto', 'full', 'arpack', 'randomized'},
-
-        The solver is selected by a default policy based on `X.shape` and
-        `n_components`: if the input data
-
-
-
-
-
-
+    svd_solver: {'auto', 'full', 'covariance_eigh', 'arpack', 'randomized'}, default='auto'
+        "auto" :
+            The solver is selected by a default 'auto' policy is based on `X.shape` and
+            `n_components`: if the input data has fewer than 1000 features and
+            more than 10 times as many samples, then the "covariance_eigh"
+            solver is used. Otherwise, if the input data is larger than 500x500
+            and the number of components to extract is lower than 80% of the
+            smallest dimension of the data, then the more efficient
+            "randomized" method is selected. Otherwise the exact "full" SVD is
+            computed and optionally truncated afterwards.
+        "full" :
+            Run exact full SVD calling the standard LAPACK solver via
             `scipy.linalg.svd` and select the components by postprocessing
-
-
+        "covariance_eigh" :
+            Precompute the covariance matrix (on centered data), run a
+            classical eigenvalue decomposition on the covariance matrix
+            typically using LAPACK and select the components by postprocessing.
+            This solver is very efficient for n_samples >> n_features and small
+            n_features. It is, however, not tractable otherwise for large
+            n_features (large memory footprint required to materialize the
+            covariance matrix). Also note that compared to the "full" solver,
+            this solver effectively doubles the condition number and is
+            therefore less numerical stable (e.g. on input data with a large
+            range of singular values).
+        "arpack" :
+            Run SVD truncated to `n_components` calling ARPACK solver via
             `scipy.sparse.linalg.svds`. It requires strictly
-        0 < n_components < min(X.shape)
-
-
+            `0 < n_components < min(X.shape)`
+        "randomized" :
+            Run randomized SVD by the method of Halko et al.
 
     tol: float, default=0.0
         Tolerance for singular values computed by svd_solver == 'arpack'.
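The PCA hunk documents the new `'covariance_eigh'` solver. A minimal sketch, assuming the runtime ships a scikit-learn new enough to know this solver; the data shape is chosen to match the documented sweet spot of many samples and few features, and the column wiring is illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0 with a scikit-learn
    # backend that supports svd_solver="covariance_eigh"; data is synthetic.
    import numpy as np
    import pandas as pd
    from snowflake.ml.modeling.decomposition import PCA

    # covariance_eigh is most efficient when n_samples >> n_features.
    df = pd.DataFrame(np.random.rand(10_000, 8), columns=[f"F{i}" for i in range(8)])

    pca = PCA(
        n_components=4,
        svd_solver="covariance_eigh",
        input_cols=list(df.columns),
        output_cols=[f"PC{i}" for i in range(4)],
    )
    pca.fit(df)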
snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py

@@ -128,6 +128,9 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         This should be left to None if `covariance_estimator` is used.
         Note that shrinkage works only with 'lsqr' and 'eigen' solvers.
 
+        For a usage example, see
+        :ref:`sphx_glr_auto_examples_classification_plot_lda.py`.
+
     priors: array-like of shape (n_classes,), default=None
         The class prior probabilities. By default, the class proportions are
         inferred from the training data.

@@ -138,6 +141,9 @@ class LinearDiscriminantAnalysis(BaseTransformer):
         min(n_classes - 1, n_features). This parameter only affects the
         `transform` method.
 
+        For a usage example, see
+        :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`.
+
     store_covariance: bool, default=False
         If True, explicitly compute the weighted within-class covariance
         matrix when solver is 'svd'. The matrix is always computed
snowflake/ml/modeling/ensemble/ada_boost_classifier.py

@@ -140,13 +140,6 @@ class AdaBoostClassifier(BaseTransformer):
         Thus, it is only used when `estimator` exposes a `random_state`.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-
-    base_estimator: object, default=None
-        The base estimator from which the boosted ensemble is built.
-        Support for sample weighting is required, as well as proper
-        ``classes_`` and ``n_classes_`` attributes. If ``None``, then
-        the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier`
-        initialized with `max_depth=1`.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -157,7 +150,6 @@ class AdaBoostClassifier(BaseTransformer):
         learning_rate=1.0,
         algorithm="SAMME.R",
         random_state=None,
-        base_estimator="deprecated",
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -177,16 +169,13 @@ class AdaBoostClassifier(BaseTransformer):
         self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimator)
-        deps = deps | gather_dependencies(base_estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
-        base_estimator = transform_snowml_obj_to_sklearn_obj(base_estimator)
         init_args = {'estimator':(estimator, None, False),
                      'n_estimators':(n_estimators, 50, False),
                      'learning_rate':(learning_rate, 1.0, False),
                      'algorithm':(algorithm, "SAMME.R", False),
-                     'random_state':(random_state, None, False),
-                     'base_estimator':(base_estimator, "deprecated", False),}
+                     'random_state':(random_state, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.ensemble.AdaBoostClassifier
snowflake/ml/modeling/ensemble/ada_boost_regressor.py

@@ -138,12 +138,6 @@ class AdaBoostRegressor(BaseTransformer):
         `estimator` at each boosting iteration.
         Pass an int for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`.
-
-    base_estimator: object, default=None
-        The base estimator from which the boosted ensemble is built.
-        If ``None``, then the base estimator is
-        :class:`~sklearn.tree.DecisionTreeRegressor` initialized with
-        `max_depth=3`.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -154,7 +148,6 @@ class AdaBoostRegressor(BaseTransformer):
         learning_rate=1.0,
         loss="linear",
         random_state=None,
-        base_estimator="deprecated",
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -174,16 +167,13 @@ class AdaBoostRegressor(BaseTransformer):
         self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimator)
-        deps = deps | gather_dependencies(base_estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
-        base_estimator = transform_snowml_obj_to_sklearn_obj(base_estimator)
         init_args = {'estimator':(estimator, None, False),
                      'n_estimators':(n_estimators, 50, False),
                      'learning_rate':(learning_rate, 1.0, False),
                      'loss':(loss, "linear", False),
-                     'random_state':(random_state, None, False),
-                     'base_estimator':(base_estimator, "deprecated", False),}
+                     'random_state':(random_state, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.ensemble.AdaBoostRegressor
snowflake/ml/modeling/ensemble/bagging_classifier.py

@@ -164,9 +164,6 @@ class BaggingClassifier(BaseTransformer):
 
     verbose: int, default=0
         Controls the verbosity when fitting and predicting.
-
-    base_estimator: object, default="deprecated"
-        Use `estimator` instead.
     """
 
     def __init__( # type: ignore[no-untyped-def]

@@ -183,7 +180,6 @@ class BaggingClassifier(BaseTransformer):
         n_jobs=None,
         random_state=None,
         verbose=0,
-        base_estimator="deprecated",
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,

@@ -203,10 +199,8 @@ class BaggingClassifier(BaseTransformer):
         self._batch_size = -1
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
         deps = deps | gather_dependencies(estimator)
-        deps = deps | gather_dependencies(base_estimator)
         self._deps = list(deps)
         estimator = transform_snowml_obj_to_sklearn_obj(estimator)
-        base_estimator = transform_snowml_obj_to_sklearn_obj(base_estimator)
         init_args = {'estimator':(estimator, None, False),
                      'n_estimators':(n_estimators, 10, False),
                      'max_samples':(max_samples, 1.0, False),

@@ -217,8 +211,7 @@ class BaggingClassifier(BaseTransformer):
                      'warm_start':(warm_start, False, False),
                      'n_jobs':(n_jobs, None, False),
                      'random_state':(random_state, None, False),
-                     'verbose':(verbose, 0, False),
-                     'base_estimator':(base_estimator, "deprecated", False),}
+                     'verbose':(verbose, 0, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.ensemble.BaggingClassifier
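Across AdaBoostClassifier, AdaBoostRegressor and BaggingClassifier, the deprecated `base_estimator` keyword is removed outright, so callers must pass the sub-estimator as `estimator`. A minimal migration sketch; the sub-estimator choice here is illustrative:

    # A minimal sketch, assuming snowflake-ml-python 1.7.0.
    from sklearn.tree import DecisionTreeClassifier
    from snowflake.ml.modeling.ensemble import AdaBoostClassifier

    # 1.6.4 and earlier still accepted base_estimator=...; in 1.7.0 that
    # keyword is gone from the signature and should raise a TypeError.
    clf = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=1),
        n_estimators=50,
    )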
|