snowflake-ml-python 1.6.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. snowflake/ml/_internal/telemetry.py +4 -2
  2. snowflake/ml/_internal/utils/import_utils.py +31 -0
  3. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +13 -0
  4. snowflake/ml/data/_internal/arrow_ingestor.py +8 -0
  5. snowflake/ml/data/data_connector.py +1 -1
  6. snowflake/ml/data/torch_utils.py +33 -14
  7. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +5 -3
  8. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +7 -5
  9. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +4 -2
  10. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +3 -1
  11. snowflake/ml/feature_store/examples/example_helper.py +6 -3
  12. snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +4 -2
  13. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +4 -2
  14. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +3 -1
  15. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +3 -1
  16. snowflake/ml/feature_store/feature_store.py +1 -2
  17. snowflake/ml/feature_store/feature_view.py +5 -1
  18. snowflake/ml/model/_client/model/model_version_impl.py +144 -10
  19. snowflake/ml/model/_client/ops/model_ops.py +25 -6
  20. snowflake/ml/model/_client/ops/service_ops.py +33 -28
  21. snowflake/ml/model/_client/service/model_deployment_spec.py +19 -8
  22. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
  23. snowflake/ml/model/_client/sql/model.py +14 -0
  24. snowflake/ml/model/_client/sql/service.py +6 -18
  25. snowflake/ml/model/_model_composer/model_composer.py +2 -0
  26. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
  27. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  28. snowflake/ml/model/_model_composer/model_method/model_method.py +1 -1
  29. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -1
  30. snowflake/ml/model/_packager/model_handlers/catboost.py +3 -6
  31. snowflake/ml/model/_packager/model_handlers/custom.py +2 -0
  32. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +10 -1
  33. snowflake/ml/model/_packager/model_handlers/lightgbm.py +3 -6
  34. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -1
  35. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -6
  36. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -65
  37. snowflake/ml/model/_packager/model_handlers/xgboost.py +10 -40
  38. snowflake/ml/model/_packager/model_packager.py +0 -11
  39. snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} +13 -25
  40. snowflake/ml/model/_signatures/pandas_handler.py +16 -0
  41. snowflake/ml/model/custom_model.py +47 -7
  42. snowflake/ml/model/model_signature.py +2 -0
  43. snowflake/ml/model/type_hints.py +8 -0
  44. snowflake/ml/modeling/_internal/estimator_utils.py +13 -0
  45. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +7 -2
  46. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +16 -5
  47. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -2
  48. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -3
  49. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -8
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +17 -19
  51. snowflake/ml/modeling/cluster/dbscan.py +5 -2
  52. snowflake/ml/modeling/cluster/feature_agglomeration.py +7 -19
  53. snowflake/ml/modeling/cluster/k_means.py +14 -19
  54. snowflake/ml/modeling/cluster/mini_batch_k_means.py +3 -3
  55. snowflake/ml/modeling/cluster/optics.py +6 -6
  56. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -3
  57. snowflake/ml/modeling/compose/column_transformer.py +15 -5
  58. snowflake/ml/modeling/compose/transformed_target_regressor.py +7 -6
  59. snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
  60. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
  61. snowflake/ml/modeling/covariance/min_cov_det.py +2 -2
  62. snowflake/ml/modeling/covariance/oas.py +1 -1
  63. snowflake/ml/modeling/decomposition/kernel_pca.py +2 -2
  64. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -12
  65. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -12
  66. snowflake/ml/modeling/decomposition/pca.py +28 -15
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -0
  68. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -12
  69. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -11
  70. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -8
  71. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -8
  72. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +21 -2
  73. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +18 -2
  74. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +2 -0
  75. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +2 -0
  76. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +21 -8
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +21 -11
  78. snowflake/ml/modeling/ensemble/random_forest_classifier.py +21 -2
  79. snowflake/ml/modeling/ensemble/random_forest_regressor.py +18 -2
  80. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +2 -1
  81. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +2 -2
  83. snowflake/ml/modeling/linear_model/ard_regression.py +5 -10
  84. snowflake/ml/modeling/linear_model/bayesian_ridge.py +5 -11
  85. snowflake/ml/modeling/linear_model/elastic_net.py +3 -0
  86. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  87. snowflake/ml/modeling/linear_model/lars.py +0 -10
  88. snowflake/ml/modeling/linear_model/lars_cv.py +1 -11
  89. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  90. snowflake/ml/modeling/linear_model/lasso_lars.py +0 -10
  91. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -11
  92. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +0 -10
  93. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -22
  94. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +30 -24
  95. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  96. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  97. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +4 -13
  98. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +4 -4
  99. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  100. snowflake/ml/modeling/linear_model/perceptron.py +3 -3
  101. snowflake/ml/modeling/linear_model/ransac_regressor.py +3 -2
  102. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +14 -6
  103. snowflake/ml/modeling/linear_model/ridge_cv.py +17 -11
  104. snowflake/ml/modeling/linear_model/sgd_classifier.py +2 -2
  105. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -1
  106. snowflake/ml/modeling/linear_model/sgd_regressor.py +12 -3
  107. snowflake/ml/modeling/manifold/isomap.py +1 -1
  108. snowflake/ml/modeling/manifold/mds.py +3 -3
  109. snowflake/ml/modeling/manifold/tsne.py +10 -4
  110. snowflake/ml/modeling/metrics/classification.py +12 -16
  111. snowflake/ml/modeling/metrics/ranking.py +3 -3
  112. snowflake/ml/modeling/metrics/regression.py +3 -3
  113. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +3 -3
  114. snowflake/ml/modeling/naive_bayes/categorical_nb.py +3 -3
  115. snowflake/ml/modeling/naive_bayes/complement_nb.py +3 -3
  116. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +3 -3
  117. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +10 -4
  118. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +5 -2
  119. snowflake/ml/modeling/neighbors/local_outlier_factor.py +2 -2
  120. snowflake/ml/modeling/neighbors/nearest_centroid.py +7 -14
  121. snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
  122. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -1
  123. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  124. snowflake/ml/modeling/neural_network/mlp_classifier.py +7 -1
  125. snowflake/ml/modeling/neural_network/mlp_regressor.py +3 -0
  126. snowflake/ml/modeling/pipeline/pipeline.py +16 -14
  127. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +8 -4
  128. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -7
  129. snowflake/ml/modeling/svm/linear_svc.py +25 -16
  130. snowflake/ml/modeling/svm/linear_svr.py +23 -17
  131. snowflake/ml/modeling/svm/nu_svc.py +5 -3
  132. snowflake/ml/modeling/svm/nu_svr.py +3 -1
  133. snowflake/ml/modeling/svm/svc.py +9 -5
  134. snowflake/ml/modeling/svm/svr.py +3 -1
  135. snowflake/ml/modeling/tree/decision_tree_classifier.py +21 -2
  136. snowflake/ml/modeling/tree/decision_tree_regressor.py +18 -2
  137. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -9
  138. snowflake/ml/modeling/tree/extra_tree_regressor.py +18 -2
  139. snowflake/ml/monitoring/_client/{monitor_sql_client.py → model_monitor_sql_client.py} +1 -1
  140. snowflake/ml/monitoring/{_client → _manager}/model_monitor_manager.py +9 -8
  141. snowflake/ml/monitoring/{_client/model_monitor.py → model_monitor.py} +3 -3
  142. snowflake/ml/registry/_manager/model_manager.py +15 -1
  143. snowflake/ml/registry/registry.py +15 -8
  144. snowflake/ml/version.py +1 -1
  145. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/METADATA +81 -9
  146. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/RECORD +150 -150
  147. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/WHEEL +1 -1
  148. /snowflake/ml/monitoring/{_client/model_monitor_version.py → model_monitor_version.py} +0 -0
  149. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/LICENSE.txt +0 -0
  150. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/top_level.txt +0 -0
@@ -164,9 +164,6 @@ class BaggingRegressor(BaseTransformer):
 
  verbose: int, default=0
  Controls the verbosity when fitting and predicting.
-
- base_estimator: object, default="deprecated"
- Use `estimator` instead.
  """
 
  def __init__( # type: ignore[no-untyped-def]
@@ -183,7 +180,6 @@ class BaggingRegressor(BaseTransformer):
  n_jobs=None,
  random_state=None,
  verbose=0,
- base_estimator="deprecated",
  input_cols: Optional[Union[str, Iterable[str]]] = None,
  output_cols: Optional[Union[str, Iterable[str]]] = None,
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -203,10 +199,8 @@ class BaggingRegressor(BaseTransformer):
  self._batch_size = -1
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
  deps = deps | gather_dependencies(estimator)
- deps = deps | gather_dependencies(base_estimator)
  self._deps = list(deps)
  estimator = transform_snowml_obj_to_sklearn_obj(estimator)
- base_estimator = transform_snowml_obj_to_sklearn_obj(base_estimator)
  init_args = {'estimator':(estimator, None, False),
  'n_estimators':(n_estimators, 10, False),
  'max_samples':(max_samples, 1.0, False),
@@ -217,8 +211,7 @@ class BaggingRegressor(BaseTransformer):
  'warm_start':(warm_start, False, False),
  'n_jobs':(n_jobs, None, False),
  'random_state':(random_state, None, False),
- 'verbose':(verbose, 0, False),
- 'base_estimator':(base_estimator, "deprecated", False),}
+ 'verbose':(verbose, 0, False),}
  cleaned_up_init_args = validate_sklearn_args(
  args=init_args,
  klass=sklearn.ensemble.BaggingRegressor
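
Migration note: the long-deprecated `base_estimator` alias is removed from `BaggingRegressor` in 1.7.0; the sub-estimator must be passed as `estimator`. A minimal sketch, assuming a plain scikit-learn sub-estimator passes through unchanged and using invented column names:

    from sklearn.tree import DecisionTreeRegressor
    from snowflake.ml.modeling.ensemble import BaggingRegressor

    # 1.6.x still accepted BaggingRegressor(base_estimator=..., ...);
    # in 1.7.0 only `estimator` exists.
    reg = BaggingRegressor(
        estimator=DecisionTreeRegressor(max_depth=5),
        n_estimators=10,
        input_cols=["FEATURE_1", "FEATURE_2"],  # illustrative names
        label_cols=["TARGET"],
        output_cols=["TARGET_PRED"],
    )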
@@ -219,7 +219,7 @@ class ExtraTreesClassifier(BaseTransformer):
  When set to ``True``, reuse the solution of the previous call to fit
  and add more estimators to the ensemble, otherwise, just fit a whole
  new forest. See :term:`Glossary <warm_start>` and
- :ref:`gradient_boosting_warm_start` for details.
+ :ref:`tree_ensemble_warm_start` for details.
 
  class_weight: {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
  Weights associated with classes in the form ``{class_label: weight}``.
@@ -260,6 +260,23 @@ class ExtraTreesClassifier(BaseTransformer):
  - If int, then draw `max_samples` samples.
  - If float, then draw `max_samples * X.shape[0]` samples. Thus,
  `max_samples` should be in the interval `(0.0, 1.0]`.
+
+ monotonic_cst: array-like of int of shape (n_features), default=None
+ Indicates the monotonicity constraint to enforce on each feature.
+ - 1: monotonically increasing
+ - 0: no constraint
+ - -1: monotonically decreasing
+
+ If monotonic_cst is None, no constraints are applied.
+
+ Monotonicity constraints are not supported for:
+ - multiclass classifications (i.e. when `n_classes > 2`),
+ - multioutput classifications (i.e. when `n_outputs_ > 1`),
+ - classifications trained on data with missing values.
+
+ The constraints hold over the probability of the positive class.
+
+ Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
  """
 
  def __init__( # type: ignore[no-untyped-def]
@@ -283,6 +300,7 @@ class ExtraTreesClassifier(BaseTransformer):
  class_weight=None,
  ccp_alpha=0.0,
  max_samples=None,
+ monotonic_cst=None,
  input_cols: Optional[Union[str, Iterable[str]]] = None,
  output_cols: Optional[Union[str, Iterable[str]]] = None,
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -321,7 +339,8 @@ class ExtraTreesClassifier(BaseTransformer):
  'warm_start':(warm_start, False, False),
  'class_weight':(class_weight, None, False),
  'ccp_alpha':(ccp_alpha, 0.0, False),
- 'max_samples':(max_samples, None, False),}
+ 'max_samples':(max_samples, None, False),
+ 'monotonic_cst':(monotonic_cst, None, False),}
  cleaned_up_init_args = validate_sklearn_args(
  args=init_args,
  klass=sklearn.ensemble.ExtraTreesClassifier
@@ -225,7 +225,7 @@ class ExtraTreesRegressor(BaseTransformer):
  When set to ``True``, reuse the solution of the previous call to fit
  and add more estimators to the ensemble, otherwise, just fit a whole
  new forest. See :term:`Glossary <warm_start>` and
- :ref:`gradient_boosting_warm_start` for details.
+ :ref:`tree_ensemble_warm_start` for details.
 
  ccp_alpha: non-negative float, default=0.0
  Complexity parameter used for Minimal Cost-Complexity Pruning. The
@@ -241,6 +241,20 @@ class ExtraTreesRegressor(BaseTransformer):
  - If int, then draw `max_samples` samples.
  - If float, then draw `max_samples * X.shape[0]` samples. Thus,
  `max_samples` should be in the interval `(0.0, 1.0]`.
+
+ monotonic_cst: array-like of int of shape (n_features), default=None
+ Indicates the monotonicity constraint to enforce on each feature.
+ - 1: monotonically increasing
+ - 0: no constraint
+ - -1: monotonically decreasing
+
+ If monotonic_cst is None, no constraints are applied.
+
+ Monotonicity constraints are not supported for:
+ - multioutput regressions (i.e. when `n_outputs_ > 1`),
+ - regressions trained on data with missing values.
+
+ Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
  """
 
  def __init__( # type: ignore[no-untyped-def]
@@ -263,6 +277,7 @@ class ExtraTreesRegressor(BaseTransformer):
  warm_start=False,
  ccp_alpha=0.0,
  max_samples=None,
+ monotonic_cst=None,
  input_cols: Optional[Union[str, Iterable[str]]] = None,
  output_cols: Optional[Union[str, Iterable[str]]] = None,
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -300,7 +315,8 @@ class ExtraTreesRegressor(BaseTransformer):
  'verbose':(verbose, 0, False),
  'warm_start':(warm_start, False, False),
  'ccp_alpha':(ccp_alpha, 0.0, False),
- 'max_samples':(max_samples, None, False),}
+ 'max_samples':(max_samples, None, False),
+ 'monotonic_cst':(monotonic_cst, None, False),}
  cleaned_up_init_args = validate_sklearn_args(
  args=init_args,
  klass=sklearn.ensemble.ExtraTreesRegressor
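
Usage note: both ExtraTrees estimators (and the RandomForest pair further down) gain scikit-learn's `monotonic_cst` constructor argument, one integer per input feature. A hedged sketch with invented column names; fitting on a Snowpark or pandas DataFrame works as before:

    from snowflake.ml.modeling.ensemble import ExtraTreesRegressor

    # +1 forces a monotonically increasing relationship with the target,
    # -1 a decreasing one, 0 leaves the feature unconstrained.
    model = ExtraTreesRegressor(
        monotonic_cst=[1, 0, -1],                    # one entry per input column below
        input_cols=["SQFT", "ZIP_CODE", "AGE_YRS"],  # illustrative names
        label_cols=["PRICE"],
        output_cols=["PRICE_PRED"],
    )
    # model.fit(train_df); model.predict(test_df)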
@@ -255,6 +255,8 @@ class GradientBoostingClassifier(BaseTransformer):
  improving in all of the previous ``n_iter_no_change`` numbers of
  iterations. The split is stratified.
  Values must be in the range `[1, inf)`.
+ See
+ :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_early_stopping.py`.
 
  tol: float, default=1e-4
  Tolerance for the early stopping. When the loss is not improving
@@ -262,6 +262,8 @@ class GradientBoostingRegressor(BaseTransformer):
  improving in all of the previous ``n_iter_no_change`` numbers of
  iterations.
  Values must be in the range `[1, inf)`.
+ See
+ :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_early_stopping.py`.
 
  tol: float, default=1e-4
  Tolerance for the early stopping. When the loss is not improving
@@ -142,7 +142,15 @@ class HistGradientBoostingClassifier(BaseTransformer):
  than a few hundred samples, it is recommended to lower this value
  since only very shallow trees would be built.
  l2_regularization: float, default=0
- The L2 regularization parameter. Use 0 for no regularization.
+ The L2 regularization parameter penalizing leaves with small hessians.
+ Use ``0`` for no regularization (default).
+ max_features: float, default=1.0
+ Proportion of randomly chosen features in each and every node split.
+ This is a form of regularization, smaller values make the trees weaker
+ learners and might prevent overfitting.
+ If interaction constraints from `interaction_cst` are present, only allowed
+ features are taken into account for the subsampling.
+
  max_bins: int, default=255
  The maximum number of bins to use for non-missing values. Before
  training, each feature of the input array `X` is binned into
@@ -159,13 +167,16 @@ class HistGradientBoostingClassifier(BaseTransformer):
  features.
  - str array-like: names of categorical features (assuming the training
  data has feature names).
+ - `"from_dtype"`: dataframe columns with dtype "category" are
+ considered to be categorical features. The input must be an object
+ exposing a ``__dataframe__`` method such as pandas or polars
+ DataFrames to use this feature.
 
  For each categorical feature, there must be at most `max_bins` unique
- categories, and each categorical value must be less then `max_bins - 1`.
- Negative values for categorical features are treated as missing values.
- All categorical values are converted to floating point numbers.
- This means that categorical values of 1.0 and 1 are treated as
- the same category.
+ categories. Negative values for categorical features encoded as numeric
+ dtypes are treated as missing values. All categorical values are
+ converted to floating point numbers. This means that categorical values
+ of 1.0 and 1 are treated as the same category.
 
  Read more in the :ref:`User Guide <categorical_support_gbdt>`.
 
@@ -261,8 +272,9 @@ class HistGradientBoostingClassifier(BaseTransformer):
  max_depth=None,
  min_samples_leaf=20,
  l2_regularization=0.0,
+ max_features=1.0,
  max_bins=255,
- categorical_features=None,
+ categorical_features="warn",
  monotonic_cst=None,
  interaction_cst=None,
  warm_start=False,
@@ -302,8 +314,9 @@ class HistGradientBoostingClassifier(BaseTransformer):
  'max_depth':(max_depth, None, False),
  'min_samples_leaf':(min_samples_leaf, 20, False),
  'l2_regularization':(l2_regularization, 0.0, False),
+ 'max_features':(max_features, 1.0, False),
  'max_bins':(max_bins, 255, False),
- 'categorical_features':(categorical_features, None, False),
+ 'categorical_features':(categorical_features, "warn", False),
  'monotonic_cst':(monotonic_cst, None, False),
  'interaction_cst':(interaction_cst, None, False),
  'warm_start':(warm_start, False, False),
@@ -140,8 +140,15 @@ class HistGradientBoostingRegressor(BaseTransformer):
  than a few hundred samples, it is recommended to lower this value
  since only very shallow trees would be built.
  l2_regularization: float, default=0
- The L2 regularization parameter. Use ``0`` for no regularization
- (default).
+ The L2 regularization parameter penalizing leaves with small hessians.
+ Use ``0`` for no regularization (default).
+ max_features: float, default=1.0
+ Proportion of randomly chosen features in each and every node split.
+ This is a form of regularization, smaller values make the trees weaker
+ learners and might prevent overfitting.
+ If interaction constraints from `interaction_cst` are present, only allowed
+ features are taken into account for the subsampling.
+
  max_bins: int, default=255
  The maximum number of bins to use for non-missing values. Before
  training, each feature of the input array `X` is binned into
@@ -158,13 +165,16 @@ class HistGradientBoostingRegressor(BaseTransformer):
  features.
  - str array-like: names of categorical features (assuming the training
  data has feature names).
+ - `"from_dtype"`: dataframe columns with dtype "category" are
+ considered to be categorical features. The input must be an object
+ exposing a ``__dataframe__`` method such as pandas or polars
+ DataFrames to use this feature.
 
  For each categorical feature, there must be at most `max_bins` unique
- categories, and each categorical value must be less then `max_bins - 1`.
- Negative values for categorical features are treated as missing values.
- All categorical values are converted to floating point numbers.
- This means that categorical values of 1.0 and 1 are treated as
- the same category.
+ categories. Negative values for categorical features encoded as numeric
+ dtypes are treated as missing values. All categorical values are
+ converted to floating point numbers. This means that categorical values
+ of 1.0 and 1 are treated as the same category.
 
  Read more in the :ref:`User Guide <categorical_support_gbdt>`.
 
@@ -180,8 +190,6 @@ class HistGradientBoostingRegressor(BaseTransformer):
  If an array, the features are mapped to constraints by position. See
  :ref:`monotonic_cst_features_names` for a usage example.
 
- The constraints are only valid for binary classifications and hold
- over the probability of the positive class.
  Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
 
  interaction_cst: {"pairwise", "no_interactions"} or sequence of lists/tuples/sets of int, default=None
@@ -253,8 +261,9 @@ class HistGradientBoostingRegressor(BaseTransformer):
  max_depth=None,
  min_samples_leaf=20,
  l2_regularization=0.0,
+ max_features=1.0,
  max_bins=255,
- categorical_features=None,
+ categorical_features="warn",
  monotonic_cst=None,
  interaction_cst=None,
  warm_start=False,
@@ -294,8 +303,9 @@ class HistGradientBoostingRegressor(BaseTransformer):
  'max_depth':(max_depth, None, False),
  'min_samples_leaf':(min_samples_leaf, 20, False),
  'l2_regularization':(l2_regularization, 0.0, False),
+ 'max_features':(max_features, 1.0, False),
  'max_bins':(max_bins, 255, False),
- 'categorical_features':(categorical_features, None, False),
+ 'categorical_features':(categorical_features, "warn", False),
  'monotonic_cst':(monotonic_cst, None, False),
  'interaction_cst':(interaction_cst, None, False),
  'warm_start':(warm_start, False, False),
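
Usage note: both HistGradientBoosting estimators add a `max_features` subsampling knob and move the `categorical_features` default from `None` to `"warn"` ahead of a future `"from_dtype"` default. A sketch of opting in explicitly; it assumes the local pandas path, where `category` dtypes survive intact (whether the Snowpark path preserves them is not shown in this diff):

    import pandas as pd
    from snowflake.ml.modeling.ensemble import HistGradientBoostingClassifier

    df = pd.DataFrame({
        "COLOR": pd.Series(["red", "blue", "red", "blue"], dtype="category"),
        "WEIGHT": [1.2, 3.4, 2.2, 3.1],
        "LABEL": [0, 1, 0, 1],
    })

    clf = HistGradientBoostingClassifier(
        categorical_features="from_dtype",  # treat `category`-dtype columns as categorical
        max_features=0.8,                   # subsample 80% of features per split (new knob)
        input_cols=["COLOR", "WEIGHT"],
        label_cols=["LABEL"],
        output_cols=["PRED"],
    )
    clf.fit(df)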
@@ -215,7 +215,7 @@ class RandomForestClassifier(BaseTransformer):
  When set to ``True``, reuse the solution of the previous call to fit
  and add more estimators to the ensemble, otherwise, just fit a whole
  new forest. See :term:`Glossary <warm_start>` and
- :ref:`gradient_boosting_warm_start` for details.
+ :ref:`tree_ensemble_warm_start` for details.
 
  class_weight: {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
  Weights associated with classes in the form ``{class_label: weight}``.
@@ -256,6 +256,23 @@ class RandomForestClassifier(BaseTransformer):
  - If int, then draw `max_samples` samples.
  - If float, then draw `max(round(n_samples * max_samples), 1)` samples. Thus,
  `max_samples` should be in the interval `(0.0, 1.0]`.
+
+ monotonic_cst: array-like of int of shape (n_features), default=None
+ Indicates the monotonicity constraint to enforce on each feature.
+ - 1: monotonic increase
+ - 0: no constraint
+ - -1: monotonic decrease
+
+ If monotonic_cst is None, no constraints are applied.
+
+ Monotonicity constraints are not supported for:
+ - multiclass classifications (i.e. when `n_classes > 2`),
+ - multioutput classifications (i.e. when `n_outputs_ > 1`),
+ - classifications trained on data with missing values.
+
+ The constraints hold over the probability of the positive class.
+
+ Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
  """
 
  def __init__( # type: ignore[no-untyped-def]
@@ -279,6 +296,7 @@ class RandomForestClassifier(BaseTransformer):
  class_weight=None,
  ccp_alpha=0.0,
  max_samples=None,
+ monotonic_cst=None,
  input_cols: Optional[Union[str, Iterable[str]]] = None,
  output_cols: Optional[Union[str, Iterable[str]]] = None,
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -317,7 +335,8 @@ class RandomForestClassifier(BaseTransformer):
  'warm_start':(warm_start, False, False),
  'class_weight':(class_weight, None, False),
  'ccp_alpha':(ccp_alpha, 0.0, False),
- 'max_samples':(max_samples, None, False),}
+ 'max_samples':(max_samples, None, False),
+ 'monotonic_cst':(monotonic_cst, None, False),}
  cleaned_up_init_args = validate_sklearn_args(
  args=init_args,
  klass=sklearn.ensemble.RandomForestClassifier
@@ -221,7 +221,7 @@ class RandomForestRegressor(BaseTransformer):
  When set to ``True``, reuse the solution of the previous call to fit
  and add more estimators to the ensemble, otherwise, just fit a whole
  new forest. See :term:`Glossary <warm_start>` and
- :ref:`gradient_boosting_warm_start` for details.
+ :ref:`tree_ensemble_warm_start` for details.
 
  ccp_alpha: non-negative float, default=0.0
  Complexity parameter used for Minimal Cost-Complexity Pruning. The
@@ -237,6 +237,20 @@ class RandomForestRegressor(BaseTransformer):
  - If int, then draw `max_samples` samples.
  - If float, then draw `max(round(n_samples * max_samples), 1)` samples. Thus,
  `max_samples` should be in the interval `(0.0, 1.0]`.
+
+ monotonic_cst: array-like of int of shape (n_features), default=None
+ Indicates the monotonicity constraint to enforce on each feature.
+ - 1: monotonically increasing
+ - 0: no constraint
+ - -1: monotonically decreasing
+
+ If monotonic_cst is None, no constraints are applied.
+
+ Monotonicity constraints are not supported for:
+ - multioutput regressions (i.e. when `n_outputs_ > 1`),
+ - regressions trained on data with missing values.
+
+ Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
  """
 
  def __init__( # type: ignore[no-untyped-def]
@@ -259,6 +273,7 @@ class RandomForestRegressor(BaseTransformer):
  warm_start=False,
  ccp_alpha=0.0,
  max_samples=None,
+ monotonic_cst=None,
  input_cols: Optional[Union[str, Iterable[str]]] = None,
  output_cols: Optional[Union[str, Iterable[str]]] = None,
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -296,7 +311,8 @@ class RandomForestRegressor(BaseTransformer):
  'verbose':(verbose, 0, False),
  'warm_start':(warm_start, False, False),
  'ccp_alpha':(ccp_alpha, 0.0, False),
- 'max_samples':(max_samples, None, False),}
+ 'max_samples':(max_samples, None, False),
+ 'monotonic_cst':(monotonic_cst, None, False),}
  cleaned_up_init_args = validate_sklearn_args(
  args=init_args,
  klass=sklearn.ensemble.RandomForestRegressor
@@ -116,7 +116,8 @@ class GenericUnivariateSelect(BaseTransformer):
  a single array scores.
 
  mode: {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}, default='percentile'
- Feature selection mode.
+ Feature selection mode. Note that the `'percentile'` and `'kbest'`
+ modes are supporting unsupervised feature selection (when `y` is `None`).
 
  param: "all", float or int, default=1e-5
  Parameter of the corresponding mode.
@@ -154,9 +154,11 @@ class SequentialFeatureSelector(BaseTransformer):
  - An iterable yielding (train, test) splits as arrays of indices.
 
  For integer/None inputs, if the estimator is a classifier and ``y`` is
- either binary or multiclass, :class:`StratifiedKFold` is used. In all
- other cases, :class:`KFold` is used. These splitters are instantiated
- with `shuffle=False` so the splits will be the same across calls.
+ either binary or multiclass,
+ :class:`~sklearn.model_selection.StratifiedKFold` is used. In all other
+ cases, :class:`~sklearn.model_selection.KFold` is used. These splitters
+ are instantiated with `shuffle=False` so the splits will be the same
+ across calls.
 
  Refer :ref:`User Guide <cross_validation>` for the various
  cross-validation strategies that can be used here.
@@ -121,7 +121,7 @@ class KernelRidge(BaseTransformer):
 
  kernel: str or callable, default="linear"
  Kernel mapping used internally. This parameter is directly passed to
- :class:`~sklearn.metrics.pairwise.pairwise_kernel`.
+ :class:`~sklearn.metrics.pairwise.pairwise_kernels`.
  If `kernel` is a string, it must be one of the metrics
  in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
  If `kernel` is "precomputed", X is assumed to be a kernel matrix.
@@ -139,7 +139,7 @@ class KernelRidge(BaseTransformer):
  the kernel; see the documentation for sklearn.metrics.pairwise.
  Ignored by other kernels.
 
- degree: int, default=3
+ degree: float, default=3
  Degree of the polynomial kernel. Ignored by other kernels.
 
  coef0: float, default=1
@@ -109,8 +109,8 @@ class ARDRegression(BaseTransformer):
  drop_input_cols: Optional[bool], default=False
  If set, the response of predict(), transform() methods will not contain input columns.
 
- max_iter: int, default=None
- Maximum number of iterations. If `None`, it corresponds to `max_iter=300`.
+ max_iter: int, default=300
+ Maximum number of iterations.
 
  tol: float, default=1e-3
  Stop the algorithm if w has converged.
@@ -148,15 +148,12 @@ class ARDRegression(BaseTransformer):
 
  verbose: bool, default=False
  Verbose mode when fitting the model.
-
- n_iter: int
- Maximum number of iterations.
  """
 
  def __init__( # type: ignore[no-untyped-def]
  self,
  *,
- max_iter=None,
+ max_iter=300,
  tol=0.001,
  alpha_1=1e-06,
  alpha_2=1e-06,
@@ -167,7 +164,6 @@ class ARDRegression(BaseTransformer):
  fit_intercept=True,
  copy_X=True,
  verbose=False,
- n_iter="deprecated",
  input_cols: Optional[Union[str, Iterable[str]]] = None,
  output_cols: Optional[Union[str, Iterable[str]]] = None,
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -189,7 +185,7 @@ class ARDRegression(BaseTransformer):
 
  self._deps = list(deps)
 
- init_args = {'max_iter':(max_iter, None, False),
+ init_args = {'max_iter':(max_iter, 300, False),
  'tol':(tol, 0.001, False),
  'alpha_1':(alpha_1, 1e-06, False),
  'alpha_2':(alpha_2, 1e-06, False),
@@ -199,8 +195,7 @@ class ARDRegression(BaseTransformer):
  'threshold_lambda':(threshold_lambda, 10000.0, False),
  'fit_intercept':(fit_intercept, True, False),
  'copy_X':(copy_X, True, False),
- 'verbose':(verbose, False, False),
- 'n_iter':(n_iter, "deprecated", False),}
+ 'verbose':(verbose, False, False),}
  cleaned_up_init_args = validate_sklearn_args(
  args=init_args,
  klass=sklearn.linear_model.ARDRegression
@@ -109,10 +109,9 @@ class BayesianRidge(BaseTransformer):
  drop_input_cols: Optional[bool], default=False
  If set, the response of predict(), transform() methods will not contain input columns.
 
- max_iter: int, default=None
+ max_iter: int, default=300
  Maximum number of iterations over the complete dataset before
- stopping independently of any early stopping criterion. If `None`, it
- corresponds to `max_iter=300`.
+ stopping independently of any early stopping criterion.
 
  tol: float, default=1e-3
  Stop the algorithm if w has converged.
@@ -157,15 +156,12 @@ class BayesianRidge(BaseTransformer):
 
  verbose: bool, default=False
  Verbose mode when fitting the model.
-
- n_iter: int
- Maximum number of iterations. Should be greater than or equal to 1.
  """
 
  def __init__( # type: ignore[no-untyped-def]
  self,
  *,
- max_iter=None,
+ max_iter=300,
  tol=0.001,
  alpha_1=1e-06,
  alpha_2=1e-06,
@@ -177,7 +173,6 @@ class BayesianRidge(BaseTransformer):
  fit_intercept=True,
  copy_X=True,
  verbose=False,
- n_iter="deprecated",
  input_cols: Optional[Union[str, Iterable[str]]] = None,
  output_cols: Optional[Union[str, Iterable[str]]] = None,
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -199,7 +194,7 @@ class BayesianRidge(BaseTransformer):
 
  self._deps = list(deps)
 
- init_args = {'max_iter':(max_iter, None, False),
+ init_args = {'max_iter':(max_iter, 300, False),
  'tol':(tol, 0.001, False),
  'alpha_1':(alpha_1, 1e-06, False),
  'alpha_2':(alpha_2, 1e-06, False),
@@ -210,8 +205,7 @@ class BayesianRidge(BaseTransformer):
  'compute_score':(compute_score, False, False),
  'fit_intercept':(fit_intercept, True, False),
  'copy_X':(copy_X, True, False),
- 'verbose':(verbose, False, False),
- 'n_iter':(n_iter, "deprecated", False),}
+ 'verbose':(verbose, False, False),}
  cleaned_up_init_args = validate_sklearn_args(
  args=init_args,
  klass=sklearn.linear_model.BayesianRidge
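
Migration note: `ARDRegression` and `BayesianRidge` drop the deprecated `n_iter` argument outright and give `max_iter` a concrete default of 300 (previously `None`, which already meant 300). Migration is a straight rename; a minimal sketch with invented column names:

    from snowflake.ml.modeling.linear_model import BayesianRidge

    # 1.6.x: BayesianRidge(n_iter=500, ...) -- no longer accepted in 1.7.0
    model = BayesianRidge(
        max_iter=500,             # was `n_iter`; omit it to get the default of 300
        input_cols=["X1", "X2"],  # illustrative names
        label_cols=["Y"],
        output_cols=["Y_PRED"],
    )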
@@ -131,6 +131,9 @@ class ElasticNet(BaseTransformer):
  Whether to use a precomputed Gram matrix to speed up
  calculations. The Gram matrix can also be passed as argument.
  For sparse input this option is always ``False`` to preserve sparsity.
+ Check :ref:`an example on how to use a precomputed Gram Matrix in ElasticNet
+ <sphx_glr_auto_examples_linear_model_plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py>`
+ for details.
 
  max_iter: int, default=1000
  The maximum number of iterations.
@@ -160,7 +160,7 @@ class ElasticNetCV(BaseTransformer):
  - :term:`CV splitter`,
  - An iterable yielding (train, test) splits as arrays of indices.
 
- For int/None inputs, :class:`KFold` is used.
+ For int/None inputs, :class:`~sklearn.model_selection.KFold` is used.
 
  Refer :ref:`User Guide <cross_validation>` for the various
  cross-validation strategies that can be used here.
@@ -117,14 +117,6 @@ class Lars(BaseTransformer):
  verbose: bool or int, default=False
  Sets the verbosity amount.
 
- normalize: bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
-
  precompute: bool, 'auto' or array-like , default='auto'
  Whether to use a precomputed Gram matrix to speed up
  calculations. If set to ``'auto'`` let us decide. The Gram
@@ -165,7 +157,6 @@ class Lars(BaseTransformer):
  *,
  fit_intercept=True,
  verbose=False,
- normalize="deprecated",
  precompute="auto",
  n_nonzero_coefs=500,
  eps=2.220446049250313e-16,
@@ -196,7 +187,6 @@ class Lars(BaseTransformer):
 
  init_args = {'fit_intercept':(fit_intercept, True, False),
  'verbose':(verbose, False, False),
- 'normalize':(normalize, "deprecated", False),
  'precompute':(precompute, "auto", False),
  'n_nonzero_coefs':(n_nonzero_coefs, 500, False),
  'eps':(eps, 2.220446049250313e-16, False),
@@ -120,14 +120,6 @@ class LarsCV(BaseTransformer):
  max_iter: int, default=500
  Maximum number of iterations to perform.
 
- normalize: bool, default=False
- This parameter is ignored when ``fit_intercept`` is set to False.
- If True, the regressors X will be normalized before regression by
- subtracting the mean and dividing by the l2-norm.
- If you wish to standardize, please use
- :class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``
- on an estimator with ``normalize=False``.
-
  precompute: bool, 'auto' or array-like , default='auto'
  Whether to use a precomputed Gram matrix to speed up
  calculations. If set to ``'auto'`` let us decide. The Gram matrix
@@ -142,7 +134,7 @@ class LarsCV(BaseTransformer):
  - :term:`CV splitter`,
  - An iterable yielding (train, test) splits as arrays of indices.
 
- For integer/None inputs, :class:`KFold` is used.
+ For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.
 
  Refer :ref:`User Guide <cross_validation>` for the various
  cross-validation strategies that can be used here.
@@ -174,7 +166,6 @@ class LarsCV(BaseTransformer):
  fit_intercept=True,
  verbose=False,
  max_iter=500,
- normalize="deprecated",
  precompute="auto",
  cv=None,
  max_n_alphas=1000,
@@ -205,7 +196,6 @@ class LarsCV(BaseTransformer):
  init_args = {'fit_intercept':(fit_intercept, True, False),
  'verbose':(verbose, False, False),
  'max_iter':(max_iter, 500, False),
- 'normalize':(normalize, "deprecated", False),
  'precompute':(precompute, "auto", False),
  'cv':(cv, None, False),
  'max_n_alphas':(max_n_alphas, 1000, False),
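
Migration note: `Lars` and `LarsCV` (and the other LARS variants in this release) lose the deprecated `normalize` argument entirely. The removed docstring itself pointed at the replacement: scale explicitly before fitting. A sketch using the package's own pipeline and scaler wrappers, with invented column names:

    from snowflake.ml.modeling.linear_model import Lars
    from snowflake.ml.modeling.pipeline import Pipeline
    from snowflake.ml.modeling.preprocessing import StandardScaler

    pipe = Pipeline(steps=[
        ("scale", StandardScaler(input_cols=["X1", "X2"], output_cols=["X1_S", "X2_S"])),
        ("lars", Lars(input_cols=["X1_S", "X2_S"], label_cols=["Y"], output_cols=["Y_PRED"])),
    ])
    # pipe.fit(train_df)

Note that `normalize=True` divided by the l2-norm rather than the standard deviation, so results will not be bit-identical; this mirrors the docstring's own suggestion, not the removed behavior exactly.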
@@ -151,7 +151,7 @@ class LassoCV(BaseTransformer):
  - :term:`CV splitter`,
  - An iterable yielding (train, test) splits as arrays of indices.
 
- For int/None inputs, :class:`KFold` is used.
+ For int/None inputs, :class:`~sklearn.model_selection.KFold` is used.
 
  Refer :ref:`User Guide <cross_validation>` for the various
  cross-validation strategies that can be used here.