snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. snowflake/cortex/__init__.py +4 -0
  2. snowflake/cortex/_complete.py +107 -64
  3. snowflake/cortex/_finetune.py +273 -0
  4. snowflake/cortex/_sse_client.py +91 -28
  5. snowflake/cortex/_util.py +30 -1
  6. snowflake/ml/_internal/telemetry.py +4 -2
  7. snowflake/ml/_internal/type_utils.py +3 -3
  8. snowflake/ml/_internal/utils/import_utils.py +31 -0
  9. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +13 -0
  10. snowflake/ml/data/__init__.py +5 -0
  11. snowflake/ml/data/_internal/arrow_ingestor.py +8 -0
  12. snowflake/ml/data/data_connector.py +1 -1
  13. snowflake/ml/data/torch_utils.py +33 -14
  14. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +5 -3
  15. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +7 -5
  16. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +4 -2
  17. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +3 -1
  18. snowflake/ml/feature_store/examples/example_helper.py +6 -3
  19. snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +4 -2
  20. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +4 -2
  21. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +3 -1
  22. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +3 -1
  23. snowflake/ml/feature_store/feature_store.py +1 -2
  24. snowflake/ml/feature_store/feature_view.py +5 -1
  25. snowflake/ml/model/_client/model/model_version_impl.py +145 -11
  26. snowflake/ml/model/_client/ops/model_ops.py +56 -16
  27. snowflake/ml/model/_client/ops/service_ops.py +46 -30
  28. snowflake/ml/model/_client/service/model_deployment_spec.py +19 -8
  29. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
  30. snowflake/ml/model/_client/sql/service.py +25 -1
  31. snowflake/ml/model/_model_composer/model_composer.py +2 -0
  32. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
  33. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  34. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
  35. snowflake/ml/model/_model_composer/model_method/model_method.py +1 -1
  36. snowflake/ml/model/_packager/model_env/model_env.py +12 -0
  37. snowflake/ml/model/_packager/model_handlers/_utils.py +6 -2
  38. snowflake/ml/model/_packager/model_handlers/catboost.py +4 -7
  39. snowflake/ml/model/_packager/model_handlers/custom.py +5 -1
  40. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +10 -1
  41. snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -7
  42. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -1
  43. snowflake/ml/model/_packager/model_handlers/sklearn.py +51 -7
  44. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +8 -66
  45. snowflake/ml/model/_packager/model_handlers/tensorflow.py +23 -6
  46. snowflake/ml/model/_packager/model_handlers/torchscript.py +14 -14
  47. snowflake/ml/model/_packager/model_handlers/xgboost.py +10 -40
  48. snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +2 -3
  49. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +5 -0
  50. snowflake/ml/model/_packager/model_packager.py +0 -11
  51. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -10
  52. snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -9
  53. snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} +14 -26
  54. snowflake/ml/model/_signatures/core.py +63 -16
  55. snowflake/ml/model/_signatures/pandas_handler.py +87 -27
  56. snowflake/ml/model/_signatures/pytorch_handler.py +2 -2
  57. snowflake/ml/model/_signatures/snowpark_handler.py +2 -1
  58. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -2
  59. snowflake/ml/model/_signatures/utils.py +4 -0
  60. snowflake/ml/model/custom_model.py +47 -7
  61. snowflake/ml/model/model_signature.py +40 -9
  62. snowflake/ml/model/type_hints.py +9 -1
  63. snowflake/ml/modeling/_internal/estimator_utils.py +13 -0
  64. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +7 -2
  65. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +16 -5
  66. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -2
  67. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -3
  68. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -8
  69. snowflake/ml/modeling/cluster/agglomerative_clustering.py +17 -19
  70. snowflake/ml/modeling/cluster/dbscan.py +5 -2
  71. snowflake/ml/modeling/cluster/feature_agglomeration.py +7 -19
  72. snowflake/ml/modeling/cluster/k_means.py +14 -19
  73. snowflake/ml/modeling/cluster/mini_batch_k_means.py +3 -3
  74. snowflake/ml/modeling/cluster/optics.py +6 -6
  75. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -3
  76. snowflake/ml/modeling/compose/column_transformer.py +15 -5
  77. snowflake/ml/modeling/compose/transformed_target_regressor.py +7 -6
  78. snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
  79. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
  80. snowflake/ml/modeling/covariance/min_cov_det.py +2 -2
  81. snowflake/ml/modeling/covariance/oas.py +1 -1
  82. snowflake/ml/modeling/decomposition/kernel_pca.py +2 -2
  83. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -12
  84. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -12
  85. snowflake/ml/modeling/decomposition/pca.py +28 -15
  86. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -0
  87. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -12
  88. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -11
  89. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -8
  90. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -8
  91. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +21 -2
  92. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +18 -2
  93. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +2 -0
  94. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +2 -0
  95. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +21 -8
  96. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +21 -11
  97. snowflake/ml/modeling/ensemble/random_forest_classifier.py +21 -2
  98. snowflake/ml/modeling/ensemble/random_forest_regressor.py +18 -2
  99. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +2 -1
  100. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  101. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +2 -2
  102. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +2 -4
  103. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +2 -4
  104. snowflake/ml/modeling/linear_model/ard_regression.py +5 -10
  105. snowflake/ml/modeling/linear_model/bayesian_ridge.py +5 -11
  106. snowflake/ml/modeling/linear_model/elastic_net.py +3 -0
  107. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  108. snowflake/ml/modeling/linear_model/lars.py +0 -10
  109. snowflake/ml/modeling/linear_model/lars_cv.py +1 -11
  110. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  111. snowflake/ml/modeling/linear_model/lasso_lars.py +0 -10
  112. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -11
  113. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +0 -10
  114. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -22
  115. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +30 -24
  116. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  117. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  118. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +4 -13
  119. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +4 -4
  120. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  121. snowflake/ml/modeling/linear_model/perceptron.py +3 -3
  122. snowflake/ml/modeling/linear_model/ransac_regressor.py +3 -2
  123. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +14 -6
  124. snowflake/ml/modeling/linear_model/ridge_cv.py +17 -11
  125. snowflake/ml/modeling/linear_model/sgd_classifier.py +2 -2
  126. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -1
  127. snowflake/ml/modeling/linear_model/sgd_regressor.py +12 -3
  128. snowflake/ml/modeling/manifold/isomap.py +1 -1
  129. snowflake/ml/modeling/manifold/mds.py +3 -3
  130. snowflake/ml/modeling/manifold/tsne.py +10 -4
  131. snowflake/ml/modeling/metrics/classification.py +12 -16
  132. snowflake/ml/modeling/metrics/ranking.py +3 -3
  133. snowflake/ml/modeling/metrics/regression.py +3 -3
  134. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +3 -3
  135. snowflake/ml/modeling/naive_bayes/categorical_nb.py +3 -3
  136. snowflake/ml/modeling/naive_bayes/complement_nb.py +3 -3
  137. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +3 -3
  138. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +10 -4
  139. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +5 -2
  140. snowflake/ml/modeling/neighbors/local_outlier_factor.py +2 -2
  141. snowflake/ml/modeling/neighbors/nearest_centroid.py +7 -14
  142. snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
  143. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -1
  144. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  145. snowflake/ml/modeling/neural_network/mlp_classifier.py +7 -1
  146. snowflake/ml/modeling/neural_network/mlp_regressor.py +3 -0
  147. snowflake/ml/modeling/pipeline/pipeline.py +16 -14
  148. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +8 -4
  149. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -7
  150. snowflake/ml/modeling/svm/linear_svc.py +25 -16
  151. snowflake/ml/modeling/svm/linear_svr.py +23 -17
  152. snowflake/ml/modeling/svm/nu_svc.py +5 -3
  153. snowflake/ml/modeling/svm/nu_svr.py +3 -1
  154. snowflake/ml/modeling/svm/svc.py +9 -5
  155. snowflake/ml/modeling/svm/svr.py +3 -1
  156. snowflake/ml/modeling/tree/decision_tree_classifier.py +21 -2
  157. snowflake/ml/modeling/tree/decision_tree_regressor.py +18 -2
  158. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -9
  159. snowflake/ml/modeling/tree/extra_tree_regressor.py +18 -2
  160. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +448 -0
  161. snowflake/ml/monitoring/_manager/model_monitor_manager.py +238 -0
  162. snowflake/ml/monitoring/entities/model_monitor_config.py +10 -10
  163. snowflake/ml/monitoring/model_monitor.py +37 -0
  164. snowflake/ml/registry/_manager/model_manager.py +15 -1
  165. snowflake/ml/registry/registry.py +32 -37
  166. snowflake/ml/version.py +1 -1
  167. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/METADATA +104 -12
  168. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/RECORD +172 -171
  169. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/WHEEL +1 -1
  170. snowflake/ml/monitoring/_client/model_monitor.py +0 -126
  171. snowflake/ml/monitoring/_client/model_monitor_manager.py +0 -361
  172. snowflake/ml/monitoring/_client/monitor_sql_client.py +0 -1335
  173. snowflake/ml/monitoring/entities/model_monitor_interval.py +0 -46
  174. /snowflake/ml/monitoring/{_client/model_monitor_version.py → model_monitor_version.py} +0 -0
  175. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/LICENSE.txt +0 -0
  176. {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.1.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/preprocessing/ordinal_encoder.py

@@ -5,16 +5,20 @@ from typing import Any, Dict, Iterable, List, Optional, Union
 
 import numpy as np
 import pandas as pd
-from sklearn import preprocessing, utils as sklearn_utils
+from sklearn import preprocessing
 
 from snowflake import snowpark
 from snowflake.ml._internal import telemetry, type_utils
 from snowflake.ml._internal.exceptions import error_codes, exceptions
-from snowflake.ml._internal.utils import identifier
+from snowflake.ml._internal.utils import identifier, import_utils
 from snowflake.ml.modeling.framework import _utils, base
 from snowflake.snowpark import functions as F, types as T
 from snowflake.snowpark._internal import utils as snowpark_utils
 
+is_scalar_nan = import_utils.import_with_fallbacks(
+    "sklearn.utils.is_scalar_nan", "sklearn.utils._missing.is_scalar_nan"
+)
+
 _COLUMN_NAME = "_COLUMN_NAME"
 _CATEGORY = "_CATEGORY"
 _INDEX = "_INDEX"
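Note: the `import_with_fallbacks` helper lives in snowflake/ml/_internal/utils/import_utils.py (file 8 above), whose body is not shown in this diff. A minimal sketch of such a helper, assuming it takes dotted attribute paths and returns the first one that resolves, could look like:

    import importlib
    from typing import Any

    def import_with_fallbacks(*targets: str) -> Any:
        """Return the first attribute that can be imported among dotted paths."""
        for target in targets:
            module_path, _, attr_name = target.rpartition(".")
            try:
                module = importlib.import_module(module_path)
                return getattr(module, attr_name)
            except (ImportError, AttributeError):
                continue  # try the next candidate path
        raise ImportError(f"none of the import targets could be resolved: {targets}")

This insulates OrdinalEncoder from scikit-learn relocating `is_scalar_nan` out of the public `sklearn.utils` namespace into `sklearn.utils._missing`.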
@@ -440,7 +444,7 @@ class OrdinalEncoder(base.BaseTransformer):
         used to encode a known category.
         """
         if self._missing_indices:
-            if not sklearn_utils.is_scalar_nan(self.encoded_missing_value):
+            if not is_scalar_nan(self.encoded_missing_value):
                 # Features are invalid when they contain a missing category
                 # and encoded_missing_value was already used to encode a
                 # known category
@@ -624,9 +628,7 @@ class OrdinalEncoder(base.BaseTransformer):
            )
 
        if self.handle_unknown == "use_encoded_value":
-           if not (
-               sklearn_utils.is_scalar_nan(self.unknown_value) or isinstance(self.unknown_value, numbers.Integral)
-           ):
+           if not (is_scalar_nan(self.unknown_value) or isinstance(self.unknown_value, numbers.Integral)):
                raise exceptions.SnowflakeMLException(
                    error_code=error_codes.INVALID_ATTRIBUTE,
                    original_exception=TypeError(
@@ -663,7 +665,7 @@ class OrdinalEncoder(base.BaseTransformer):
 
        if self.handle_unknown == "use_encoded_value":
            # left outer join has already filled unknown values with null
-           if not (self.unknown_value is None or sklearn_utils.is_scalar_nan(self.unknown_value)):
+           if not (self.unknown_value is None or is_scalar_nan(self.unknown_value)):
                transformed_dataset = transformed_dataset.na.fill(self.unknown_value, self.output_cols)
 
        return transformed_dataset
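The replacement matters because `is_scalar_nan` accepts arbitrary scalars without raising, unlike `math.isnan`. Behavior per scikit-learn's documented semantics (the import resolves through one of the two candidate paths above, depending on the installed version):

    import numpy as np
    from sklearn.utils import is_scalar_nan  # or sklearn.utils._missing on newer releases

    assert is_scalar_nan(float("nan"))
    assert is_scalar_nan(np.nan)
    assert not is_scalar_nan(None)   # None is missing, but not NaN
    assert not is_scalar_nan("nan")  # strings never count
    # math.isnan(None) or math.isnan("nan") would raise TypeError instead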

snowflake/ml/modeling/svm/linear_svc.py

@@ -120,12 +120,12 @@ class LinearSVC(BaseTransformer):
         square of the hinge loss. The combination of ``penalty='l1'``
         and ``loss='hinge'`` is not supported.
 
-    dual: "auto" or bool, default=True
+    dual: "auto" or bool, default="auto"
         Select the algorithm to either solve the dual or primal
         optimization problem. Prefer dual=False when n_samples > n_features.
         `dual="auto"` will choose the value of the parameter automatically,
         based on the values of `n_samples`, `n_features`, `loss`, `multi_class`
-        and `penalty`. If `n_samples` < `n_features` and optmizer supports
+        and `penalty`. If `n_samples` < `n_features` and optimizer supports
         chosen `loss`, `multi_class` and `penalty`, then dual will be set to True,
         otherwise it will be set to False.
 
@@ -135,6 +135,9 @@ class LinearSVC(BaseTransformer):
     C: float, default=1.0
         Regularization parameter. The strength of the regularization is
         inversely proportional to C. Must be strictly positive.
+        For an intuitive visualization of the effects of scaling
+        the regularization parameter C, see
+        :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.
 
     multi_class: {'ovr', 'crammer_singer'}, default='ovr'
         Determines the multi-class strategy if `y` contains more than
@@ -148,20 +151,26 @@ class LinearSVC(BaseTransformer):
         will be ignored.
 
     fit_intercept: bool, default=True
-        Whether to calculate the intercept for this model. If set
-        to false, no intercept will be used in calculations
-        (i.e. data is expected to be already centered).
+        Whether or not to fit an intercept. If set to True, the feature vector
+        is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where
+        1 corresponds to the intercept. If set to False, no intercept will be
+        used in calculations (i.e. data is expected to be already centered).
 
     intercept_scaling: float, default=1.0
-        When self.fit_intercept is True, instance vector x becomes
-        ``[x, self.intercept_scaling]``,
-        i.e. a "synthetic" feature with constant value equals to
-        intercept_scaling is appended to the instance vector.
-        The intercept becomes intercept_scaling * synthetic feature weight
-        Note! the synthetic feature weight is subject to l1/l2 regularization
-        as all other features.
-        To lessen the effect of regularization on synthetic feature weight
-        (and therefore on the intercept) intercept_scaling has to be increased.
+        When `fit_intercept` is True, the instance vector x becomes ``[x_1,
+        ..., x_n, intercept_scaling]``, i.e. a "synthetic" feature with a
+        constant value equal to `intercept_scaling` is appended to the instance
+        vector. The intercept becomes intercept_scaling * synthetic feature
+        weight. Note that liblinear internally penalizes the intercept,
+        treating it like any other term in the feature vector. To reduce the
+        impact of the regularization on the intercept, the `intercept_scaling`
+        parameter can be set to a value greater than 1; the higher the value of
+        `intercept_scaling`, the lower the impact of regularization on it.
+        Then, the weights become `[w_x_1, ..., w_x_n,
+        w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent
+        the feature weights and the intercept weight is scaled by
+        `intercept_scaling`. This scaling allows the intercept term to have a
+        different regularization behavior compared to the other features.
 
     class_weight: dict or 'balanced', default=None
         Set the parameter C of class i to ``class_weight[i]*C`` for
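A small numeric illustration of the intercept_scaling mechanics described above (all values invented): liblinear learns a weight for the appended synthetic feature, and the reported intercept is that weight times `intercept_scaling`.

    import numpy as np

    intercept_scaling = 10.0
    x = np.array([0.5, -1.2])                  # original instance [x_1, x_2]
    x_aug = np.append(x, intercept_scaling)    # [x_1, x_2, intercept_scaling]
    w_aug = np.array([0.8, 0.3, 0.05])         # learned weights, incl. synthetic feature
    intercept = w_aug[-1] * intercept_scaling  # 0.05 * 10.0 = 0.5
    decision = x_aug @ w_aug                   # same as x @ w_aug[:2] + intercept
    assert np.isclose(decision, x @ w_aug[:2] + intercept)

Only the small synthetic weight (0.05), not the effective intercept (0.5), is regularized, which is why raising `intercept_scaling` lessens the penalty's pull on the intercept.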
@@ -193,7 +202,7 @@ class LinearSVC(BaseTransformer):
         *,
         penalty="l2",
         loss="squared_hinge",
-        dual="warn",
+        dual="auto",
         tol=0.0001,
         C=1.0,
         multi_class="ovr",
@@ -226,7 +235,7 @@ class LinearSVC(BaseTransformer):
 
         init_args = {'penalty':(penalty, "l2", False),
                      'loss':(loss, "squared_hinge", False),
-                     'dual':(dual, "warn", False),
+                     'dual':(dual, "auto", False),
                      'tol':(tol, 0.0001, False),
                      'C':(C, 1.0, False),
                      'multi_class':(multi_class, "ovr", False),
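The `(value, default, ...)` tuples feed `validate_sklearn_args`, whose implementation is not part of this diff. A plausible sketch under stated assumptions (that non-default arguments are passed through, that arguments unknown to the installed scikit-learn are rejected, and that the third tuple element marks deprecation) might be:

    import inspect
    from typing import Any, Dict, Tuple

    def validate_sklearn_args(args: Dict[str, Tuple[Any, Any, bool]], klass: type) -> Dict[str, Any]:
        """Hypothetical: keep non-default args accepted by the installed sklearn class."""
        accepted = set(inspect.signature(klass.__init__).parameters)
        cleaned = {}
        for name, (value, default, _is_deprecated) in args.items():
            if value == default:
                continue  # omit defaults so sklearn's own defaults apply
            if name not in accepted:
                raise RuntimeError(
                    f"{klass.__name__} does not accept {name!r}; "
                    "check the installed scikit-learn version"
                )
            cleaned[name] = value
        return cleaned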

snowflake/ml/modeling/svm/linear_svr.py

@@ -127,27 +127,33 @@ class LinearSVR(BaseTransformer):
         loss ('squared_epsilon_insensitive') is the L2 loss.
 
     fit_intercept: bool, default=True
-        Whether to calculate the intercept for this model. If set
-        to false, no intercept will be used in calculations
-        (i.e. data is expected to be already centered).
+        Whether or not to fit an intercept. If set to True, the feature vector
+        is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where
+        1 corresponds to the intercept. If set to False, no intercept will be
+        used in calculations (i.e. data is expected to be already centered).
 
     intercept_scaling: float, default=1.0
-        When self.fit_intercept is True, instance vector x becomes
-        [x, self.intercept_scaling],
-        i.e. a "synthetic" feature with constant value equals to
-        intercept_scaling is appended to the instance vector.
-        The intercept becomes intercept_scaling * synthetic feature weight
-        Note! the synthetic feature weight is subject to l1/l2 regularization
-        as all other features.
-        To lessen the effect of regularization on synthetic feature weight
-        (and therefore on the intercept) intercept_scaling has to be increased.
-
-    dual: "auto" or bool, default=True
+        When `fit_intercept` is True, the instance vector x becomes `[x_1, ...,
+        x_n, intercept_scaling]`, i.e. a "synthetic" feature with a constant
+        value equal to `intercept_scaling` is appended to the instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note that liblinear internally penalizes the intercept, treating it
+        like any other term in the feature vector. To reduce the impact of the
+        regularization on the intercept, the `intercept_scaling` parameter can
+        be set to a value greater than 1; the higher the value of
+        `intercept_scaling`, the lower the impact of regularization on it.
+        Then, the weights become `[w_x_1, ..., w_x_n,
+        w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent
+        the feature weights and the intercept weight is scaled by
+        `intercept_scaling`. This scaling allows the intercept term to have a
+        different regularization behavior compared to the other features.
+
+    dual: "auto" or bool, default="auto"
         Select the algorithm to either solve the dual or primal
         optimization problem. Prefer dual=False when n_samples > n_features.
         `dual="auto"` will choose the value of the parameter automatically,
         based on the values of `n_samples`, `n_features` and `loss`. If
-        `n_samples` < `n_features` and optmizer supports chosen `loss`,
+        `n_samples` < `n_features` and optimizer supports chosen `loss`,
         then dual will be set to True, otherwise it will be set to False.
 
     verbose: int, default=0
@@ -173,7 +179,7 @@ class LinearSVR(BaseTransformer):
         loss="epsilon_insensitive",
         fit_intercept=True,
         intercept_scaling=1.0,
-        dual="warn",
+        dual="auto",
         verbose=0,
         random_state=None,
         max_iter=1000,
@@ -204,7 +210,7 @@ class LinearSVR(BaseTransformer):
                      'loss':(loss, "epsilon_insensitive", False),
                      'fit_intercept':(fit_intercept, True, False),
                      'intercept_scaling':(intercept_scaling, 1.0, False),
-                     'dual':(dual, "warn", False),
+                     'dual':(dual, "auto", False),
                      'verbose':(verbose, 0, False),
                      'random_state':(random_state, None, False),
                      'max_iter':(max_iter, 1000, False),}

snowflake/ml/modeling/svm/nu_svc.py

@@ -115,9 +115,11 @@ class NuSVC(BaseTransformer):
         Should be in the interval (0, 1].
 
     kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
-        Specifies the kernel type to be used in the algorithm.
-        If none is given, 'rbf' will be used. If a callable is given it is
-        used to precompute the kernel matrix.
+        Specifies the kernel type to be used in the algorithm.
+        If none is given, 'rbf' will be used. If a callable is given it is
+        used to precompute the kernel matrix. For an intuitive
+        visualization of different kernel types see
+        :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.
 
     degree: int, default=3
         Degree of the polynomial kernel function ('poly').

snowflake/ml/modeling/svm/nu_svr.py

@@ -115,7 +115,9 @@ class NuSVR(BaseTransformer):
         default 0.5 will be taken.
 
     C: float, default=1.0
-        Penalty parameter C of the error term.
+        Penalty parameter C of the error term. For an intuitive visualization
+        of the effects of scaling the regularization parameter C, see
+        :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.
 
     kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
         Specifies the kernel type to be used in the algorithm.

snowflake/ml/modeling/svm/svc.py

@@ -112,13 +112,17 @@ class SVC(BaseTransformer):
     C: float, default=1.0
         Regularization parameter. The strength of the regularization is
         inversely proportional to C. Must be strictly positive. The penalty
-        is a squared l2 penalty.
+        is a squared l2 penalty. For an intuitive visualization of the effects
+        of scaling the regularization parameter C, see
+        :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.
 
     kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf'
-        Specifies the kernel type to be used in the algorithm.
-        If none is given, 'rbf' will be used. If a callable is given it is
-        used to pre-compute the kernel matrix from data matrices; that matrix
-        should be an array of shape ``(n_samples, n_samples)``.
+        Specifies the kernel type to be used in the algorithm. If
+        none is given, 'rbf' will be used. If a callable is given it is used to
+        pre-compute the kernel matrix from data matrices; that matrix should be
+        an array of shape ``(n_samples, n_samples)``. For an intuitive
+        visualization of different kernel types see
+        :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.
 
     degree: int, default=3
         Degree of the polynomial kernel function ('poly').
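As the kernel description notes, a callable must return the kernel matrix for two data matrices. A minimal sketch using the underlying scikit-learn estimator (callable kernels behave the same way there):

    import numpy as np
    from sklearn.svm import SVC

    def linear_kernel(X, Y):
        # must return an array of shape (n_samples_X, n_samples_Y)
        return X @ Y.T

    X = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]])
    y = np.array([0, 1, 1, 0])
    clf = SVC(kernel=linear_kernel).fit(X, y)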

snowflake/ml/modeling/svm/svr.py

@@ -136,7 +136,9 @@ class SVR(BaseTransformer):
     C: float, default=1.0
         Regularization parameter. The strength of the regularization is
         inversely proportional to C. Must be strictly positive.
-        The penalty is a squared l2 penalty.
+        The penalty is a squared l2. For an intuitive visualization of the
+        effects of scaling the regularization parameter C, see
+        :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.
 
     epsilon: float, default=0.1
         Epsilon in the epsilon-SVR model. It specifies the epsilon-tube

snowflake/ml/modeling/tree/decision_tree_classifier.py

@@ -149,7 +149,7 @@ class DecisionTreeClassifier(BaseTransformer):
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
 
-    max_features: int, float or {"auto", "sqrt", "log2"}, default=None
+    max_features: int, float or {"sqrt", "log2"}, default=None
         The number of features to consider when looking for the best split:
 
         - If int, then consider `max_features` features at each split.
@@ -223,6 +223,23 @@ class DecisionTreeClassifier(BaseTransformer):
         subtree with the largest cost complexity that is smaller than
         ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
         :ref:`minimal_cost_complexity_pruning` for details.
+
+    monotonic_cst: array-like of int of shape (n_features), default=None
+        Indicates the monotonicity constraint to enforce on each feature.
+          - 1: monotonic increase
+          - 0: no constraint
+          - -1: monotonic decrease
+
+        If monotonic_cst is None, no constraints are applied.
+
+        Monotonicity constraints are not supported for:
+          - multiclass classifications (i.e. when `n_classes > 2`),
+          - multioutput classifications (i.e. when `n_outputs_ > 1`),
+          - classifications trained on data with missing values.
+
+        The constraints hold over the probability of the positive class.
+
+        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
     """
 
    def __init__(  # type: ignore[no-untyped-def]
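A hedged usage sketch of the new parameter (column names are invented; the wrapper forwards `monotonic_cst` to `sklearn.tree.DecisionTreeClassifier`, so a scikit-learn version that supports it, 1.4+, is assumed):

    from snowflake.ml.modeling.tree import DecisionTreeClassifier

    clf = DecisionTreeClassifier(
        input_cols=["CREDIT_SCORE", "REGION_CODE"],
        label_cols=["DEFAULTED"],
        output_cols=["PREDICTED_DEFAULT"],
        # -1: P(positive class) may only fall as CREDIT_SCORE rises; 0: unconstrained
        monotonic_cst=[-1, 0],
    )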
@@ -240,6 +257,7 @@ class DecisionTreeClassifier(BaseTransformer):
         min_impurity_decrease=0.0,
         class_weight=None,
         ccp_alpha=0.0,
+        monotonic_cst=None,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -272,7 +290,8 @@ class DecisionTreeClassifier(BaseTransformer):
                      'max_leaf_nodes':(max_leaf_nodes, None, False),
                      'min_impurity_decrease':(min_impurity_decrease, 0.0, False),
                      'class_weight':(class_weight, None, False),
-                     'ccp_alpha':(ccp_alpha, 0.0, False),}
+                     'ccp_alpha':(ccp_alpha, 0.0, False),
+                     'monotonic_cst':(monotonic_cst, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.tree.DecisionTreeClassifier

snowflake/ml/modeling/tree/decision_tree_regressor.py

@@ -154,7 +154,7 @@ class DecisionTreeRegressor(BaseTransformer):
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
 
-    max_features: int, float or {"auto", "sqrt", "log2"}, default=None
+    max_features: int, float or {"sqrt", "log2"}, default=None
         The number of features to consider when looking for the best split:
 
         - If int, then consider `max_features` features at each split.
@@ -207,6 +207,20 @@ class DecisionTreeRegressor(BaseTransformer):
         subtree with the largest cost complexity that is smaller than
         ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
         :ref:`minimal_cost_complexity_pruning` for details.
+
+    monotonic_cst: array-like of int of shape (n_features), default=None
+        Indicates the monotonicity constraint to enforce on each feature.
+          - 1: monotonic increase
+          - 0: no constraint
+          - -1: monotonic decrease
+
+        If monotonic_cst is None, no constraints are applied.
+
+        Monotonicity constraints are not supported for:
+          - multioutput regressions (i.e. when `n_outputs_ > 1`),
+          - regressions trained on data with missing values.
+
+        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
     """
 
    def __init__(  # type: ignore[no-untyped-def]
@@ -223,6 +237,7 @@ class DecisionTreeRegressor(BaseTransformer):
         max_leaf_nodes=None,
         min_impurity_decrease=0.0,
         ccp_alpha=0.0,
+        monotonic_cst=None,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
         label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -254,7 +269,8 @@ class DecisionTreeRegressor(BaseTransformer):
                      'random_state':(random_state, None, False),
                      'max_leaf_nodes':(max_leaf_nodes, None, False),
                      'min_impurity_decrease':(min_impurity_decrease, 0.0, False),
-                     'ccp_alpha':(ccp_alpha, 0.0, False),}
+                     'ccp_alpha':(ccp_alpha, 0.0, False),
+                     'monotonic_cst':(monotonic_cst, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.tree.DecisionTreeRegressor

snowflake/ml/modeling/tree/extra_tree_classifier.py

@@ -149,16 +149,16 @@ class ExtraTreeClassifier(BaseTransformer):
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
 
-    max_features: int, float, {"auto", "sqrt", "log2"} or None, default="sqrt"
+    max_features: int, float, {"sqrt", "log2"} or None, default="sqrt"
         The number of features to consider when looking for the best split:
 
-        - If int, then consider `max_features` features at each split.
-        - If float, then `max_features` is a fraction and
-          `max(1, int(max_features * n_features_in_))` features are considered at
-          each split.
-        - If "sqrt", then `max_features=sqrt(n_features)`.
-        - If "log2", then `max_features=log2(n_features)`.
-        - If None, then `max_features=n_features`.
+            - If int, then consider `max_features` features at each split.
+            - If float, then `max_features` is a fraction and
+              `max(1, int(max_features * n_features_in_))` features are considered at
+              each split.
+            - If "sqrt", then `max_features=sqrt(n_features)`.
+            - If "log2", then `max_features=log2(n_features)`.
+            - If None, then `max_features=n_features`.
 
         Note: the search for a split does not stop until at least one
         valid partition of the node samples is found, even if it requires to
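The float case above is easy to sanity-check by hand, e.g. with ten input features:

    n_features_in_ = 10  # hypothetical feature count
    assert max(1, int(0.25 * n_features_in_)) == 2  # int(2.5) truncates to 2
    assert max(1, int(0.01 * n_features_in_)) == 1  # never fewer than one feature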
@@ -215,6 +215,23 @@ class ExtraTreeClassifier(BaseTransformer):
         subtree with the largest cost complexity that is smaller than
         ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
         :ref:`minimal_cost_complexity_pruning` for details.
+
+    monotonic_cst: array-like of int of shape (n_features), default=None
+        Indicates the monotonicity constraint to enforce on each feature.
+          - 1: monotonic increase
+          - 0: no constraint
+          - -1: monotonic decrease
+
+        If monotonic_cst is None, no constraints are applied.
+
+        Monotonicity constraints are not supported for:
+          - multiclass classifications (i.e. when `n_classes > 2`),
+          - multioutput classifications (i.e. when `n_outputs_ > 1`),
+          - classifications trained on data with missing values.
+
+        The constraints hold over the probability of the positive class.
+
+        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
     """
 
    def __init__(  # type: ignore[no-untyped-def]
220
237
  def __init__( # type: ignore[no-untyped-def]
@@ -232,6 +249,7 @@ class ExtraTreeClassifier(BaseTransformer):
232
249
  min_impurity_decrease=0.0,
233
250
  class_weight=None,
234
251
  ccp_alpha=0.0,
252
+ monotonic_cst=None,
235
253
  input_cols: Optional[Union[str, Iterable[str]]] = None,
236
254
  output_cols: Optional[Union[str, Iterable[str]]] = None,
237
255
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -264,7 +282,8 @@ class ExtraTreeClassifier(BaseTransformer):
                      'max_leaf_nodes':(max_leaf_nodes, None, False),
                      'min_impurity_decrease':(min_impurity_decrease, 0.0, False),
                      'class_weight':(class_weight, None, False),
-                     'ccp_alpha':(ccp_alpha, 0.0, False),}
+                     'ccp_alpha':(ccp_alpha, 0.0, False),
+                     'monotonic_cst':(monotonic_cst, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.tree.ExtraTreeClassifier

snowflake/ml/modeling/tree/extra_tree_regressor.py

@@ -154,7 +154,7 @@ class ExtraTreeRegressor(BaseTransformer):
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
 
-    max_features: int, float, {"auto", "sqrt", "log2"} or None, default=1.0
+    max_features: int, float, {"sqrt", "log2"} or None, default=1.0
         The number of features to consider when looking for the best split:
 
         - If int, then consider `max_features` features at each split.
@@ -199,6 +199,20 @@ class ExtraTreeRegressor(BaseTransformer):
         subtree with the largest cost complexity that is smaller than
         ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
         :ref:`minimal_cost_complexity_pruning` for details.
+
+    monotonic_cst: array-like of int of shape (n_features), default=None
+        Indicates the monotonicity constraint to enforce on each feature.
+          - 1: monotonic increase
+          - 0: no constraint
+          - -1: monotonic decrease
+
+        If monotonic_cst is None, no constraints are applied.
+
+        Monotonicity constraints are not supported for:
+          - multioutput regressions (i.e. when `n_outputs_ > 1`),
+          - regressions trained on data with missing values.
+
+        Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
     """
 
    def __init__(  # type: ignore[no-untyped-def]
204
218
  def __init__( # type: ignore[no-untyped-def]
@@ -215,6 +229,7 @@ class ExtraTreeRegressor(BaseTransformer):
215
229
  min_impurity_decrease=0.0,
216
230
  max_leaf_nodes=None,
217
231
  ccp_alpha=0.0,
232
+ monotonic_cst=None,
218
233
  input_cols: Optional[Union[str, Iterable[str]]] = None,
219
234
  output_cols: Optional[Union[str, Iterable[str]]] = None,
220
235
  label_cols: Optional[Union[str, Iterable[str]]] = None,
@@ -246,7 +261,8 @@ class ExtraTreeRegressor(BaseTransformer):
                      'random_state':(random_state, None, False),
                      'min_impurity_decrease':(min_impurity_decrease, 0.0, False),
                      'max_leaf_nodes':(max_leaf_nodes, None, False),
-                     'ccp_alpha':(ccp_alpha, 0.0, False),}
+                     'ccp_alpha':(ccp_alpha, 0.0, False),
+                     'monotonic_cst':(monotonic_cst, None, False),}
         cleaned_up_init_args = validate_sklearn_args(
             args=init_args,
             klass=sklearn.tree.ExtraTreeRegressor