snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +101 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +24 -18
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +51 -0
- snowflake/ml/jobs/_utils/payload_utils.py +352 -0
- snowflake/ml/jobs/_utils/spec_utils.py +298 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +91 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +13 -8
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +39 -5
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +6 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +6 -7
- snowflake/ml/model/_signatures/pandas_handler.py +3 -3
- snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
- snowflake/ml/model/_signatures/snowpark_handler.py +11 -5
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +17 -4
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +16 -178
- snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +167 -91
- snowflake/ml/modeling/xgboost/xgb_regressor.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +166 -88
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +63 -19
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +231 -226
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/xgboost/xgb_regressor.py

```diff
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
     FeatureSpec,
     ModelSignature,
     _infer_signature,
+    _truncate_data,
     _rename_signature_with_snowflake_identifiers,
 )
 
```
```diff
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
+INFER_SIGNATURE_MAX_ROWS = 100
+
 class XGBRegressor(BaseTransformer):
     r"""Implementation of the scikit-learn API for XGBoost regression
     For more details on this class, see [xgboost.XGBRegressor]
```
```diff
@@ -125,112 +128,172 @@ class XGBRegressor(BaseTransformer):
         can seriously hurt performance in gradient boosting. Set the batch_size as large as possible
         based on the available memory.
 
-    n_estimators: int
+    n_estimators: typing.Optional[int]
         Number of gradient boosted trees. Equivalent to number of boosting
         rounds.
 
-    max_depth: Optional[int]
+    max_depth: typing.Optional[int]
+
         Maximum tree depth for base learners.
-
+
+    max_leaves: typing.Optional[int]
+
         Maximum number of leaves; 0 indicates no limit.
-
+
+    max_bin: typing.Optional[int]
+
         If using histogram-based algorithm, maximum number of bins per feature
-
-
-
-
+
+    grow_policy: typing.Optional[str]
+
+        Tree growing policy.
+
+        - depthwise: Favors splitting at nodes closest to the node,
+        - lossguide: Favors splitting at nodes with highest loss change.
+
+    learning_rate: typing.Optional[float]
+
         Boosting learning rate (xgb's "eta")
-
+
+    verbosity: typing.Optional[int]
+
         The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
-
-
-
-
-
-
+
+    objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
+
+        Specify the learning task and the corresponding learning objective or a custom
+        objective function to be used.
+
+        For custom objective, see :doc:`/tutorials/custom_metric_obj` and
+        :ref:`custom-obj-metric` for more information, along with the end note for
+        function signatures.
+
+    booster: typing.Optional[str]
+
+        Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.
+
+    tree_method: typing.Optional[str]
+
         Specify which tree method to use. Default to auto. If this parameter is set to
         default, XGBoost will choose the most conservative option available. It's
         recommended to study this option from the parameters document :doc:`tree method
         </treemethod>`
-
+
+    n_jobs: typing.Optional[int]
+
         Number of parallel threads used to run xgboost. When used with other
         Scikit-Learn algorithms like grid search, you may choose which algorithm to
         parallelize and balance the threads. Creating thread contention will
         significantly slow down both algorithms.
-
-
-
-
+
+    gamma: typing.Optional[float]
+
+        (min_split_loss) Minimum loss reduction required to make a further partition on
+        a leaf node of the tree.
+
+    min_child_weight: typing.Optional[float]
+
         Minimum sum of instance weight(hessian) needed in a child.
-
+
+    max_delta_step: typing.Optional[float]
+
         Maximum delta step we allow each tree's weight estimation to be.
-
+
+    subsample: typing.Optional[float]
+
         Subsample ratio of the training instance.
-
-
-
-
-
-
+
+    sampling_method: typing.Optional[str]
+
+        Sampling method. Used only by the GPU version of ``hist`` tree method.
+
+        - ``uniform``: Select random training instances uniformly.
+        - ``gradient_based``: Select random training instances with higher probability
+          when the gradient and hessian are larger. (cf. CatBoost)
+
+    colsample_bytree: typing.Optional[float]
+
         Subsample ratio of columns when constructing each tree.
-
+
+    colsample_bylevel: typing.Optional[float]
+
         Subsample ratio of columns for each level.
-
+
+    colsample_bynode: typing.Optional[float]
+
         Subsample ratio of columns for each split.
-
+
+    reg_alpha: typing.Optional[float]
+
         L1 regularization term on weights (xgb's alpha).
-
+
+    reg_lambda: typing.Optional[float]
+
         L2 regularization term on weights (xgb's lambda).
-
+
+    scale_pos_weight: typing.Optional[float]
         Balancing of positive and negative weights.
-
+
+    base_score: typing.Optional[float]
+
         The initial prediction score of all instances, global bias.
-
+
+    random_state: typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
+
         Random number seed.
 
         Using gblinear booster with shotgun updater is nondeterministic as
         it uses Hogwild algorithm.
 
-    missing: float
-
-
+    missing: float
+
+        Value in the data which needs to be present as a missing value. Default to
+        :py:data:`numpy.nan`.
+
+    num_parallel_tree: typing.Optional[int]
+
         Used for boosting random forest.
-
+
+    monotone_constraints: typing.Union[typing.Dict[str, int], str, NoneType]
+
         Constraint of variable monotonicity. See :doc:`tutorial </tutorials/monotonic>`
         for more information.
-
+
+    interaction_constraints: typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
+
         Constraints for interaction representing permitted interactions. The
         constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
         3, 4]]``, where each inner list is a group of indices of features that are
         allowed to interact with each other. See :doc:`tutorial
         </tutorials/feature_interaction_constraint>` for more information
-
+
+    importance_type: typing.Optional[str]
+
         The feature importance type for the feature_importances\_ property:
 
         * For tree model, it's either "gain", "weight", "cover", "total_gain" or
           "total_cover".
-        * For linear model, only "weight" is defined and it's the normalized
-          without bias.
+        * For linear model, only "weight" is defined and it's the normalized
+          coefficients without bias.
+
+    device: typing.Optional[str]
+
+        Device ordinal, available options are `cpu`, `cuda`, and `gpu`.
+
+    validate_parameters: typing.Optional[bool]
 
-    gpu_id: Optional[int]
-        Device ordinal.
-    validate_parameters: Optional[bool]
         Give warnings for unknown parameter.
-
-        Force XGBoost to use specific predictor, available choices are [cpu_predictor,
-        gpu_predictor].
+
     enable_categorical: bool
 
-
-        should be used to specify categorical data type. Also, JSON/UBJSON
-        serialization format is required.
+        See the same parameter of :py:class:`DMatrix` for details.
 
-    feature_types:
+    feature_types: typing.Optional[typing.Sequence[str]]
 
         Used for specifying feature types without constructing a dataframe. See
         :py:class:`DMatrix` for details.
 
-    max_cat_to_onehot: Optional[int]
+    max_cat_to_onehot: typing.Optional[int]
 
         A threshold for deciding whether XGBoost should use one-hot encoding based split
         for categorical data. When number of categories is lesser than the threshold
```
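Taken together, this docstring hunk tracks the parameter surface of the XGBoost 2.x sklearn API: every parameter gains an explicit `typing.Optional[...]` annotation, the removed `gpu_id`/`predictor` knobs give way to `device`, and `max_leaves`, `max_bin`, `grow_policy`, `sampling_method`, and `multi_strategy` are now documented. A minimal sketch exercising a few of the newly documented knobs, using plain `xgboost` rather than the Snowflake wrapper and assuming an xgboost>=2.0 runtime:

```python
# Illustration only: parameters that the 1.7.4 docstring now documents.
# Assumes xgboost>=2.0; the Snowflake wrapper forwards the same keyword arguments.
import xgboost as xgb
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)

reg = xgb.XGBRegressor(
    n_estimators=100,
    max_leaves=0,                          # 0 means no limit
    grow_policy="depthwise",               # or "lossguide"
    tree_method="hist",
    device="cpu",                          # replaces the removed gpu_id/predictor knobs
    multi_strategy="one_output_per_tree",  # one model per target
)
reg.fit(X, y)
```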
```diff
@@ -239,36 +302,41 @@ class XGBRegressor(BaseTransformer):
         categorical feature support. See :doc:`Categorical Data
         </tutorials/categorical>` and :ref:`cat-param` for details.
 
-    max_cat_threshold: Optional[int]
+    max_cat_threshold: typing.Optional[int]
 
         Maximum number of categories considered for each split. Used only by
         partition-based splits for preventing over-fitting. Also, `enable_categorical`
         needs to be set to have categorical feature support. See :doc:`Categorical Data
         </tutorials/categorical>` and :ref:`cat-param` for details.
 
-
+    multi_strategy: typing.Optional[str]
+
+        The strategy used for training multi-target models, including multi-target
+        regression and multi-class classification. See :doc:`/tutorials/multioutput` for
+        more information.
+
+        - ``one_output_per_tree``: One model for each target.
+        - ``multi_output_tree``: Use multi-target trees.
+
+    eval_metric: typing.Union[str, typing.List[str], typing.Callable, NoneType]
 
         Metric used for monitoring the training result and early stopping. It can be a
         string or list of strings as names of predefined metric in XGBoost (See
-        doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
-        user defined metric that looks like `sklearn.metrics`.
+        doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
+        other user defined metric that looks like `sklearn.metrics`.
 
         If custom objective is also provided, then custom metric should implement the
         corresponding reverse link function.
 
         Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
-        object is provided, it's assumed to be a cost function and by default XGBoost
-        minimize the result during early stopping.
-
-        For advanced usage on Early stopping like directly choosing to maximize instead of
-        minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
+        object is provided, it's assumed to be a cost function and by default XGBoost
+        will minimize the result during early stopping.
 
-
-
+        For advanced usage on Early stopping like directly choosing to maximize instead
+        of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
 
-
-
-        being used.
+        See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
+        information.
 
         from sklearn.datasets import load_diabetes
         from sklearn.metrics import mean_absolute_error
```
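The surrounding docstring embeds a short `eval_metric` example whose tail (`reg.fit(X, y, eval_set=[(X, y)])`) is visible as context at the top of the next hunk. A runnable completion, assuming xgboost>=1.6, where a callable sklearn metric is accepted directly:

```python
# Completion of the docstring's eval_metric example (assumes xgboost>=1.6).
import xgboost as xgb
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_absolute_error

X, y = load_diabetes(return_X_y=True)
reg = xgb.XGBRegressor(
    tree_method="hist",
    eval_metric=mean_absolute_error,  # callable metric: treated as a cost, minimized
)
reg.fit(X, y, eval_set=[(X, y)])
```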
```diff
@@ -279,24 +347,29 @@ class XGBRegressor(BaseTransformer):
         )
         reg.fit(X, y, eval_set=[(X, y)])
 
-    early_stopping_rounds: Optional[int]
+    early_stopping_rounds: typing.Optional[int]
 
-        Activates early stopping. Validation metric needs to improve at least once in
-
-
+        - Activates early stopping. Validation metric needs to improve at least once in
+          every **early_stopping_rounds** round(s) to continue training. Requires at
+          least one item in **eval_set** in :py:meth:`fit`.
 
-
-
-
-
+        - If early stopping occurs, the model will have two additional attributes:
+          :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
+          :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
+          number of trees during inference. If users want to access the full model
+          (including trees built after early stopping), they can specify the
+          `iteration_range` in these inference methods. In addition, other utilities
+          like model plotting can also use the entire model.
 
-        If
-
-        :py:attr:`best_ntree_limit`.
+        - If you prefer to discard the trees after `best_iteration`, consider using the
+          callback function :py:class:`xgboost.callback.EarlyStopping`.
 
-
+        - If there's more than one item in **eval_set**, the last entry will be used for
+          early stopping. If there's more than one metric in **eval_metric**, the last
+          metric will be used for early stopping.
+
+    callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]]
 
-    callbacks: Optional[List[TrainingCallback]]
         List of callback functions that are applied at end of each iteration.
         It is possible to use predefined callbacks by using
         :ref:`Callback API <callback_api>`.
```
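The rewritten `early_stopping_rounds` notes point to :py:class:`xgboost.callback.EarlyStopping` for finer control. A hedged sketch of that route, using the standard xgboost>=1.6 callback API; `save_best=True` keeps only the trees up to `best_iteration`:

```python
# Early stopping via the callback API rather than early_stopping_rounds.
import xgboost as xgb
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0)

early_stop = xgb.callback.EarlyStopping(rounds=10, save_best=True)
reg = xgb.XGBRegressor(n_estimators=500, callbacks=[early_stop])
reg.fit(X_train, y_train, eval_set=[(X_valid, y_valid)])

print(reg.best_iteration, reg.best_score)  # set when early stopping triggers
```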
```diff
@@ -308,9 +381,11 @@ class XGBRegressor(BaseTransformer):
             for params in parameters_grid:
                 # be sure to (re)initialize the callbacks before each run
                 callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
-                xgboost.
+                reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
+                reg.fit(X, y)
+
+    kwargs: typing.Optional[typing.Any]
 
-    kwargs: dict, optional
         Keyword arguments for XGBoost Booster object. Full documentation of parameters
         can be found :doc:`here </parameter>`.
         Attempting to set a parameter via the constructor args and \*\*kwargs
```
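The loop in this hunk is the docstring's callback re-initialization example, now completed with `reg = ...` / `reg.fit(X, y)`. Filled out so it runs end to end; `custom_rates` and `parameters_grid` are illustrative stand-ins, not names from the package:

```python
import xgboost as xgb
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
parameters_grid = [{"max_depth": 3}, {"max_depth": 5}]

def custom_rates(epoch: int) -> float:
    # decay the learning rate a little every boosting round
    return 0.3 * (0.95 ** epoch)

for params in parameters_grid:
    # be sure to (re)initialize the callbacks before each run
    callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
    reg = xgb.XGBRegressor(n_estimators=50, callbacks=callbacks, **params)
    reg.fit(X, y)
```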
```diff
@@ -321,13 +396,16 @@ class XGBRegressor(BaseTransformer):
     with scikit-learn.
 
     A custom objective function can be provided for the ``objective``
-    parameter. In this case, it should have the signature
-    ``objective(y_true, y_pred
+    parameter. In this case, it should have the signature ``objective(y_true,
+    y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
+    -> [grad, hess]``:
 
         y_true: array_like of shape [n_samples]
             The target values
         y_pred: array_like of shape [n_samples]
             The predicted values
+        sample_weight :
+            Optional sample weights.
 
         grad: array_like of shape [n_samples]
             The value of the gradient for each sample point.
```
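The expanded text documents the custom-objective contract, ``objective(y_true, y_pred) -> [grad, hess]``. A minimal conforming objective, written here as an illustration using plain squared error:

```python
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_diabetes

def squared_error(y_true: np.ndarray, y_pred: np.ndarray):
    grad = y_pred - y_true       # d/dy_pred of 0.5 * (y_pred - y_true) ** 2
    hess = np.ones_like(y_true)  # second derivative is constant
    return grad, hess

X, y = load_diabetes(return_X_y=True)
reg = xgb.XGBRegressor(objective=squared_error, n_estimators=20)
reg.fit(X, y)
```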
```diff
@@ -627,7 +705,7 @@ class XGBRegressor(BaseTransformer):
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
                 expected_dtype = "array"
             else:
-                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
+                output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
                 # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
                 # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
```
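This hunk and the two below show the substantive 1.7.4 change in this file: signature inference now runs on at most INFER_SIGNATURE_MAX_ROWS rows instead of the full dataset. A hedged sketch of the idea; `truncate_data` here is a hypothetical pandas-only stand-in for the internal `_truncate_data` helper, whose real implementation would also need to handle Snowpark DataFrames:

```python
import pandas as pd

INFER_SIGNATURE_MAX_ROWS = 100  # cap introduced in 1.7.4

def truncate_data(data: pd.DataFrame, max_rows: int = INFER_SIGNATURE_MAX_ROWS) -> pd.DataFrame:
    # Schema inference only needs a representative sample of rows;
    # capping keeps it cheap on large training frames.
    return data.head(max_rows)
```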
```diff
@@ -1284,7 +1362,7 @@ class XGBRegressor(BaseTransformer):
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
+        inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
```
```diff
@@ -1292,7 +1370,7 @@ class XGBRegressor(BaseTransformer):
             # For classifier, the type of predict is the same as the type of label
             if self._sklearn_object._estimator_type == "classifier":
                 # label columns is the desired type for output
-                outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
+                outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
                 self._model_signature_dict["predict"] = ModelSignature(
```
|