snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +101 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/platform_capabilities.py +87 -0
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/dataset/dataset.py +0 -1
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +24 -18
- snowflake/ml/jobs/__init__.py +21 -0
- snowflake/ml/jobs/_utils/constants.py +51 -0
- snowflake/ml/jobs/_utils/payload_utils.py +352 -0
- snowflake/ml/jobs/_utils/spec_utils.py +298 -0
- snowflake/ml/jobs/_utils/types.py +39 -0
- snowflake/ml/jobs/decorators.py +91 -0
- snowflake/ml/jobs/job.py +113 -0
- snowflake/ml/jobs/manager.py +298 -0
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +13 -8
- snowflake/ml/model/_client/ops/service_ops.py +1 -11
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_client/sql/service.py +13 -6
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +39 -5
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +6 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/base_handler.py +1 -2
- snowflake/ml/model/_signatures/builtins_handler.py +2 -2
- snowflake/ml/model/_signatures/numpy_handler.py +6 -7
- snowflake/ml/model/_signatures/pandas_handler.py +3 -3
- snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
- snowflake/ml/model/_signatures/snowpark_handler.py +11 -5
- snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
- snowflake/ml/model/model_signature.py +17 -4
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
- snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
- snowflake/ml/modeling/cluster/birch.py +6 -3
- snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
- snowflake/ml/modeling/cluster/dbscan.py +6 -3
- snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
- snowflake/ml/modeling/cluster/k_means.py +6 -3
- snowflake/ml/modeling/cluster/mean_shift.py +6 -3
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
- snowflake/ml/modeling/cluster/optics.py +6 -3
- snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
- snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
- snowflake/ml/modeling/compose/column_transformer.py +6 -3
- snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
- snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
- snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
- snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
- snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
- snowflake/ml/modeling/covariance/oas.py +6 -3
- snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
- snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
- snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
- snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
- snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/pca.py +6 -3
- snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
- snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
- snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
- snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
- snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
- snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
- snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
- snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
- snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
- snowflake/ml/modeling/impute/knn_imputer.py +6 -3
- snowflake/ml/modeling/impute/missing_indicator.py +6 -3
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/lars.py +6 -3
- snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
- snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/perceptron.py +6 -3
- snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/ridge.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
- snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
- snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
- snowflake/ml/modeling/manifold/isomap.py +6 -3
- snowflake/ml/modeling/manifold/mds.py +6 -3
- snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
- snowflake/ml/modeling/manifold/tsne.py +6 -3
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
- snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
- snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
- snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
- snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
- snowflake/ml/modeling/pipeline/pipeline.py +16 -178
- snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
- snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
- snowflake/ml/modeling/svm/linear_svc.py +6 -3
- snowflake/ml/modeling/svm/linear_svr.py +6 -3
- snowflake/ml/modeling/svm/nu_svc.py +6 -3
- snowflake/ml/modeling/svm/nu_svr.py +6 -3
- snowflake/ml/modeling/svm/svc.py +6 -3
- snowflake/ml/modeling/svm/svr.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
- snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
- snowflake/ml/modeling/xgboost/xgb_classifier.py +167 -91
- snowflake/ml/modeling/xgboost/xgb_regressor.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +166 -88
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +166 -88
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +63 -19
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +231 -226
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
|
|
37
37
|
FeatureSpec,
|
38
38
|
ModelSignature,
|
39
39
|
_infer_signature,
|
40
|
+
_truncate_data,
|
40
41
|
_rename_signature_with_snowflake_identifiers,
|
41
42
|
)
|
42
43
|
|
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
|
|
57
58
|
|
58
59
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
59
60
|
|
61
|
+
INFER_SIGNATURE_MAX_ROWS = 100
|
62
|
+
|
60
63
|
class XGBRFClassifier(BaseTransformer):
|
61
64
|
r"""scikit-learn API for XGBoost random forest classification
|
62
65
|
For more details on this class, see [xgboost.XGBRFClassifier]
|
@@ -125,111 +128,171 @@ class XGBRFClassifier(BaseTransformer):
|
|
125
128
|
can seriously hurt performance in gradient boosting. Set the batch_size as large as possible
|
126
129
|
based on the available memory.
|
127
130
|
|
128
|
-
n_estimators: int
|
131
|
+
n_estimators: Optional[int]
|
129
132
|
Number of trees in random forest to fit.
|
130
133
|
|
131
|
-
max_depth: Optional[int]
|
134
|
+
max_depth: typing.Optional[int]
|
135
|
+
|
132
136
|
Maximum tree depth for base learners.
|
133
|
-
|
137
|
+
|
138
|
+
max_leaves: typing.Optional[int]
|
139
|
+
|
134
140
|
Maximum number of leaves; 0 indicates no limit.
|
135
|
-
|
141
|
+
|
142
|
+
max_bin: typing.Optional[int]
|
143
|
+
|
136
144
|
If using histogram-based algorithm, maximum number of bins per feature
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
145
|
+
|
146
|
+
grow_policy: typing.Optional[str]
|
147
|
+
|
148
|
+
Tree growing policy.
|
149
|
+
|
150
|
+
- depthwise: Favors splitting at nodes closest to the node,
|
151
|
+
- lossguide: Favors splitting at nodes with highest loss change.
|
152
|
+
|
153
|
+
learning_rate: typing.Optional[float]
|
154
|
+
|
141
155
|
Boosting learning rate (xgb's "eta")
|
142
|
-
|
156
|
+
|
157
|
+
verbosity: typing.Optional[int]
|
158
|
+
|
143
159
|
The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
160
|
+
|
161
|
+
objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
|
162
|
+
|
163
|
+
Specify the learning task and the corresponding learning objective or a custom
|
164
|
+
objective function to be used.
|
165
|
+
|
166
|
+
For custom objective, see :doc:`/tutorials/custom_metric_obj` and
|
167
|
+
:ref:`custom-obj-metric` for more information, along with the end note for
|
168
|
+
function signatures.
|
169
|
+
|
170
|
+
booster: typing.Optional[str]
|
171
|
+
|
172
|
+
Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.
|
173
|
+
|
174
|
+
tree_method: typing.Optional[str]
|
175
|
+
|
150
176
|
Specify which tree method to use. Default to auto. If this parameter is set to
|
151
177
|
default, XGBoost will choose the most conservative option available. It's
|
152
178
|
recommended to study this option from the parameters document :doc:`tree method
|
153
179
|
</treemethod>`
|
154
|
-
|
180
|
+
|
181
|
+
n_jobs: typing.Optional[int]
|
182
|
+
|
155
183
|
Number of parallel threads used to run xgboost. When used with other
|
156
184
|
Scikit-Learn algorithms like grid search, you may choose which algorithm to
|
157
185
|
parallelize and balance the threads. Creating thread contention will
|
158
186
|
significantly slow down both algorithms.
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
187
|
+
|
188
|
+
gamma: typing.Optional[float]
|
189
|
+
|
190
|
+
(min_split_loss) Minimum loss reduction required to make a further partition on
|
191
|
+
a leaf node of the tree.
|
192
|
+
|
193
|
+
min_child_weight: typing.Optional[float]
|
194
|
+
|
163
195
|
Minimum sum of instance weight(hessian) needed in a child.
|
164
|
-
|
196
|
+
|
197
|
+
max_delta_step: typing.Optional[float]
|
198
|
+
|
165
199
|
Maximum delta step we allow each tree's weight estimation to be.
|
166
|
-
|
200
|
+
|
201
|
+
subsample: typing.Optional[float]
|
202
|
+
|
167
203
|
Subsample ratio of the training instance.
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
204
|
+
|
205
|
+
sampling_method: typing.Optional[str]
|
206
|
+
|
207
|
+
Sampling method. Used only by the GPU version of ``hist`` tree method.
|
208
|
+
|
209
|
+
- ``uniform``: Select random training instances uniformly.
|
210
|
+
- ``gradient_based``: Select random training instances with higher probability
|
211
|
+
when the gradient and hessian are larger. (cf. CatBoost)
|
212
|
+
|
213
|
+
colsample_bytree: typing.Optional[float]
|
214
|
+
|
174
215
|
Subsample ratio of columns when constructing each tree.
|
175
|
-
|
216
|
+
|
217
|
+
colsample_bylevel: typing.Optional[float]
|
218
|
+
|
176
219
|
Subsample ratio of columns for each level.
|
177
|
-
|
220
|
+
|
221
|
+
colsample_bynode: typing.Optional[float]
|
222
|
+
|
178
223
|
Subsample ratio of columns for each split.
|
179
|
-
|
224
|
+
|
225
|
+
reg_alpha: typing.Optional[float]
|
226
|
+
|
180
227
|
L1 regularization term on weights (xgb's alpha).
|
181
|
-
|
228
|
+
|
229
|
+
reg_lambda: typing.Optional[float]
|
230
|
+
|
182
231
|
L2 regularization term on weights (xgb's lambda).
|
183
|
-
|
232
|
+
|
233
|
+
scale_pos_weight: typing.Optional[float]
|
184
234
|
Balancing of positive and negative weights.
|
185
|
-
|
235
|
+
|
236
|
+
base_score: typing.Optional[float]
|
237
|
+
|
186
238
|
The initial prediction score of all instances, global bias.
|
187
|
-
|
239
|
+
|
240
|
+
random_state: typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
|
241
|
+
|
188
242
|
Random number seed.
|
189
243
|
|
190
244
|
Using gblinear booster with shotgun updater is nondeterministic as
|
191
245
|
it uses Hogwild algorithm.
|
192
246
|
|
193
|
-
missing: float
|
194
|
-
|
195
|
-
|
247
|
+
missing: float
|
248
|
+
|
249
|
+
Value in the data which needs to be present as a missing value. Default to
|
250
|
+
:py:data:`numpy.nan`.
|
251
|
+
|
252
|
+
num_parallel_tree: typing.Optional[int]
|
253
|
+
|
196
254
|
Used for boosting random forest.
|
197
|
-
|
255
|
+
|
256
|
+
monotone_constraints: typing.Union[typing.Dict[str, int], str, NoneType]
|
257
|
+
|
198
258
|
Constraint of variable monotonicity. See :doc:`tutorial </tutorials/monotonic>`
|
199
259
|
for more information.
|
200
|
-
|
260
|
+
|
261
|
+
interaction_constraints: typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
|
262
|
+
|
201
263
|
Constraints for interaction representing permitted interactions. The
|
202
264
|
constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
|
203
265
|
3, 4]]``, where each inner list is a group of indices of features that are
|
204
266
|
allowed to interact with each other. See :doc:`tutorial
|
205
267
|
</tutorials/feature_interaction_constraint>` for more information
|
206
|
-
|
268
|
+
|
269
|
+
importance_type: typing.Optional[str]
|
270
|
+
|
207
271
|
The feature importance type for the feature_importances\_ property:
|
208
272
|
|
209
273
|
* For tree model, it's either "gain", "weight", "cover", "total_gain" or
|
210
274
|
"total_cover".
|
211
|
-
* For linear model, only "weight" is defined and it's the normalized
|
212
|
-
without bias.
|
275
|
+
* For linear model, only "weight" is defined and it's the normalized
|
276
|
+
coefficients without bias.
|
277
|
+
|
278
|
+
device: typing.Optional[str]
|
279
|
+
|
280
|
+
Device ordinal, available options are `cpu`, `cuda`, and `gpu`.
|
281
|
+
|
282
|
+
validate_parameters: typing.Optional[bool]
|
213
283
|
|
214
|
-
gpu_id: Optional[int]
|
215
|
-
Device ordinal.
|
216
|
-
validate_parameters: Optional[bool]
|
217
284
|
Give warnings for unknown parameter.
|
218
|
-
|
219
|
-
Force XGBoost to use specific predictor, available choices are [cpu_predictor,
|
220
|
-
gpu_predictor].
|
285
|
+
|
221
286
|
enable_categorical: bool
|
222
287
|
|
223
|
-
|
224
|
-
should be used to specify categorical data type. Also, JSON/UBJSON
|
225
|
-
serialization format is required.
|
288
|
+
See the same parameter of :py:class:`DMatrix` for details.
|
226
289
|
|
227
|
-
feature_types:
|
290
|
+
feature_types: typing.Optional[typing.Sequence[str]]
|
228
291
|
|
229
292
|
Used for specifying feature types without constructing a dataframe. See
|
230
293
|
:py:class:`DMatrix` for details.
|
231
294
|
|
232
|
-
max_cat_to_onehot: Optional[int]
|
295
|
+
max_cat_to_onehot: typing.Optional[int]
|
233
296
|
|
234
297
|
A threshold for deciding whether XGBoost should use one-hot encoding based split
|
235
298
|
for categorical data. When number of categories is lesser than the threshold
|
@@ -238,36 +301,41 @@ class XGBRFClassifier(BaseTransformer):
|
|
238
301
|
categorical feature support. See :doc:`Categorical Data
|
239
302
|
</tutorials/categorical>` and :ref:`cat-param` for details.
|
240
303
|
|
241
|
-
max_cat_threshold: Optional[int]
|
304
|
+
max_cat_threshold: typing.Optional[int]
|
242
305
|
|
243
306
|
Maximum number of categories considered for each split. Used only by
|
244
307
|
partition-based splits for preventing over-fitting. Also, `enable_categorical`
|
245
308
|
needs to be set to have categorical feature support. See :doc:`Categorical Data
|
246
309
|
</tutorials/categorical>` and :ref:`cat-param` for details.
|
247
310
|
|
248
|
-
|
311
|
+
multi_strategy: typing.Optional[str]
|
312
|
+
|
313
|
+
The strategy used for training multi-target models, including multi-target
|
314
|
+
regression and multi-class classification. See :doc:`/tutorials/multioutput` for
|
315
|
+
more information.
|
316
|
+
|
317
|
+
- ``one_output_per_tree``: One model for each target.
|
318
|
+
- ``multi_output_tree``: Use multi-target trees.
|
319
|
+
|
320
|
+
eval_metric: typing.Union[str, typing.List[str], typing.Callable, NoneType]
|
249
321
|
|
250
322
|
Metric used for monitoring the training result and early stopping. It can be a
|
251
323
|
string or list of strings as names of predefined metric in XGBoost (See
|
252
|
-
doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
|
253
|
-
user defined metric that looks like `sklearn.metrics`.
|
324
|
+
doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
|
325
|
+
other user defined metric that looks like `sklearn.metrics`.
|
254
326
|
|
255
327
|
If custom objective is also provided, then custom metric should implement the
|
256
328
|
corresponding reverse link function.
|
257
329
|
|
258
330
|
Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
|
259
|
-
object is provided, it's assumed to be a cost function and by default XGBoost
|
260
|
-
minimize the result during early stopping.
|
261
|
-
|
262
|
-
For advanced usage on Early stopping like directly choosing to maximize instead of
|
263
|
-
minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
|
331
|
+
object is provided, it's assumed to be a cost function and by default XGBoost
|
332
|
+
will minimize the result during early stopping.
|
264
333
|
|
265
|
-
|
266
|
-
|
334
|
+
For advanced usage on Early stopping like directly choosing to maximize instead
|
335
|
+
of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
|
267
336
|
|
268
|
-
|
269
|
-
|
270
|
-
being used.
|
337
|
+
See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
|
338
|
+
information.
|
271
339
|
|
272
340
|
from sklearn.datasets import load_diabetes
|
273
341
|
from sklearn.metrics import mean_absolute_error
|
@@ -278,24 +346,29 @@ class XGBRFClassifier(BaseTransformer):
|
|
278
346
|
)
|
279
347
|
reg.fit(X, y, eval_set=[(X, y)])
|
280
348
|
|
281
|
-
early_stopping_rounds: Optional[int]
|
349
|
+
early_stopping_rounds: typing.Optional[int]
|
282
350
|
|
283
|
-
Activates early stopping. Validation metric needs to improve at least once in
|
284
|
-
|
285
|
-
|
351
|
+
- Activates early stopping. Validation metric needs to improve at least once in
|
352
|
+
every **early_stopping_rounds** round(s) to continue training. Requires at
|
353
|
+
least one item in **eval_set** in :py:meth:`fit`.
|
286
354
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
355
|
+
- If early stopping occurs, the model will have two additional attributes:
|
356
|
+
:py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
|
357
|
+
:py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
|
358
|
+
number of trees during inference. If users want to access the full model
|
359
|
+
(including trees built after early stopping), they can specify the
|
360
|
+
`iteration_range` in these inference methods. In addition, other utilities
|
361
|
+
like model plotting can also use the entire model.
|
291
362
|
|
292
|
-
If
|
293
|
-
|
294
|
-
:py:attr:`best_ntree_limit`.
|
363
|
+
- If you prefer to discard the trees after `best_iteration`, consider using the
|
364
|
+
callback function :py:class:`xgboost.callback.EarlyStopping`.
|
295
365
|
|
296
|
-
|
366
|
+
- If there's more than one item in **eval_set**, the last entry will be used for
|
367
|
+
early stopping. If there's more than one metric in **eval_metric**, the last
|
368
|
+
metric will be used for early stopping.
|
369
|
+
|
370
|
+
callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]]
|
297
371
|
|
298
|
-
callbacks: Optional[List[TrainingCallback]]
|
299
372
|
List of callback functions that are applied at end of each iteration.
|
300
373
|
It is possible to use predefined callbacks by using
|
301
374
|
:ref:`Callback API <callback_api>`.
|
@@ -307,9 +380,11 @@ class XGBRFClassifier(BaseTransformer):
|
|
307
380
|
for params in parameters_grid:
|
308
381
|
# be sure to (re)initialize the callbacks before each run
|
309
382
|
callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
|
310
|
-
xgboost.
|
383
|
+
reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
|
384
|
+
reg.fit(X, y)
|
385
|
+
|
386
|
+
kwargs: typing.Optional[typing.Any]
|
311
387
|
|
312
|
-
kwargs: dict, optional
|
313
388
|
Keyword arguments for XGBoost Booster object. Full documentation of parameters
|
314
389
|
can be found :doc:`here </parameter>`.
|
315
390
|
Attempting to set a parameter via the constructor args and \*\*kwargs
|
@@ -320,13 +395,16 @@ class XGBRFClassifier(BaseTransformer):
|
|
320
395
|
with scikit-learn.
|
321
396
|
|
322
397
|
A custom objective function can be provided for the ``objective``
|
323
|
-
parameter. In this case, it should have the signature
|
324
|
-
``objective(y_true, y_pred
|
398
|
+
parameter. In this case, it should have the signature ``objective(y_true,
|
399
|
+
y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
|
400
|
+
-> [grad, hess]``:
|
325
401
|
|
326
402
|
y_true: array_like of shape [n_samples]
|
327
403
|
The target values
|
328
404
|
y_pred: array_like of shape [n_samples]
|
329
405
|
The predicted values
|
406
|
+
sample_weight :
|
407
|
+
Optional sample weights.
|
330
408
|
|
331
409
|
grad: array_like of shape [n_samples]
|
332
410
|
The value of the gradient for each sample point.
|
@@ -632,7 +710,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
632
710
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
633
711
|
expected_dtype = "array"
|
634
712
|
else:
|
635
|
-
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
713
|
+
output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
|
636
714
|
# We can only infer the output types from the input types if the following two statements are true:
|
637
715
|
# 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
|
638
716
|
# 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
|
@@ -1293,7 +1371,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1293
1371
|
|
1294
1372
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1295
1373
|
|
1296
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1374
|
+
inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
|
1297
1375
|
outputs: List[BaseFeatureSpec] = []
|
1298
1376
|
if hasattr(self, "predict"):
|
1299
1377
|
# keep mypy happy
|
@@ -1301,7 +1379,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
1301
1379
|
# For classifier, the type of predict is the same as the type of label
|
1302
1380
|
if self._sklearn_object._estimator_type == "classifier":
|
1303
1381
|
# label columns is the desired type for output
|
1304
|
-
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1382
|
+
outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
|
1305
1383
|
# rename the output columns
|
1306
1384
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1307
1385
|
self._model_signature_dict["predict"] = ModelSignature(
|