snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. snowflake/cortex/__init__.py +16 -8
  2. snowflake/cortex/_classify_text.py +12 -1
  3. snowflake/cortex/_complete.py +101 -13
  4. snowflake/cortex/_embed_text_1024.py +9 -2
  5. snowflake/cortex/_embed_text_768.py +9 -2
  6. snowflake/cortex/_extract_answer.py +9 -2
  7. snowflake/cortex/_sentiment.py +9 -2
  8. snowflake/cortex/_summarize.py +9 -2
  9. snowflake/cortex/_translate.py +9 -2
  10. snowflake/ml/_internal/env_utils.py +7 -52
  11. snowflake/ml/_internal/platform_capabilities.py +87 -0
  12. snowflake/ml/_internal/utils/identifier.py +4 -2
  13. snowflake/ml/data/__init__.py +3 -0
  14. snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
  15. snowflake/ml/data/data_connector.py +53 -11
  16. snowflake/ml/data/data_ingestor.py +2 -1
  17. snowflake/ml/data/torch_utils.py +18 -5
  18. snowflake/ml/dataset/dataset.py +0 -1
  19. snowflake/ml/feature_store/examples/example_helper.py +2 -1
  20. snowflake/ml/fileset/fileset.py +24 -18
  21. snowflake/ml/jobs/__init__.py +21 -0
  22. snowflake/ml/jobs/_utils/constants.py +51 -0
  23. snowflake/ml/jobs/_utils/payload_utils.py +352 -0
  24. snowflake/ml/jobs/_utils/spec_utils.py +298 -0
  25. snowflake/ml/jobs/_utils/types.py +39 -0
  26. snowflake/ml/jobs/decorators.py +91 -0
  27. snowflake/ml/jobs/job.py +113 -0
  28. snowflake/ml/jobs/manager.py +298 -0
  29. snowflake/ml/model/_client/model/model_version_impl.py +5 -3
  30. snowflake/ml/model/_client/ops/model_ops.py +13 -8
  31. snowflake/ml/model/_client/ops/service_ops.py +1 -11
  32. snowflake/ml/model/_client/sql/model_version.py +11 -0
  33. snowflake/ml/model/_client/sql/service.py +13 -6
  34. snowflake/ml/model/_model_composer/model_composer.py +8 -3
  35. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  37. snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
  38. snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
  39. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
  40. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
  41. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
  42. snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
  43. snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
  44. snowflake/ml/model/_packager/model_handlers/_utils.py +39 -5
  45. snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
  46. snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
  47. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +6 -1
  48. snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
  49. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
  50. snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
  51. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
  52. snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
  53. snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
  54. snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
  55. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  56. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  57. snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
  58. snowflake/ml/model/_signatures/base_handler.py +1 -2
  59. snowflake/ml/model/_signatures/builtins_handler.py +2 -2
  60. snowflake/ml/model/_signatures/numpy_handler.py +6 -7
  61. snowflake/ml/model/_signatures/pandas_handler.py +3 -3
  62. snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
  63. snowflake/ml/model/_signatures/snowpark_handler.py +11 -5
  64. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
  65. snowflake/ml/model/model_signature.py +17 -4
  66. snowflake/ml/model/type_hints.py +1 -0
  67. snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
  68. snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
  69. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
  70. snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
  71. snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
  72. snowflake/ml/modeling/cluster/birch.py +6 -3
  73. snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
  74. snowflake/ml/modeling/cluster/dbscan.py +6 -3
  75. snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
  76. snowflake/ml/modeling/cluster/k_means.py +6 -3
  77. snowflake/ml/modeling/cluster/mean_shift.py +6 -3
  78. snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
  79. snowflake/ml/modeling/cluster/optics.py +6 -3
  80. snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
  81. snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
  82. snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
  83. snowflake/ml/modeling/compose/column_transformer.py +6 -3
  84. snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
  85. snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
  86. snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
  87. snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
  88. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
  89. snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
  90. snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
  91. snowflake/ml/modeling/covariance/oas.py +6 -3
  92. snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
  93. snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
  94. snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
  95. snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
  96. snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
  97. snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
  98. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
  99. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
  100. snowflake/ml/modeling/decomposition/pca.py +6 -3
  101. snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
  102. snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
  103. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
  104. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
  105. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
  106. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
  107. snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
  108. snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
  109. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
  110. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
  111. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
  112. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
  113. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
  114. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
  115. snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
  116. snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
  117. snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
  118. snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
  119. snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
  120. snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
  121. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
  122. snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
  123. snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
  124. snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
  125. snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
  126. snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
  127. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
  128. snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
  129. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
  130. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
  131. snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
  132. snowflake/ml/modeling/impute/knn_imputer.py +6 -3
  133. snowflake/ml/modeling/impute/missing_indicator.py +6 -3
  134. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
  135. snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
  136. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
  137. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
  138. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
  139. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
  140. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
  141. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
  142. snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
  143. snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
  144. snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
  145. snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
  146. snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
  147. snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
  148. snowflake/ml/modeling/linear_model/lars.py +6 -3
  149. snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
  150. snowflake/ml/modeling/linear_model/lasso.py +6 -3
  151. snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
  152. snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
  153. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
  154. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
  155. snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
  156. snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
  157. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
  158. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
  159. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
  160. snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
  161. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
  162. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
  163. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
  164. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
  165. snowflake/ml/modeling/linear_model/perceptron.py +6 -3
  166. snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
  167. snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
  168. snowflake/ml/modeling/linear_model/ridge.py +6 -3
  169. snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
  170. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
  171. snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
  172. snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
  173. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
  174. snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
  175. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
  176. snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
  177. snowflake/ml/modeling/manifold/isomap.py +6 -3
  178. snowflake/ml/modeling/manifold/mds.py +6 -3
  179. snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
  180. snowflake/ml/modeling/manifold/tsne.py +6 -3
  181. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
  182. snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
  183. snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
  184. snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
  185. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
  186. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
  187. snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
  188. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
  189. snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
  190. snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
  191. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
  192. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
  193. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
  194. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
  195. snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
  196. snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
  197. snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
  198. snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
  199. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
  200. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
  201. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
  202. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
  203. snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
  204. snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
  205. snowflake/ml/modeling/pipeline/pipeline.py +16 -178
  206. snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
  207. snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
  208. snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
  209. snowflake/ml/modeling/svm/linear_svc.py +6 -3
  210. snowflake/ml/modeling/svm/linear_svr.py +6 -3
  211. snowflake/ml/modeling/svm/nu_svc.py +6 -3
  212. snowflake/ml/modeling/svm/nu_svr.py +6 -3
  213. snowflake/ml/modeling/svm/svc.py +6 -3
  214. snowflake/ml/modeling/svm/svr.py +6 -3
  215. snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
  216. snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
  217. snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
  218. snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
  219. snowflake/ml/modeling/xgboost/xgb_classifier.py +167 -91
  220. snowflake/ml/modeling/xgboost/xgb_regressor.py +166 -88
  221. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +166 -88
  222. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +166 -88
  223. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
  224. snowflake/ml/registry/_manager/model_manager.py +70 -33
  225. snowflake/ml/registry/registry.py +41 -22
  226. snowflake/ml/version.py +1 -1
  227. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +63 -19
  228. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +231 -226
  229. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +1 -1
  230. snowflake/ml/_internal/utils/retryable_http.py +0 -39
  231. snowflake/ml/fileset/parquet_parser.py +0 -170
  232. snowflake/ml/fileset/tf_dataset.py +0 -88
  233. snowflake/ml/fileset/torch_datapipe.py +0 -57
  234. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
  235. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
  236. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
  237. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class XGBRFClassifier(BaseTransformer):
61
64
  r"""scikit-learn API for XGBoost random forest classification
62
65
  For more details on this class, see [xgboost.XGBRFClassifier]
@@ -125,111 +128,171 @@ class XGBRFClassifier(BaseTransformer):
125
128
  can seriously hurt performance in gradient boosting. Set the batch_size as large as possible
126
129
  based on the available memory.
127
130
 
128
- n_estimators: int
131
+ n_estimators: Optional[int]
129
132
  Number of trees in random forest to fit.
130
133
 
131
- max_depth: Optional[int]
134
+ max_depth: typing.Optional[int]
135
+
132
136
  Maximum tree depth for base learners.
133
- max_leaves :
137
+
138
+ max_leaves: typing.Optional[int]
139
+
134
140
  Maximum number of leaves; 0 indicates no limit.
135
- max_bin :
141
+
142
+ max_bin: typing.Optional[int]
143
+
136
144
  If using histogram-based algorithm, maximum number of bins per feature
137
- grow_policy :
138
- Tree growing policy. 0: favor splitting at nodes closest to the node, i.e. grow
139
- depth-wise. 1: favor splitting at nodes with highest loss change.
140
- learning_rate: Optional[float]
145
+
146
+ grow_policy: typing.Optional[str]
147
+
148
+ Tree growing policy.
149
+
150
+ - depthwise: Favors splitting at nodes closest to the node,
151
+ - lossguide: Favors splitting at nodes with highest loss change.
152
+
153
+ learning_rate: typing.Optional[float]
154
+
141
155
  Boosting learning rate (xgb's "eta")
142
- verbosity: Optional[int]
156
+
157
+ verbosity: typing.Optional[int]
158
+
143
159
  The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
144
- objective: typing.Union[str, typing.Callable[[numpy.ndarray, numpy.ndarray], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
145
- Specify the learning task and the corresponding learning objective or
146
- a custom objective function to be used (see note below).
147
- booster: Optional[str]
148
- Specify which booster to use: gbtree, gblinear or dart.
149
- tree_method: Optional[str]
160
+
161
+ objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
162
+
163
+ Specify the learning task and the corresponding learning objective or a custom
164
+ objective function to be used.
165
+
166
+ For custom objective, see :doc:`/tutorials/custom_metric_obj` and
167
+ :ref:`custom-obj-metric` for more information, along with the end note for
168
+ function signatures.
169
+
170
+ booster: typing.Optional[str]
171
+
172
+ Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.
173
+
174
+ tree_method: typing.Optional[str]
175
+
150
176
  Specify which tree method to use. Default to auto. If this parameter is set to
151
177
  default, XGBoost will choose the most conservative option available. It's
152
178
  recommended to study this option from the parameters document :doc:`tree method
153
179
  </treemethod>`
154
- n_jobs: Optional[int]
180
+
181
+ n_jobs: typing.Optional[int]
182
+
155
183
  Number of parallel threads used to run xgboost. When used with other
156
184
  Scikit-Learn algorithms like grid search, you may choose which algorithm to
157
185
  parallelize and balance the threads. Creating thread contention will
158
186
  significantly slow down both algorithms.
159
- gamma: Optional[float]
160
- (min_split_loss) Minimum loss reduction required to make a further partition on a
161
- leaf node of the tree.
162
- min_child_weight: Optional[float]
187
+
188
+ gamma: typing.Optional[float]
189
+
190
+ (min_split_loss) Minimum loss reduction required to make a further partition on
191
+ a leaf node of the tree.
192
+
193
+ min_child_weight: typing.Optional[float]
194
+
163
195
  Minimum sum of instance weight(hessian) needed in a child.
164
- max_delta_step: Optional[float]
196
+
197
+ max_delta_step: typing.Optional[float]
198
+
165
199
  Maximum delta step we allow each tree's weight estimation to be.
166
- subsample: Optional[float]
200
+
201
+ subsample: typing.Optional[float]
202
+
167
203
  Subsample ratio of the training instance.
168
- sampling_method :
169
- Sampling method. Used only by `gpu_hist` tree method.
170
- - `uniform`: select random training instances uniformly.
171
- - `gradient_based` select random training instances with higher probability when
172
- the gradient and hessian are larger. (cf. CatBoost)
173
- colsample_bytree: Optional[float]
204
+
205
+ sampling_method: typing.Optional[str]
206
+
207
+ Sampling method. Used only by the GPU version of ``hist`` tree method.
208
+
209
+ - ``uniform``: Select random training instances uniformly.
210
+ - ``gradient_based``: Select random training instances with higher probability
211
+ when the gradient and hessian are larger. (cf. CatBoost)
212
+
213
+ colsample_bytree: typing.Optional[float]
214
+
174
215
  Subsample ratio of columns when constructing each tree.
175
- colsample_bylevel: Optional[float]
216
+
217
+ colsample_bylevel: typing.Optional[float]
218
+
176
219
  Subsample ratio of columns for each level.
177
- colsample_bynode: Optional[float]
220
+
221
+ colsample_bynode: typing.Optional[float]
222
+
178
223
  Subsample ratio of columns for each split.
179
- reg_alpha: Optional[float]
224
+
225
+ reg_alpha: typing.Optional[float]
226
+
180
227
  L1 regularization term on weights (xgb's alpha).
181
- reg_lambda: Optional[float]
228
+
229
+ reg_lambda: typing.Optional[float]
230
+
182
231
  L2 regularization term on weights (xgb's lambda).
183
- scale_pos_weight: Optional[float]
232
+
233
+ scale_pos_weight: typing.Optional[float]
184
234
  Balancing of positive and negative weights.
185
- base_score: Optional[float]
235
+
236
+ base_score: typing.Optional[float]
237
+
186
238
  The initial prediction score of all instances, global bias.
187
- random_state: Optional[Union[numpy.random.RandomState, int]]
239
+
240
+ random_state: typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
241
+
188
242
  Random number seed.
189
243
 
190
244
  Using gblinear booster with shotgun updater is nondeterministic as
191
245
  it uses Hogwild algorithm.
192
246
 
193
- missing: float, default np.nan
194
- Value in the data which needs to be present as a missing value.
195
- num_parallel_tree: Optional[int]
247
+ missing: float
248
+
249
+ Value in the data which needs to be present as a missing value. Default to
250
+ :py:data:`numpy.nan`.
251
+
252
+ num_parallel_tree: typing.Optional[int]
253
+
196
254
  Used for boosting random forest.
197
- monotone_constraints: Optional[Union[Dict[str, int], str]]
255
+
256
+ monotone_constraints: typing.Union[typing.Dict[str, int], str, NoneType]
257
+
198
258
  Constraint of variable monotonicity. See :doc:`tutorial </tutorials/monotonic>`
199
259
  for more information.
200
- interaction_constraints: Optional[Union[str, List[Tuple[str]]]]
260
+
261
+ interaction_constraints: typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
262
+
201
263
  Constraints for interaction representing permitted interactions. The
202
264
  constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
203
265
  3, 4]]``, where each inner list is a group of indices of features that are
204
266
  allowed to interact with each other. See :doc:`tutorial
205
267
  </tutorials/feature_interaction_constraint>` for more information
206
- importance_type: Optional[str]
268
+
269
+ importance_type: typing.Optional[str]
270
+
207
271
  The feature importance type for the feature_importances\_ property:
208
272
 
209
273
  * For tree model, it's either "gain", "weight", "cover", "total_gain" or
210
274
  "total_cover".
211
- * For linear model, only "weight" is defined and it's the normalized coefficients
212
- without bias.
275
+ * For linear model, only "weight" is defined and it's the normalized
276
+ coefficients without bias.
277
+
278
+ device: typing.Optional[str]
279
+
280
+ Device ordinal, available options are `cpu`, `cuda`, and `gpu`.
281
+
282
+ validate_parameters: typing.Optional[bool]
213
283
 
214
- gpu_id: Optional[int]
215
- Device ordinal.
216
- validate_parameters: Optional[bool]
217
284
  Give warnings for unknown parameter.
218
- predictor: Optional[str]
219
- Force XGBoost to use specific predictor, available choices are [cpu_predictor,
220
- gpu_predictor].
285
+
221
286
  enable_categorical: bool
222
287
 
223
- Experimental support for categorical data. When enabled, cudf/pandas.DataFrame
224
- should be used to specify categorical data type. Also, JSON/UBJSON
225
- serialization format is required.
288
+ See the same parameter of :py:class:`DMatrix` for details.
226
289
 
227
- feature_types: FeatureTypes
290
+ feature_types: typing.Optional[typing.Sequence[str]]
228
291
 
229
292
  Used for specifying feature types without constructing a dataframe. See
230
293
  :py:class:`DMatrix` for details.
231
294
 
232
- max_cat_to_onehot: Optional[int]
295
+ max_cat_to_onehot: typing.Optional[int]
233
296
 
234
297
  A threshold for deciding whether XGBoost should use one-hot encoding based split
235
298
  for categorical data. When number of categories is lesser than the threshold
@@ -238,36 +301,41 @@ class XGBRFClassifier(BaseTransformer):
238
301
  categorical feature support. See :doc:`Categorical Data
239
302
  </tutorials/categorical>` and :ref:`cat-param` for details.
240
303
 
241
- max_cat_threshold: Optional[int]
304
+ max_cat_threshold: typing.Optional[int]
242
305
 
243
306
  Maximum number of categories considered for each split. Used only by
244
307
  partition-based splits for preventing over-fitting. Also, `enable_categorical`
245
308
  needs to be set to have categorical feature support. See :doc:`Categorical Data
246
309
  </tutorials/categorical>` and :ref:`cat-param` for details.
247
310
 
248
- eval_metric: Optional[Union[str, List[str], Callable]]
311
+ multi_strategy: typing.Optional[str]
312
+
313
+ The strategy used for training multi-target models, including multi-target
314
+ regression and multi-class classification. See :doc:`/tutorials/multioutput` for
315
+ more information.
316
+
317
+ - ``one_output_per_tree``: One model for each target.
318
+ - ``multi_output_tree``: Use multi-target trees.
319
+
320
+ eval_metric: typing.Union[str, typing.List[str], typing.Callable, NoneType]
249
321
 
250
322
  Metric used for monitoring the training result and early stopping. It can be a
251
323
  string or list of strings as names of predefined metric in XGBoost (See
252
- doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any other
253
- user defined metric that looks like `sklearn.metrics`.
324
+ doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
325
+ other user defined metric that looks like `sklearn.metrics`.
254
326
 
255
327
  If custom objective is also provided, then custom metric should implement the
256
328
  corresponding reverse link function.
257
329
 
258
330
  Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
259
- object is provided, it's assumed to be a cost function and by default XGBoost will
260
- minimize the result during early stopping.
261
-
262
- For advanced usage on Early stopping like directly choosing to maximize instead of
263
- minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
331
+ object is provided, it's assumed to be a cost function and by default XGBoost
332
+ will minimize the result during early stopping.
264
333
 
265
- See :doc:`Custom Objective and Evaluation Metric </tutorials/custom_metric_obj>`
266
- for more.
334
+ For advanced usage on Early stopping like directly choosing to maximize instead
335
+ of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
267
336
 
268
- This parameter replaces `eval_metric` in :py:meth:`fit` method. The old one
269
- receives un-transformed prediction regardless of whether custom objective is
270
- being used.
337
+ See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
338
+ information.
271
339
 
272
340
  from sklearn.datasets import load_diabetes
273
341
  from sklearn.metrics import mean_absolute_error
@@ -278,24 +346,29 @@ class XGBRFClassifier(BaseTransformer):
278
346
  )
279
347
  reg.fit(X, y, eval_set=[(X, y)])
280
348
 
281
- early_stopping_rounds: Optional[int]
349
+ early_stopping_rounds: typing.Optional[int]
282
350
 
283
- Activates early stopping. Validation metric needs to improve at least once in
284
- every **early_stopping_rounds** round(s) to continue training. Requires at least
285
- one item in **eval_set** in :py:meth:`fit`.
351
+ - Activates early stopping. Validation metric needs to improve at least once in
352
+ every **early_stopping_rounds** round(s) to continue training. Requires at
353
+ least one item in **eval_set** in :py:meth:`fit`.
286
354
 
287
- The method returns the model from the last iteration (not the best one). If
288
- there's more than one item in **eval_set**, the last entry will be used for early
289
- stopping. If there's more than one metric in **eval_metric**, the last metric
290
- will be used for early stopping.
355
+ - If early stopping occurs, the model will have two additional attributes:
356
+ :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
357
+ :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
358
+ number of trees during inference. If users want to access the full model
359
+ (including trees built after early stopping), they can specify the
360
+ `iteration_range` in these inference methods. In addition, other utilities
361
+ like model plotting can also use the entire model.
291
362
 
292
- If early stopping occurs, the model will have three additional fields:
293
- :py:attr:`best_score`, :py:attr:`best_iteration` and
294
- :py:attr:`best_ntree_limit`.
363
+ - If you prefer to discard the trees after `best_iteration`, consider using the
364
+ callback function :py:class:`xgboost.callback.EarlyStopping`.
295
365
 
296
- This parameter replaces `early_stopping_rounds` in :py:meth:`fit` method.
366
+ - If there's more than one item in **eval_set**, the last entry will be used for
367
+ early stopping. If there's more than one metric in **eval_metric**, the last
368
+ metric will be used for early stopping.
369
+
370
+ callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]]
297
371
 
298
- callbacks: Optional[List[TrainingCallback]]
299
372
  List of callback functions that are applied at end of each iteration.
300
373
  It is possible to use predefined callbacks by using
301
374
  :ref:`Callback API <callback_api>`.
@@ -307,9 +380,11 @@ class XGBRFClassifier(BaseTransformer):
307
380
  for params in parameters_grid:
308
381
  # be sure to (re)initialize the callbacks before each run
309
382
  callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
310
- xgboost.train(params, Xy, callbacks=callbacks)
383
+ reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
384
+ reg.fit(X, y)
385
+
386
+ kwargs: typing.Optional[typing.Any]
311
387
 
312
- kwargs: dict, optional
313
388
  Keyword arguments for XGBoost Booster object. Full documentation of parameters
314
389
  can be found :doc:`here </parameter>`.
315
390
  Attempting to set a parameter via the constructor args and \*\*kwargs
@@ -320,13 +395,16 @@ class XGBRFClassifier(BaseTransformer):
320
395
  with scikit-learn.
321
396
 
322
397
  A custom objective function can be provided for the ``objective``
323
- parameter. In this case, it should have the signature
324
- ``objective(y_true, y_pred) -> grad, hess``:
398
+ parameter. In this case, it should have the signature ``objective(y_true,
399
+ y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
400
+ -> [grad, hess]``:
325
401
 
326
402
  y_true: array_like of shape [n_samples]
327
403
  The target values
328
404
  y_pred: array_like of shape [n_samples]
329
405
  The predicted values
406
+ sample_weight :
407
+ Optional sample weights.
330
408
 
331
409
  grad: array_like of shape [n_samples]
332
410
  The value of the gradient for each sample point.
@@ -632,7 +710,7 @@ class XGBRFClassifier(BaseTransformer):
632
710
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
633
711
  expected_dtype = "array"
634
712
  else:
635
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
713
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
636
714
  # We can only infer the output types from the input types if the following two statements are true:
637
715
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
638
716
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1293,7 +1371,7 @@ class XGBRFClassifier(BaseTransformer):
1293
1371
 
1294
1372
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1295
1373
 
1296
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1374
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1297
1375
  outputs: List[BaseFeatureSpec] = []
1298
1376
  if hasattr(self, "predict"):
1299
1377
  # keep mypy happy
@@ -1301,7 +1379,7 @@ class XGBRFClassifier(BaseTransformer):
1301
1379
  # For classifier, the type of predict is the same as the type of label
1302
1380
  if self._sklearn_object._estimator_type == "classifier":
1303
1381
  # label columns is the desired type for output
1304
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1382
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1305
1383
  # rename the output columns
1306
1384
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1307
1385
  self._model_signature_dict["predict"] = ModelSignature(