snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. snowflake/cortex/__init__.py +16 -8
  2. snowflake/cortex/_classify_text.py +12 -1
  3. snowflake/cortex/_complete.py +101 -13
  4. snowflake/cortex/_embed_text_1024.py +9 -2
  5. snowflake/cortex/_embed_text_768.py +9 -2
  6. snowflake/cortex/_extract_answer.py +9 -2
  7. snowflake/cortex/_sentiment.py +9 -2
  8. snowflake/cortex/_summarize.py +9 -2
  9. snowflake/cortex/_translate.py +9 -2
  10. snowflake/ml/_internal/env_utils.py +7 -52
  11. snowflake/ml/_internal/platform_capabilities.py +87 -0
  12. snowflake/ml/_internal/utils/identifier.py +4 -2
  13. snowflake/ml/data/__init__.py +3 -0
  14. snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
  15. snowflake/ml/data/data_connector.py +53 -11
  16. snowflake/ml/data/data_ingestor.py +2 -1
  17. snowflake/ml/data/torch_utils.py +18 -5
  18. snowflake/ml/dataset/dataset.py +0 -1
  19. snowflake/ml/feature_store/examples/example_helper.py +2 -1
  20. snowflake/ml/fileset/fileset.py +24 -18
  21. snowflake/ml/jobs/__init__.py +21 -0
  22. snowflake/ml/jobs/_utils/constants.py +51 -0
  23. snowflake/ml/jobs/_utils/payload_utils.py +352 -0
  24. snowflake/ml/jobs/_utils/spec_utils.py +298 -0
  25. snowflake/ml/jobs/_utils/types.py +39 -0
  26. snowflake/ml/jobs/decorators.py +91 -0
  27. snowflake/ml/jobs/job.py +113 -0
  28. snowflake/ml/jobs/manager.py +298 -0
  29. snowflake/ml/model/_client/model/model_version_impl.py +5 -3
  30. snowflake/ml/model/_client/ops/model_ops.py +13 -8
  31. snowflake/ml/model/_client/ops/service_ops.py +1 -11
  32. snowflake/ml/model/_client/sql/model_version.py +11 -0
  33. snowflake/ml/model/_client/sql/service.py +13 -6
  34. snowflake/ml/model/_model_composer/model_composer.py +8 -3
  35. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
  36. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  37. snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
  38. snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
  39. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
  40. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
  41. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
  42. snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
  43. snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
  44. snowflake/ml/model/_packager/model_handlers/_utils.py +39 -5
  45. snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
  46. snowflake/ml/model/_packager/model_handlers/custom.py +1 -2
  47. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +6 -1
  48. snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
  49. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
  50. snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
  51. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
  52. snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
  53. snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
  54. snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
  55. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  56. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  57. snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
  58. snowflake/ml/model/_signatures/base_handler.py +1 -2
  59. snowflake/ml/model/_signatures/builtins_handler.py +2 -2
  60. snowflake/ml/model/_signatures/numpy_handler.py +6 -7
  61. snowflake/ml/model/_signatures/pandas_handler.py +3 -3
  62. snowflake/ml/model/_signatures/pytorch_handler.py +2 -5
  63. snowflake/ml/model/_signatures/snowpark_handler.py +11 -5
  64. snowflake/ml/model/_signatures/tensorflow_handler.py +2 -7
  65. snowflake/ml/model/model_signature.py +17 -4
  66. snowflake/ml/model/type_hints.py +1 -0
  67. snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
  68. snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
  69. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +6 -3
  70. snowflake/ml/modeling/cluster/affinity_propagation.py +6 -3
  71. snowflake/ml/modeling/cluster/agglomerative_clustering.py +6 -3
  72. snowflake/ml/modeling/cluster/birch.py +6 -3
  73. snowflake/ml/modeling/cluster/bisecting_k_means.py +6 -3
  74. snowflake/ml/modeling/cluster/dbscan.py +6 -3
  75. snowflake/ml/modeling/cluster/feature_agglomeration.py +6 -3
  76. snowflake/ml/modeling/cluster/k_means.py +6 -3
  77. snowflake/ml/modeling/cluster/mean_shift.py +6 -3
  78. snowflake/ml/modeling/cluster/mini_batch_k_means.py +6 -3
  79. snowflake/ml/modeling/cluster/optics.py +6 -3
  80. snowflake/ml/modeling/cluster/spectral_biclustering.py +6 -3
  81. snowflake/ml/modeling/cluster/spectral_clustering.py +6 -3
  82. snowflake/ml/modeling/cluster/spectral_coclustering.py +6 -3
  83. snowflake/ml/modeling/compose/column_transformer.py +6 -3
  84. snowflake/ml/modeling/compose/transformed_target_regressor.py +6 -3
  85. snowflake/ml/modeling/covariance/elliptic_envelope.py +6 -3
  86. snowflake/ml/modeling/covariance/empirical_covariance.py +6 -3
  87. snowflake/ml/modeling/covariance/graphical_lasso.py +6 -3
  88. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +6 -3
  89. snowflake/ml/modeling/covariance/ledoit_wolf.py +6 -3
  90. snowflake/ml/modeling/covariance/min_cov_det.py +6 -3
  91. snowflake/ml/modeling/covariance/oas.py +6 -3
  92. snowflake/ml/modeling/covariance/shrunk_covariance.py +6 -3
  93. snowflake/ml/modeling/decomposition/dictionary_learning.py +6 -3
  94. snowflake/ml/modeling/decomposition/factor_analysis.py +6 -3
  95. snowflake/ml/modeling/decomposition/fast_ica.py +6 -3
  96. snowflake/ml/modeling/decomposition/incremental_pca.py +6 -3
  97. snowflake/ml/modeling/decomposition/kernel_pca.py +6 -3
  98. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +6 -3
  99. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +6 -3
  100. snowflake/ml/modeling/decomposition/pca.py +6 -3
  101. snowflake/ml/modeling/decomposition/sparse_pca.py +6 -3
  102. snowflake/ml/modeling/decomposition/truncated_svd.py +6 -3
  103. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -3
  104. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +6 -3
  105. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +6 -3
  106. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +6 -3
  107. snowflake/ml/modeling/ensemble/bagging_classifier.py +6 -3
  108. snowflake/ml/modeling/ensemble/bagging_regressor.py +6 -3
  109. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +6 -3
  110. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +6 -3
  111. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +6 -3
  112. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +6 -3
  113. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +6 -3
  114. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +6 -3
  115. snowflake/ml/modeling/ensemble/isolation_forest.py +6 -3
  116. snowflake/ml/modeling/ensemble/random_forest_classifier.py +6 -3
  117. snowflake/ml/modeling/ensemble/random_forest_regressor.py +6 -3
  118. snowflake/ml/modeling/ensemble/stacking_regressor.py +6 -3
  119. snowflake/ml/modeling/ensemble/voting_classifier.py +6 -3
  120. snowflake/ml/modeling/ensemble/voting_regressor.py +6 -3
  121. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +6 -3
  122. snowflake/ml/modeling/feature_selection/select_fdr.py +6 -3
  123. snowflake/ml/modeling/feature_selection/select_fpr.py +6 -3
  124. snowflake/ml/modeling/feature_selection/select_fwe.py +6 -3
  125. snowflake/ml/modeling/feature_selection/select_k_best.py +6 -3
  126. snowflake/ml/modeling/feature_selection/select_percentile.py +6 -3
  127. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +6 -3
  128. snowflake/ml/modeling/feature_selection/variance_threshold.py +6 -3
  129. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +6 -3
  130. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +6 -3
  131. snowflake/ml/modeling/impute/iterative_imputer.py +6 -3
  132. snowflake/ml/modeling/impute/knn_imputer.py +6 -3
  133. snowflake/ml/modeling/impute/missing_indicator.py +6 -3
  134. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +6 -3
  135. snowflake/ml/modeling/kernel_approximation/nystroem.py +6 -3
  136. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +6 -3
  137. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +6 -3
  138. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +6 -3
  139. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +6 -3
  140. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +6 -3
  141. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +6 -3
  142. snowflake/ml/modeling/linear_model/ard_regression.py +6 -3
  143. snowflake/ml/modeling/linear_model/bayesian_ridge.py +6 -3
  144. snowflake/ml/modeling/linear_model/elastic_net.py +6 -3
  145. snowflake/ml/modeling/linear_model/elastic_net_cv.py +6 -3
  146. snowflake/ml/modeling/linear_model/gamma_regressor.py +6 -3
  147. snowflake/ml/modeling/linear_model/huber_regressor.py +6 -3
  148. snowflake/ml/modeling/linear_model/lars.py +6 -3
  149. snowflake/ml/modeling/linear_model/lars_cv.py +6 -3
  150. snowflake/ml/modeling/linear_model/lasso.py +6 -3
  151. snowflake/ml/modeling/linear_model/lasso_cv.py +6 -3
  152. snowflake/ml/modeling/linear_model/lasso_lars.py +6 -3
  153. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +6 -3
  154. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +6 -3
  155. snowflake/ml/modeling/linear_model/linear_regression.py +6 -3
  156. snowflake/ml/modeling/linear_model/logistic_regression.py +6 -3
  157. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +6 -3
  158. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +6 -3
  159. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +6 -3
  160. snowflake/ml/modeling/linear_model/multi_task_lasso.py +6 -3
  161. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +6 -3
  162. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +6 -3
  163. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +6 -3
  164. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +6 -3
  165. snowflake/ml/modeling/linear_model/perceptron.py +6 -3
  166. snowflake/ml/modeling/linear_model/poisson_regressor.py +6 -3
  167. snowflake/ml/modeling/linear_model/ransac_regressor.py +6 -3
  168. snowflake/ml/modeling/linear_model/ridge.py +6 -3
  169. snowflake/ml/modeling/linear_model/ridge_classifier.py +6 -3
  170. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +6 -3
  171. snowflake/ml/modeling/linear_model/ridge_cv.py +6 -3
  172. snowflake/ml/modeling/linear_model/sgd_classifier.py +6 -3
  173. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +6 -3
  174. snowflake/ml/modeling/linear_model/sgd_regressor.py +6 -3
  175. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +6 -3
  176. snowflake/ml/modeling/linear_model/tweedie_regressor.py +6 -3
  177. snowflake/ml/modeling/manifold/isomap.py +6 -3
  178. snowflake/ml/modeling/manifold/mds.py +6 -3
  179. snowflake/ml/modeling/manifold/spectral_embedding.py +6 -3
  180. snowflake/ml/modeling/manifold/tsne.py +6 -3
  181. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +6 -3
  182. snowflake/ml/modeling/mixture/gaussian_mixture.py +6 -3
  183. snowflake/ml/modeling/model_selection/grid_search_cv.py +17 -2
  184. snowflake/ml/modeling/model_selection/randomized_search_cv.py +17 -2
  185. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +6 -3
  186. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +6 -3
  187. snowflake/ml/modeling/multiclass/output_code_classifier.py +6 -3
  188. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +6 -3
  189. snowflake/ml/modeling/naive_bayes/categorical_nb.py +6 -3
  190. snowflake/ml/modeling/naive_bayes/complement_nb.py +6 -3
  191. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +6 -3
  192. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +6 -3
  193. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +6 -3
  194. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +6 -3
  195. snowflake/ml/modeling/neighbors/kernel_density.py +6 -3
  196. snowflake/ml/modeling/neighbors/local_outlier_factor.py +6 -3
  197. snowflake/ml/modeling/neighbors/nearest_centroid.py +6 -3
  198. snowflake/ml/modeling/neighbors/nearest_neighbors.py +6 -3
  199. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +6 -3
  200. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -3
  201. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +6 -3
  202. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +6 -3
  203. snowflake/ml/modeling/neural_network/mlp_classifier.py +6 -3
  204. snowflake/ml/modeling/neural_network/mlp_regressor.py +6 -3
  205. snowflake/ml/modeling/pipeline/pipeline.py +16 -178
  206. snowflake/ml/modeling/preprocessing/polynomial_features.py +6 -3
  207. snowflake/ml/modeling/semi_supervised/label_propagation.py +6 -3
  208. snowflake/ml/modeling/semi_supervised/label_spreading.py +6 -3
  209. snowflake/ml/modeling/svm/linear_svc.py +6 -3
  210. snowflake/ml/modeling/svm/linear_svr.py +6 -3
  211. snowflake/ml/modeling/svm/nu_svc.py +6 -3
  212. snowflake/ml/modeling/svm/nu_svr.py +6 -3
  213. snowflake/ml/modeling/svm/svc.py +6 -3
  214. snowflake/ml/modeling/svm/svr.py +6 -3
  215. snowflake/ml/modeling/tree/decision_tree_classifier.py +6 -3
  216. snowflake/ml/modeling/tree/decision_tree_regressor.py +6 -3
  217. snowflake/ml/modeling/tree/extra_tree_classifier.py +6 -3
  218. snowflake/ml/modeling/tree/extra_tree_regressor.py +6 -3
  219. snowflake/ml/modeling/xgboost/xgb_classifier.py +167 -91
  220. snowflake/ml/modeling/xgboost/xgb_regressor.py +166 -88
  221. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +166 -88
  222. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +166 -88
  223. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
  224. snowflake/ml/registry/_manager/model_manager.py +70 -33
  225. snowflake/ml/registry/registry.py +41 -22
  226. snowflake/ml/version.py +1 -1
  227. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/METADATA +63 -19
  228. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/RECORD +231 -226
  229. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/WHEEL +1 -1
  230. snowflake/ml/_internal/utils/retryable_http.py +0 -39
  231. snowflake/ml/fileset/parquet_parser.py +0 -170
  232. snowflake/ml/fileset/tf_dataset.py +0 -88
  233. snowflake/ml/fileset/torch_datapipe.py +0 -57
  234. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
  235. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
  236. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/LICENSE.txt +0 -0
  237. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.4.dist-info}/top_level.txt +0 -0
@@ -37,6 +37,7 @@ from snowflake.ml.model.model_signature import (
37
37
  FeatureSpec,
38
38
  ModelSignature,
39
39
  _infer_signature,
40
+ _truncate_data,
40
41
  _rename_signature_with_snowflake_identifiers,
41
42
  )
42
43
 
@@ -57,6 +58,8 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
57
58
 
58
59
  DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
59
60
 
61
+ INFER_SIGNATURE_MAX_ROWS = 100
62
+
60
63
  class XGBRegressor(BaseTransformer):
61
64
  r"""Implementation of the scikit-learn API for XGBoost regression
62
65
  For more details on this class, see [xgboost.XGBRegressor]
@@ -125,112 +128,172 @@ class XGBRegressor(BaseTransformer):
125
128
  can seriously hurt performance in gradient boosting. Set the batch_size as large as possible
126
129
  based on the available memory.
127
130
 
128
- n_estimators: int
131
+ n_estimators: typing.Optional[int]
129
132
  Number of gradient boosted trees. Equivalent to number of boosting
130
133
  rounds.
131
134
 
132
- max_depth: Optional[int]
135
+ max_depth: typing.Optional[int]
136
+
133
137
  Maximum tree depth for base learners.
134
- max_leaves :
138
+
139
+ max_leaves: typing.Optional[int]
140
+
135
141
  Maximum number of leaves; 0 indicates no limit.
136
- max_bin :
142
+
143
+ max_bin: typing.Optional[int]
144
+
137
145
  If using histogram-based algorithm, maximum number of bins per feature
138
- grow_policy :
139
- Tree growing policy. 0: favor splitting at nodes closest to the node, i.e. grow
140
- depth-wise. 1: favor splitting at nodes with highest loss change.
141
- learning_rate: Optional[float]
146
+
147
+ grow_policy: typing.Optional[str]
148
+
149
+ Tree growing policy.
150
+
151
+ - depthwise: Favors splitting at nodes closest to the root node, i.e. grows depth-wise.
152
+ - lossguide: Favors splitting at nodes with highest loss change.
153
+
154
+ learning_rate: typing.Optional[float]
155
+
142
156
  Boosting learning rate (xgb's "eta")
143
- verbosity: Optional[int]
157
+
158
+ verbosity: typing.Optional[int]
159
+
144
160
  The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
145
- objective: typing.Union[str, typing.Callable[[numpy.ndarray, numpy.ndarray], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
146
- Specify the learning task and the corresponding learning objective or
147
- a custom objective function to be used (see note below).
148
- booster: Optional[str]
149
- Specify which booster to use: gbtree, gblinear or dart.
150
- tree_method: Optional[str]
161
+
162
+ objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
163
+
164
+ Specify the learning task and the corresponding learning objective or a custom
165
+ objective function to be used.
166
+
167
+ For custom objective, see :doc:`/tutorials/custom_metric_obj` and
168
+ :ref:`custom-obj-metric` for more information, along with the end note for
169
+ function signatures.
170
+
171
+ booster: typing.Optional[str]
172
+
173
+ Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.
174
+
175
+ tree_method: typing.Optional[str]
176
+
151
177
  Specify which tree method to use. Default to auto. If this parameter is set to
152
178
  default, XGBoost will choose the most conservative option available. It's
153
179
  recommended to study this option from the parameters document :doc:`tree method
154
180
  </treemethod>`
155
- n_jobs: Optional[int]
181
+
182
+ n_jobs: typing.Optional[int]
183
+
156
184
  Number of parallel threads used to run xgboost. When used with other
157
185
  Scikit-Learn algorithms like grid search, you may choose which algorithm to
158
186
  parallelize and balance the threads. Creating thread contention will
159
187
  significantly slow down both algorithms.
160
- gamma: Optional[float]
161
- (min_split_loss) Minimum loss reduction required to make a further partition on a
162
- leaf node of the tree.
163
- min_child_weight: Optional[float]
188
+
189
+ gamma: typing.Optional[float]
190
+
191
+ (min_split_loss) Minimum loss reduction required to make a further partition on
192
+ a leaf node of the tree.
193
+
194
+ min_child_weight: typing.Optional[float]
195
+
164
196
  Minimum sum of instance weight(hessian) needed in a child.
165
- max_delta_step: Optional[float]
197
+
198
+ max_delta_step: typing.Optional[float]
199
+
166
200
  Maximum delta step we allow each tree's weight estimation to be.
167
- subsample: Optional[float]
201
+
202
+ subsample: typing.Optional[float]
203
+
168
204
  Subsample ratio of the training instance.
169
- sampling_method :
170
- Sampling method. Used only by `gpu_hist` tree method.
171
- - `uniform`: select random training instances uniformly.
172
- - `gradient_based` select random training instances with higher probability when
173
- the gradient and hessian are larger. (cf. CatBoost)
174
- colsample_bytree: Optional[float]
205
+
206
+ sampling_method: typing.Optional[str]
207
+
208
+ Sampling method. Used only by the GPU version of ``hist`` tree method.
209
+
210
+ - ``uniform``: Select random training instances uniformly.
211
+ - ``gradient_based``: Select random training instances with higher probability
212
+ when the gradient and hessian are larger. (cf. CatBoost)
213
+
214
+ colsample_bytree: typing.Optional[float]
215
+
175
216
  Subsample ratio of columns when constructing each tree.
176
- colsample_bylevel: Optional[float]
217
+
218
+ colsample_bylevel: typing.Optional[float]
219
+
177
220
  Subsample ratio of columns for each level.
178
- colsample_bynode: Optional[float]
221
+
222
+ colsample_bynode: typing.Optional[float]
223
+
179
224
  Subsample ratio of columns for each split.
180
- reg_alpha: Optional[float]
225
+
226
+ reg_alpha: typing.Optional[float]
227
+
181
228
  L1 regularization term on weights (xgb's alpha).
182
- reg_lambda: Optional[float]
229
+
230
+ reg_lambda: typing.Optional[float]
231
+
183
232
  L2 regularization term on weights (xgb's lambda).
184
- scale_pos_weight: Optional[float]
233
+
234
+ scale_pos_weight: typing.Optional[float]
185
235
  Balancing of positive and negative weights.
186
- base_score: Optional[float]
236
+
237
+ base_score: typing.Optional[float]
238
+
187
239
  The initial prediction score of all instances, global bias.
188
- random_state: Optional[Union[numpy.random.RandomState, int]]
240
+
241
+ random_state: typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
242
+
189
243
  Random number seed.
190
244
 
191
245
  Using gblinear booster with shotgun updater is nondeterministic as
192
246
  it uses Hogwild algorithm.
193
247
 
194
- missing: float, default np.nan
195
- Value in the data which needs to be present as a missing value.
196
- num_parallel_tree: Optional[int]
248
+ missing: float
249
+
250
+ Value in the data which needs to be present as a missing value. Default to
251
+ :py:data:`numpy.nan`.
252
+
253
+ num_parallel_tree: typing.Optional[int]
254
+
197
255
  Used for boosting random forest.
198
- monotone_constraints: Optional[Union[Dict[str, int], str]]
256
+
257
+ monotone_constraints: typing.Union[typing.Dict[str, int], str, NoneType]
258
+
199
259
  Constraint of variable monotonicity. See :doc:`tutorial </tutorials/monotonic>`
200
260
  for more information.
201
- interaction_constraints: Optional[Union[str, List[Tuple[str]]]]
261
+
262
+ interaction_constraints: typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
263
+
202
264
  Constraints for interaction representing permitted interactions. The
203
265
  constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
204
266
  3, 4]]``, where each inner list is a group of indices of features that are
205
267
  allowed to interact with each other. See :doc:`tutorial
206
268
  </tutorials/feature_interaction_constraint>` for more information
207
- importance_type: Optional[str]
269
+
270
+ importance_type: typing.Optional[str]
271
+
208
272
  The feature importance type for the feature_importances\_ property:
209
273
 
210
274
  * For tree model, it's either "gain", "weight", "cover", "total_gain" or
211
275
  "total_cover".
212
- * For linear model, only "weight" is defined and it's the normalized coefficients
213
- without bias.
276
+ * For linear model, only "weight" is defined and it's the normalized
277
+ coefficients without bias.
278
+
279
+ device: typing.Optional[str]
280
+
281
+ Device ordinal, available options are `cpu`, `cuda`, and `gpu`.
282
+
283
+ validate_parameters: typing.Optional[bool]
214
284
 
215
- gpu_id: Optional[int]
216
- Device ordinal.
217
- validate_parameters: Optional[bool]
218
285
  Give warnings for unknown parameter.
219
- predictor: Optional[str]
220
- Force XGBoost to use specific predictor, available choices are [cpu_predictor,
221
- gpu_predictor].
286
+
222
287
  enable_categorical: bool
223
288
 
224
- Experimental support for categorical data. When enabled, cudf/pandas.DataFrame
225
- should be used to specify categorical data type. Also, JSON/UBJSON
226
- serialization format is required.
289
+ See the same parameter of :py:class:`DMatrix` for details.
227
290
 
228
- feature_types: FeatureTypes
291
+ feature_types: typing.Optional[typing.Sequence[str]]
229
292
 
230
293
  Used for specifying feature types without constructing a dataframe. See
231
294
  :py:class:`DMatrix` for details.
232
295
 
233
- max_cat_to_onehot: Optional[int]
296
+ max_cat_to_onehot: typing.Optional[int]
234
297
 
235
298
  A threshold for deciding whether XGBoost should use one-hot encoding based split
236
299
  for categorical data. When the number of categories is less than the threshold
@@ -239,36 +302,41 @@ class XGBRegressor(BaseTransformer):
239
302
  categorical feature support. See :doc:`Categorical Data
240
303
  </tutorials/categorical>` and :ref:`cat-param` for details.
241
304
 
242
- max_cat_threshold: Optional[int]
305
+ max_cat_threshold: typing.Optional[int]
243
306
 
244
307
  Maximum number of categories considered for each split. Used only by
245
308
  partition-based splits for preventing over-fitting. Also, `enable_categorical`
246
309
  needs to be set to have categorical feature support. See :doc:`Categorical Data
247
310
  </tutorials/categorical>` and :ref:`cat-param` for details.
248
311
 
249
- eval_metric: Optional[Union[str, List[str], Callable]]
312
+ multi_strategy: typing.Optional[str]
313
+
314
+ The strategy used for training multi-target models, including multi-target
315
+ regression and multi-class classification. See :doc:`/tutorials/multioutput` for
316
+ more information.
317
+
318
+ - ``one_output_per_tree``: One model for each target.
319
+ - ``multi_output_tree``: Use multi-target trees.
320
+
321
+ eval_metric: typing.Union[str, typing.List[str], typing.Callable, NoneType]
250
322
 
251
323
  Metric used for monitoring the training result and early stopping. It can be a
252
324
  string or list of strings as names of predefined metric in XGBoost (See
253
- doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any other
254
- user defined metric that looks like `sklearn.metrics`.
325
+ doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
326
+ other user defined metric that looks like `sklearn.metrics`.
255
327
 
256
328
  If custom objective is also provided, then custom metric should implement the
257
329
  corresponding reverse link function.
258
330
 
259
331
  Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
260
- object is provided, it's assumed to be a cost function and by default XGBoost will
261
- minimize the result during early stopping.
262
-
263
- For advanced usage on Early stopping like directly choosing to maximize instead of
264
- minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
332
+ object is provided, it's assumed to be a cost function and by default XGBoost
333
+ will minimize the result during early stopping.
265
334
 
266
- See :doc:`Custom Objective and Evaluation Metric </tutorials/custom_metric_obj>`
267
- for more.
335
+ For advanced usage on Early stopping like directly choosing to maximize instead
336
+ of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
268
337
 
269
- This parameter replaces `eval_metric` in :py:meth:`fit` method. The old one
270
- receives un-transformed prediction regardless of whether custom objective is
271
- being used.
338
+ See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
339
+ information.
272
340
 
273
341
  from sklearn.datasets import load_diabetes
274
342
  from sklearn.metrics import mean_absolute_error
@@ -279,24 +347,29 @@ class XGBRegressor(BaseTransformer):
279
347
  )
280
348
  reg.fit(X, y, eval_set=[(X, y)])
281
349
 
282
- early_stopping_rounds: Optional[int]
350
+ early_stopping_rounds: typing.Optional[int]
283
351
 
284
- Activates early stopping. Validation metric needs to improve at least once in
285
- every **early_stopping_rounds** round(s) to continue training. Requires at least
286
- one item in **eval_set** in :py:meth:`fit`.
352
+ - Activates early stopping. Validation metric needs to improve at least once in
353
+ every **early_stopping_rounds** round(s) to continue training. Requires at
354
+ least one item in **eval_set** in :py:meth:`fit`.
287
355
 
288
- The method returns the model from the last iteration (not the best one). If
289
- there's more than one item in **eval_set**, the last entry will be used for early
290
- stopping. If there's more than one metric in **eval_metric**, the last metric
291
- will be used for early stopping.
356
+ - If early stopping occurs, the model will have two additional attributes:
357
+ :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
358
+ :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
359
+ number of trees during inference. If users want to access the full model
360
+ (including trees built after early stopping), they can specify the
361
+ `iteration_range` in these inference methods. In addition, other utilities
362
+ like model plotting can also use the entire model.
292
363
 
293
- If early stopping occurs, the model will have three additional fields:
294
- :py:attr:`best_score`, :py:attr:`best_iteration` and
295
- :py:attr:`best_ntree_limit`.
364
+ - If you prefer to discard the trees after `best_iteration`, consider using the
365
+ callback function :py:class:`xgboost.callback.EarlyStopping`.
296
366
 
297
- This parameter replaces `early_stopping_rounds` in :py:meth:`fit` method.
367
+ - If there's more than one item in **eval_set**, the last entry will be used for
368
+ early stopping. If there's more than one metric in **eval_metric**, the last
369
+ metric will be used for early stopping.
370
+
371
+ callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]]
298
372
 
299
- callbacks: Optional[List[TrainingCallback]]
300
373
  List of callback functions that are applied at end of each iteration.
301
374
  It is possible to use predefined callbacks by using
302
375
  :ref:`Callback API <callback_api>`.
@@ -308,9 +381,11 @@ class XGBRegressor(BaseTransformer):
308
381
  for params in parameters_grid:
309
382
  # be sure to (re)initialize the callbacks before each run
310
383
  callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
311
- xgboost.train(params, Xy, callbacks=callbacks)
384
+ reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
385
+ reg.fit(X, y)
386
+
387
+ kwargs: typing.Optional[typing.Any]
312
388
 
313
- kwargs: dict, optional
314
389
  Keyword arguments for XGBoost Booster object. Full documentation of parameters
315
390
  can be found :doc:`here </parameter>`.
316
391
  Attempting to set a parameter via the constructor args and \*\*kwargs
@@ -321,13 +396,16 @@ class XGBRegressor(BaseTransformer):
321
396
  with scikit-learn.
322
397
 
323
398
  A custom objective function can be provided for the ``objective``
324
- parameter. In this case, it should have the signature
325
- ``objective(y_true, y_pred) -> grad, hess``:
399
+ parameter. In this case, it should have the signature ``objective(y_true,
400
+ y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
401
+ -> [grad, hess]``:
326
402
 
327
403
  y_true: array_like of shape [n_samples]
328
404
  The target values
329
405
  y_pred: array_like of shape [n_samples]
330
406
  The predicted values
407
+ sample_weight :
408
+ Optional sample weights.
331
409
 
332
410
  grad: array_like of shape [n_samples]
333
411
  The value of the gradient for each sample point.
@@ -627,7 +705,7 @@ class XGBRegressor(BaseTransformer):
627
705
  elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
628
706
  expected_dtype = "array"
629
707
  else:
630
- output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
708
+ output_types = [signature.as_snowpark_type() for signature in _infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True)]
631
709
  # We can only infer the output types from the input types if the following two statements are true:
632
710
  # 1) All of the output types are the same. Otherwise, we still have to fall back to variant because `_sklearn_inference` only accepts one type.
633
711
  # 2) The length of the input columns equals the length of the output columns. Otherwise the transform will likely result in an `ARRAY`.
@@ -1284,7 +1362,7 @@ class XGBRegressor(BaseTransformer):
1284
1362
 
1285
1363
  PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
1286
1364
 
1287
- inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
1365
+ inputs = list(_infer_signature(_truncate_data(dataset[self.input_cols], INFER_SIGNATURE_MAX_ROWS), "input", use_snowflake_identifiers=True))
1288
1366
  outputs: List[BaseFeatureSpec] = []
1289
1367
  if hasattr(self, "predict"):
1290
1368
  # keep mypy happy
@@ -1292,7 +1370,7 @@ class XGBRegressor(BaseTransformer):
1292
1370
  # For classifier, the type of predict is the same as the type of label
1293
1371
  if self._sklearn_object._estimator_type == "classifier":
1294
1372
  # label columns is the desired type for output
1295
- outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
1373
+ outputs = list(_infer_signature(_truncate_data(dataset[self.label_cols], INFER_SIGNATURE_MAX_ROWS), "output", use_snowflake_identifiers=True))
1296
1374
  # rename the output columns
1297
1375
  outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
1298
1376
  self._model_signature_dict["predict"] = ModelSignature(