snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +82 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +18 -18
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +2 -6
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/pandas_handler.py +1 -1
- snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/pipeline/pipeline.py +6 -176
- snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
- snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +38 -9
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +63 -67
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
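A quick way to confirm which of the two versions is installed locally (a minimal sketch, not part of the diff; `snowflake/ml/version.py` is the file changed +1 -1 above):

    # Hypothetical check: print the installed snowflake-ml-python version
    # recorded in snowflake/ml/version.py.
    import snowflake.ml.version

    print(snowflake.ml.version.VERSION)  # "1.7.3" after upgrading, "1.7.2" before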
snowflake/ml/modeling/xgboost/xgb_regressor.py

@@ -125,112 +125,172 @@ class XGBRegressor(BaseTransformer):
 can seriously hurt performance in gradient boosting. Set the batch_size as large as possible
 based on the available memory.

-n_estimators: int
+n_estimators: typing.Optional[int]
 Number of gradient boosted trees. Equivalent to number of boosting
 rounds.

-max_depth: Optional[int]
+max_depth: typing.Optional[int]
+
 Maximum tree depth for base learners.
-
+
+max_leaves: typing.Optional[int]
+
 Maximum number of leaves; 0 indicates no limit.
-
+
+max_bin: typing.Optional[int]
+
 If using histogram-based algorithm, maximum number of bins per feature
-
-
-
-
+
+grow_policy: typing.Optional[str]
+
+Tree growing policy.
+
+- depthwise: Favors splitting at nodes closest to the node,
+- lossguide: Favors splitting at nodes with highest loss change.
+
+learning_rate: typing.Optional[float]
+
 Boosting learning rate (xgb's "eta")
-
+
+verbosity: typing.Optional[int]
+
 The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
-
-
-
-
-
-
+
+objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
+
+Specify the learning task and the corresponding learning objective or a custom
+objective function to be used.
+
+For custom objective, see :doc:`/tutorials/custom_metric_obj` and
+:ref:`custom-obj-metric` for more information, along with the end note for
+function signatures.
+
+booster: typing.Optional[str]
+
+Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.
+
+tree_method: typing.Optional[str]
+
 Specify which tree method to use. Default to auto. If this parameter is set to
 default, XGBoost will choose the most conservative option available. It's
 recommended to study this option from the parameters document :doc:`tree method
 </treemethod>`
-
+
+n_jobs: typing.Optional[int]
+
 Number of parallel threads used to run xgboost. When used with other
 Scikit-Learn algorithms like grid search, you may choose which algorithm to
 parallelize and balance the threads. Creating thread contention will
 significantly slow down both algorithms.
-
-
-
-
+
+gamma: typing.Optional[float]
+
+(min_split_loss) Minimum loss reduction required to make a further partition on
+a leaf node of the tree.
+
+min_child_weight: typing.Optional[float]
+
 Minimum sum of instance weight(hessian) needed in a child.
-
+
+max_delta_step: typing.Optional[float]
+
 Maximum delta step we allow each tree's weight estimation to be.
-
+
+subsample: typing.Optional[float]
+
 Subsample ratio of the training instance.
-
-
-
-
-
-
+
+sampling_method: typing.Optional[str]
+
+Sampling method. Used only by the GPU version of ``hist`` tree method.
+
+- ``uniform``: Select random training instances uniformly.
+- ``gradient_based``: Select random training instances with higher probability
+when the gradient and hessian are larger. (cf. CatBoost)
+
+colsample_bytree: typing.Optional[float]
+
 Subsample ratio of columns when constructing each tree.
-
+
+colsample_bylevel: typing.Optional[float]
+
 Subsample ratio of columns for each level.
-
+
+colsample_bynode: typing.Optional[float]
+
 Subsample ratio of columns for each split.
-
+
+reg_alpha: typing.Optional[float]
+
 L1 regularization term on weights (xgb's alpha).
-
+
+reg_lambda: typing.Optional[float]
+
 L2 regularization term on weights (xgb's lambda).
-
+
+scale_pos_weight: typing.Optional[float]
 Balancing of positive and negative weights.
-
+
+base_score: typing.Optional[float]
+
 The initial prediction score of all instances, global bias.
-
+
+random_state: typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
+
 Random number seed.

 Using gblinear booster with shotgun updater is nondeterministic as
 it uses Hogwild algorithm.

-missing: float
-
-
+missing: float
+
+Value in the data which needs to be present as a missing value. Default to
+:py:data:`numpy.nan`.
+
+num_parallel_tree: typing.Optional[int]
+
 Used for boosting random forest.
-
+
+monotone_constraints: typing.Union[typing.Dict[str, int], str, NoneType]
+
 Constraint of variable monotonicity. See :doc:`tutorial </tutorials/monotonic>`
 for more information.
-
+
+interaction_constraints: typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
+
 Constraints for interaction representing permitted interactions. The
 constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
 3, 4]]``, where each inner list is a group of indices of features that are
 allowed to interact with each other. See :doc:`tutorial
 </tutorials/feature_interaction_constraint>` for more information
-
+
+importance_type: typing.Optional[str]
+
 The feature importance type for the feature_importances\_ property:

 * For tree model, it's either "gain", "weight", "cover", "total_gain" or
 "total_cover".
-* For linear model, only "weight" is defined and it's the normalized
-without bias.
+* For linear model, only "weight" is defined and it's the normalized
+coefficients without bias.
+
+device: typing.Optional[str]
+
+Device ordinal, available options are `cpu`, `cuda`, and `gpu`.
+
+validate_parameters: typing.Optional[bool]

-gpu_id: Optional[int]
-Device ordinal.
-validate_parameters: Optional[bool]
 Give warnings for unknown parameter.
-
-Force XGBoost to use specific predictor, available choices are [cpu_predictor,
-gpu_predictor].
+
 enable_categorical: bool

-
-should be used to specify categorical data type. Also, JSON/UBJSON
-serialization format is required.
+See the same parameter of :py:class:`DMatrix` for details.

-feature_types:
+feature_types: typing.Optional[typing.Sequence[str]]

 Used for specifying feature types without constructing a dataframe. See
 :py:class:`DMatrix` for details.

-max_cat_to_onehot: Optional[int]
+max_cat_to_onehot: typing.Optional[int]

 A threshold for deciding whether XGBoost should use one-hot encoding based split
 for categorical data. When number of categories is lesser than the threshold

@@ -239,36 +299,41 @@ class XGBRegressor(BaseTransformer):
 categorical feature support. See :doc:`Categorical Data
 </tutorials/categorical>` and :ref:`cat-param` for details.

-max_cat_threshold: Optional[int]
+max_cat_threshold: typing.Optional[int]

 Maximum number of categories considered for each split. Used only by
 partition-based splits for preventing over-fitting. Also, `enable_categorical`
 needs to be set to have categorical feature support. See :doc:`Categorical Data
 </tutorials/categorical>` and :ref:`cat-param` for details.

-
+multi_strategy: typing.Optional[str]
+
+The strategy used for training multi-target models, including multi-target
+regression and multi-class classification. See :doc:`/tutorials/multioutput` for
+more information.
+
+- ``one_output_per_tree``: One model for each target.
+- ``multi_output_tree``: Use multi-target trees.
+
+eval_metric: typing.Union[str, typing.List[str], typing.Callable, NoneType]

 Metric used for monitoring the training result and early stopping. It can be a
 string or list of strings as names of predefined metric in XGBoost (See
-doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
-user defined metric that looks like `sklearn.metrics`.
+doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
+other user defined metric that looks like `sklearn.metrics`.

 If custom objective is also provided, then custom metric should implement the
 corresponding reverse link function.

 Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
-object is provided, it's assumed to be a cost function and by default XGBoost
-minimize the result during early stopping.
-
-For advanced usage on Early stopping like directly choosing to maximize instead of
-minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
+object is provided, it's assumed to be a cost function and by default XGBoost
+will minimize the result during early stopping.

-
-
+For advanced usage on Early stopping like directly choosing to maximize instead
+of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.

-
-
-being used.
+See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
+information.

 from sklearn.datasets import load_diabetes
 from sklearn.metrics import mean_absolute_error

@@ -279,24 +344,29 @@ class XGBRegressor(BaseTransformer):
 )
 reg.fit(X, y, eval_set=[(X, y)])

-early_stopping_rounds: Optional[int]
+early_stopping_rounds: typing.Optional[int]

-Activates early stopping. Validation metric needs to improve at least once in
-
-
+- Activates early stopping. Validation metric needs to improve at least once in
+every **early_stopping_rounds** round(s) to continue training. Requires at
+least one item in **eval_set** in :py:meth:`fit`.

-
-
-
-
+- If early stopping occurs, the model will have two additional attributes:
+:py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
+:py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
+number of trees during inference. If users want to access the full model
+(including trees built after early stopping), they can specify the
+`iteration_range` in these inference methods. In addition, other utilities
+like model plotting can also use the entire model.

-If
-
-:py:attr:`best_ntree_limit`.
+- If you prefer to discard the trees after `best_iteration`, consider using the
+callback function :py:class:`xgboost.callback.EarlyStopping`.

-
+- If there's more than one item in **eval_set**, the last entry will be used for
+early stopping. If there's more than one metric in **eval_metric**, the last
+metric will be used for early stopping.
+
+callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]]

-callbacks: Optional[List[TrainingCallback]]
 List of callback functions that are applied at end of each iteration.
 It is possible to use predefined callbacks by using
 :ref:`Callback API <callback_api>`.

@@ -308,9 +378,11 @@ class XGBRegressor(BaseTransformer):
 for params in parameters_grid:
 # be sure to (re)initialize the callbacks before each run
 callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
-xgboost.
+reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
+reg.fit(X, y)
+
+kwargs: typing.Optional[typing.Any]

-kwargs: dict, optional
 Keyword arguments for XGBoost Booster object. Full documentation of parameters
 can be found :doc:`here </parameter>`.
 Attempting to set a parameter via the constructor args and \*\*kwargs

@@ -321,13 +393,16 @@ class XGBRegressor(BaseTransformer):
 with scikit-learn.

 A custom objective function can be provided for the ``objective``
-parameter. In this case, it should have the signature
-``objective(y_true, y_pred
+parameter. In this case, it should have the signature ``objective(y_true,
+y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
+-> [grad, hess]``:

 y_true: array_like of shape [n_samples]
 The target values
 y_pred: array_like of shape [n_samples]
 The predicted values
+sample_weight :
+Optional sample weights.

 grad: array_like of shape [n_samples]
 The value of the gradient for each sample point.