snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. snowflake/cortex/__init__.py +16 -8
  2. snowflake/cortex/_classify_text.py +12 -1
  3. snowflake/cortex/_complete.py +82 -13
  4. snowflake/cortex/_embed_text_1024.py +9 -2
  5. snowflake/cortex/_embed_text_768.py +9 -2
  6. snowflake/cortex/_extract_answer.py +9 -2
  7. snowflake/cortex/_sentiment.py +9 -2
  8. snowflake/cortex/_summarize.py +9 -2
  9. snowflake/cortex/_translate.py +9 -2
  10. snowflake/ml/_internal/env_utils.py +7 -52
  11. snowflake/ml/_internal/utils/identifier.py +4 -2
  12. snowflake/ml/data/__init__.py +3 -0
  13. snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
  14. snowflake/ml/data/data_connector.py +53 -11
  15. snowflake/ml/data/data_ingestor.py +2 -1
  16. snowflake/ml/data/torch_utils.py +18 -5
  17. snowflake/ml/feature_store/examples/example_helper.py +2 -1
  18. snowflake/ml/fileset/fileset.py +18 -18
  19. snowflake/ml/model/_client/model/model_version_impl.py +5 -3
  20. snowflake/ml/model/_client/ops/model_ops.py +2 -6
  21. snowflake/ml/model/_client/sql/model_version.py +11 -0
  22. snowflake/ml/model/_model_composer/model_composer.py +8 -3
  23. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
  24. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  25. snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
  26. snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
  27. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
  28. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
  29. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
  30. snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
  31. snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
  32. snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
  33. snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
  34. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
  35. snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
  36. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
  37. snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
  38. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
  39. snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
  40. snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
  41. snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
  42. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  43. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
  44. snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
  45. snowflake/ml/model/_signatures/pandas_handler.py +1 -1
  46. snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
  47. snowflake/ml/model/type_hints.py +1 -0
  48. snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
  49. snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
  50. snowflake/ml/modeling/pipeline/pipeline.py +6 -176
  51. snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
  52. snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
  53. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
  54. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
  55. snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
  56. snowflake/ml/registry/_manager/model_manager.py +70 -33
  57. snowflake/ml/registry/registry.py +41 -22
  58. snowflake/ml/version.py +1 -1
  59. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +38 -9
  60. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +63 -67
  61. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
  62. snowflake/ml/_internal/utils/retryable_http.py +0 -39
  63. snowflake/ml/fileset/parquet_parser.py +0 -170
  64. snowflake/ml/fileset/tf_dataset.py +0 -88
  65. snowflake/ml/fileset/torch_datapipe.py +0 -57
  66. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
  67. snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
  68. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
  69. {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
@@ -125,112 +125,172 @@ class XGBRegressor(BaseTransformer):
  can seriously hurt performance in gradient boosting. Set the batch_size as large as possible
  based on the available memory.
 
- n_estimators: int
+ n_estimators: typing.Optional[int]
  Number of gradient boosted trees. Equivalent to number of boosting
  rounds.
 
- max_depth: Optional[int]
+ max_depth: typing.Optional[int]
+
  Maximum tree depth for base learners.
- max_leaves :
+
+ max_leaves: typing.Optional[int]
+
  Maximum number of leaves; 0 indicates no limit.
- max_bin :
+
+ max_bin: typing.Optional[int]
+
  If using histogram-based algorithm, maximum number of bins per feature
- grow_policy :
- Tree growing policy. 0: favor splitting at nodes closest to the node, i.e. grow
- depth-wise. 1: favor splitting at nodes with highest loss change.
- learning_rate: Optional[float]
+
+ grow_policy: typing.Optional[str]
+
+ Tree growing policy.
+
+ - depthwise: Favors splitting at nodes closest to the node,
+ - lossguide: Favors splitting at nodes with highest loss change.
+
+ learning_rate: typing.Optional[float]
+
  Boosting learning rate (xgb's "eta")
- verbosity: Optional[int]
+
+ verbosity: typing.Optional[int]
+
  The degree of verbosity. Valid values are 0 (silent) - 3 (debug).
- objective: typing.Union[str, typing.Callable[[numpy.ndarray, numpy.ndarray], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
- Specify the learning task and the corresponding learning objective or
- a custom objective function to be used (see note below).
- booster: Optional[str]
- Specify which booster to use: gbtree, gblinear or dart.
- tree_method: Optional[str]
+
+ objective: typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
+
+ Specify the learning task and the corresponding learning objective or a custom
+ objective function to be used.
+
+ For custom objective, see :doc:`/tutorials/custom_metric_obj` and
+ :ref:`custom-obj-metric` for more information, along with the end note for
+ function signatures.
+
+ booster: typing.Optional[str]
+
+ Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.
+
+ tree_method: typing.Optional[str]
+
  Specify which tree method to use. Default to auto. If this parameter is set to
  default, XGBoost will choose the most conservative option available. It's
  recommended to study this option from the parameters document :doc:`tree method
  </treemethod>`
- n_jobs: Optional[int]
+
+ n_jobs: typing.Optional[int]
+
  Number of parallel threads used to run xgboost. When used with other
  Scikit-Learn algorithms like grid search, you may choose which algorithm to
  parallelize and balance the threads. Creating thread contention will
  significantly slow down both algorithms.
- gamma: Optional[float]
- (min_split_loss) Minimum loss reduction required to make a further partition on a
- leaf node of the tree.
- min_child_weight: Optional[float]
+
+ gamma: typing.Optional[float]
+
+ (min_split_loss) Minimum loss reduction required to make a further partition on
+ a leaf node of the tree.
+
+ min_child_weight: typing.Optional[float]
+
  Minimum sum of instance weight(hessian) needed in a child.
- max_delta_step: Optional[float]
+
+ max_delta_step: typing.Optional[float]
+
  Maximum delta step we allow each tree's weight estimation to be.
- subsample: Optional[float]
+
+ subsample: typing.Optional[float]
+
  Subsample ratio of the training instance.
- sampling_method :
- Sampling method. Used only by `gpu_hist` tree method.
- - `uniform`: select random training instances uniformly.
- - `gradient_based` select random training instances with higher probability when
- the gradient and hessian are larger. (cf. CatBoost)
- colsample_bytree: Optional[float]
+
+ sampling_method: typing.Optional[str]
+
+ Sampling method. Used only by the GPU version of ``hist`` tree method.
+
+ - ``uniform``: Select random training instances uniformly.
+ - ``gradient_based``: Select random training instances with higher probability
+ when the gradient and hessian are larger. (cf. CatBoost)
+
+ colsample_bytree: typing.Optional[float]
+
  Subsample ratio of columns when constructing each tree.
- colsample_bylevel: Optional[float]
+
+ colsample_bylevel: typing.Optional[float]
+
  Subsample ratio of columns for each level.
- colsample_bynode: Optional[float]
+
+ colsample_bynode: typing.Optional[float]
+
  Subsample ratio of columns for each split.
- reg_alpha: Optional[float]
+
+ reg_alpha: typing.Optional[float]
+
  L1 regularization term on weights (xgb's alpha).
- reg_lambda: Optional[float]
+
+ reg_lambda: typing.Optional[float]
+
  L2 regularization term on weights (xgb's lambda).
- scale_pos_weight: Optional[float]
+
+ scale_pos_weight: typing.Optional[float]
  Balancing of positive and negative weights.
- base_score: Optional[float]
+
+ base_score: typing.Optional[float]
+
  The initial prediction score of all instances, global bias.
- random_state: Optional[Union[numpy.random.RandomState, int]]
+
+ random_state: typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
+
  Random number seed.
 
  Using gblinear booster with shotgun updater is nondeterministic as
  it uses Hogwild algorithm.
 
- missing: float, default np.nan
- Value in the data which needs to be present as a missing value.
- num_parallel_tree: Optional[int]
+ missing: float
+
+ Value in the data which needs to be present as a missing value. Default to
+ :py:data:`numpy.nan`.
+
+ num_parallel_tree: typing.Optional[int]
+
  Used for boosting random forest.
- monotone_constraints: Optional[Union[Dict[str, int], str]]
+
+ monotone_constraints: typing.Union[typing.Dict[str, int], str, NoneType]
+
  Constraint of variable monotonicity. See :doc:`tutorial </tutorials/monotonic>`
  for more information.
- interaction_constraints: Optional[Union[str, List[Tuple[str]]]]
+
+ interaction_constraints: typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
+
  Constraints for interaction representing permitted interactions. The
  constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
  3, 4]]``, where each inner list is a group of indices of features that are
  allowed to interact with each other. See :doc:`tutorial
  </tutorials/feature_interaction_constraint>` for more information
- importance_type: Optional[str]
+
+ importance_type: typing.Optional[str]
+
  The feature importance type for the feature_importances\_ property:
 
  * For tree model, it's either "gain", "weight", "cover", "total_gain" or
  "total_cover".
- * For linear model, only "weight" is defined and it's the normalized coefficients
- without bias.
+ * For linear model, only "weight" is defined and it's the normalized
+ coefficients without bias.
+
+ device: typing.Optional[str]
+
+ Device ordinal, available options are `cpu`, `cuda`, and `gpu`.
+
+ validate_parameters: typing.Optional[bool]
 
- gpu_id: Optional[int]
- Device ordinal.
- validate_parameters: Optional[bool]
  Give warnings for unknown parameter.
- predictor: Optional[str]
- Force XGBoost to use specific predictor, available choices are [cpu_predictor,
- gpu_predictor].
+
  enable_categorical: bool
 
- Experimental support for categorical data. When enabled, cudf/pandas.DataFrame
- should be used to specify categorical data type. Also, JSON/UBJSON
- serialization format is required.
+ See the same parameter of :py:class:`DMatrix` for details.
 
- feature_types: FeatureTypes
+ feature_types: typing.Optional[typing.Sequence[str]]
 
  Used for specifying feature types without constructing a dataframe. See
  :py:class:`DMatrix` for details.
 
- max_cat_to_onehot: Optional[int]
+ max_cat_to_onehot: typing.Optional[int]
 
  A threshold for deciding whether XGBoost should use one-hot encoding based split
  for categorical data. When number of categories is lesser than the threshold
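The hunk above mostly re-generates the parameter docstring against a newer XGBoost, where `max_leaves`, `max_bin`, `grow_policy`, and the new `device` option gain explicit type annotations. As a point of reference only, here is a minimal sketch of how those knobs are passed to a plain `xgboost.XGBRegressor` (not the Snowflake wrapper); it assumes xgboost >= 2.0 (where `device` replaces `gpu_id`) and uses made-up data:

```python
# Illustrative sketch only: exercises the hist-specific parameters documented above.
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(256, 8))
y = X[:, 0] * 2.0 + rng.normal(scale=0.1, size=256)

reg = xgb.XGBRegressor(
    n_estimators=50,
    tree_method="hist",        # histogram-based algorithm, so max_bin applies
    max_bin=128,               # maximum number of bins per feature
    max_leaves=31,             # 0 would mean no limit
    grow_policy="lossguide",   # favor splitting at nodes with highest loss change
    device="cpu",              # "cuda" / "gpu" for GPU training (xgboost >= 2.0)
)
reg.fit(X, y)
print(reg.predict(X[:3]))
```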
@@ -239,36 +299,41 @@ class XGBRegressor(BaseTransformer):
  categorical feature support. See :doc:`Categorical Data
  </tutorials/categorical>` and :ref:`cat-param` for details.
 
- max_cat_threshold: Optional[int]
+ max_cat_threshold: typing.Optional[int]
 
  Maximum number of categories considered for each split. Used only by
  partition-based splits for preventing over-fitting. Also, `enable_categorical`
  needs to be set to have categorical feature support. See :doc:`Categorical Data
  </tutorials/categorical>` and :ref:`cat-param` for details.
 
- eval_metric: Optional[Union[str, List[str], Callable]]
+ multi_strategy: typing.Optional[str]
+
+ The strategy used for training multi-target models, including multi-target
+ regression and multi-class classification. See :doc:`/tutorials/multioutput` for
+ more information.
+
+ - ``one_output_per_tree``: One model for each target.
+ - ``multi_output_tree``: Use multi-target trees.
+
+ eval_metric: typing.Union[str, typing.List[str], typing.Callable, NoneType]
 
  Metric used for monitoring the training result and early stopping. It can be a
  string or list of strings as names of predefined metric in XGBoost (See
- doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any other
- user defined metric that looks like `sklearn.metrics`.
+ doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
+ other user defined metric that looks like `sklearn.metrics`.
 
  If custom objective is also provided, then custom metric should implement the
  corresponding reverse link function.
 
  Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
- object is provided, it's assumed to be a cost function and by default XGBoost will
- minimize the result during early stopping.
-
- For advanced usage on Early stopping like directly choosing to maximize instead of
- minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
+ object is provided, it's assumed to be a cost function and by default XGBoost
+ will minimize the result during early stopping.
 
- See :doc:`Custom Objective and Evaluation Metric </tutorials/custom_metric_obj>`
- for more.
+ For advanced usage on Early stopping like directly choosing to maximize instead
+ of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
 
- This parameter replaces `eval_metric` in :py:meth:`fit` method. The old one
- receives un-transformed prediction regardless of whether custom objective is
- being used.
+ See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
+ information.
 
  from sklearn.datasets import load_diabetes
  from sklearn.metrics import mean_absolute_error
@@ -279,24 +344,29 @@ class XGBRegressor(BaseTransformer):
  )
  reg.fit(X, y, eval_set=[(X, y)])
 
- early_stopping_rounds: Optional[int]
+ early_stopping_rounds: typing.Optional[int]
 
- Activates early stopping. Validation metric needs to improve at least once in
- every **early_stopping_rounds** round(s) to continue training. Requires at least
- one item in **eval_set** in :py:meth:`fit`.
+ - Activates early stopping. Validation metric needs to improve at least once in
+ every **early_stopping_rounds** round(s) to continue training. Requires at
+ least one item in **eval_set** in :py:meth:`fit`.
 
- The method returns the model from the last iteration (not the best one). If
- there's more than one item in **eval_set**, the last entry will be used for early
- stopping. If there's more than one metric in **eval_metric**, the last metric
- will be used for early stopping.
+ - If early stopping occurs, the model will have two additional attributes:
+ :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
+ :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
+ number of trees during inference. If users want to access the full model
+ (including trees built after early stopping), they can specify the
+ `iteration_range` in these inference methods. In addition, other utilities
+ like model plotting can also use the entire model.
 
- If early stopping occurs, the model will have three additional fields:
- :py:attr:`best_score`, :py:attr:`best_iteration` and
- :py:attr:`best_ntree_limit`.
+ - If you prefer to discard the trees after `best_iteration`, consider using the
+ callback function :py:class:`xgboost.callback.EarlyStopping`.
 
- This parameter replaces `early_stopping_rounds` in :py:meth:`fit` method.
+ - If there's more than one item in **eval_set**, the last entry will be used for
+ early stopping. If there's more than one metric in **eval_metric**, the last
+ metric will be used for early stopping.
+
+ callbacks: typing.Optional[typing.List[xgboost.callback.TrainingCallback]]
 
- callbacks: Optional[List[TrainingCallback]]
  List of callback functions that are applied at end of each iteration.
  It is possible to use predefined callbacks by using
  :ref:`Callback API <callback_api>`.
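The updated `early_stopping_rounds` wording above describes the constructor-level early-stopping behaviour and points to `xgboost.callback.EarlyStopping` as the way to keep only the trees up to `best_iteration`. A minimal sketch of both patterns, assuming xgboost >= 2.0 and scikit-learn are installed; the data and hyperparameter values are illustrative only:

```python
# Illustrative sketch of the early-stopping behaviour described above.
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 5))
y = X @ rng.normal(size=5) + rng.normal(scale=0.1, size=500)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

# Constructor-level early stopping: best_iteration / best_score are set after
# fit, and predict() uses the best iteration by default.
reg = xgb.XGBRegressor(n_estimators=500, early_stopping_rounds=10, eval_metric="rmse")
reg.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
print(reg.best_iteration, reg.best_score)

# Alternative mentioned above: the EarlyStopping callback, with save_best=True
# to discard the trees built after best_iteration.
es = xgb.callback.EarlyStopping(rounds=10, save_best=True)
reg2 = xgb.XGBRegressor(n_estimators=500, eval_metric="rmse", callbacks=[es])
reg2.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
```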
@@ -308,9 +378,11 @@ class XGBRegressor(BaseTransformer):
  for params in parameters_grid:
  # be sure to (re)initialize the callbacks before each run
  callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
- xgboost.train(params, Xy, callbacks=callbacks)
+ reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
+ reg.fit(X, y)
+
+ kwargs: typing.Optional[typing.Any]
 
- kwargs: dict, optional
  Keyword arguments for XGBoost Booster object. Full documentation of parameters
  can be found :doc:`here </parameter>`.
  Attempting to set a parameter via the constructor args and \*\*kwargs
@@ -321,13 +393,16 @@ class XGBRegressor(BaseTransformer):
  with scikit-learn.
 
  A custom objective function can be provided for the ``objective``
- parameter. In this case, it should have the signature
- ``objective(y_true, y_pred) -> grad, hess``:
+ parameter. In this case, it should have the signature ``objective(y_true,
+ y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
+ -> [grad, hess]``:
 
  y_true: array_like of shape [n_samples]
  The target values
  y_pred: array_like of shape [n_samples]
  The predicted values
+ sample_weight :
+ Optional sample weights.
 
  grad: array_like of shape [n_samples]
  The value of the gradient for each sample point.