snowflake-ml-python 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +1 -1
- snowflake/cortex/_extract_answer.py +1 -1
- snowflake/cortex/_sentiment.py +1 -1
- snowflake/cortex/_summarize.py +1 -1
- snowflake/cortex/_translate.py +1 -1
- snowflake/ml/_internal/env_utils.py +68 -6
- snowflake/ml/_internal/file_utils.py +34 -4
- snowflake/ml/_internal/telemetry.py +79 -91
- snowflake/ml/_internal/utils/retryable_http.py +16 -4
- snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
- snowflake/ml/dataset/dataset.py +1 -1
- snowflake/ml/model/_api.py +21 -14
- snowflake/ml/model/_client/model/model_impl.py +176 -0
- snowflake/ml/model/_client/model/model_method_info.py +19 -0
- snowflake/ml/model/_client/model/model_version_impl.py +291 -0
- snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
- snowflake/ml/model/_client/ops/model_ops.py +308 -0
- snowflake/ml/model/_client/sql/model.py +75 -0
- snowflake/ml/model/_client/sql/model_version.py +213 -0
- snowflake/ml/model/_client/sql/stage.py +40 -0
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
- snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
- snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
- snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
- snowflake/ml/model/_model_composer/model_composer.py +31 -9
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
- snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
- snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
- snowflake/ml/model/model_signature.py +108 -53
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
- snowflake/ml/modeling/_internal/model_specifications.py +146 -0
- snowflake/ml/modeling/_internal/model_trainer.py +13 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
- snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
- snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
- snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +96 -124
- snowflake/ml/modeling/cluster/affinity_propagation.py +94 -124
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +94 -124
- snowflake/ml/modeling/cluster/birch.py +94 -124
- snowflake/ml/modeling/cluster/bisecting_k_means.py +94 -124
- snowflake/ml/modeling/cluster/dbscan.py +94 -124
- snowflake/ml/modeling/cluster/feature_agglomeration.py +94 -124
- snowflake/ml/modeling/cluster/k_means.py +93 -124
- snowflake/ml/modeling/cluster/mean_shift.py +94 -124
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +93 -124
- snowflake/ml/modeling/cluster/optics.py +94 -124
- snowflake/ml/modeling/cluster/spectral_biclustering.py +94 -124
- snowflake/ml/modeling/cluster/spectral_clustering.py +94 -124
- snowflake/ml/modeling/cluster/spectral_coclustering.py +94 -124
- snowflake/ml/modeling/compose/column_transformer.py +94 -124
- snowflake/ml/modeling/compose/transformed_target_regressor.py +96 -124
- snowflake/ml/modeling/covariance/elliptic_envelope.py +94 -124
- snowflake/ml/modeling/covariance/empirical_covariance.py +80 -110
- snowflake/ml/modeling/covariance/graphical_lasso.py +94 -124
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +94 -124
- snowflake/ml/modeling/covariance/ledoit_wolf.py +85 -115
- snowflake/ml/modeling/covariance/min_cov_det.py +94 -124
- snowflake/ml/modeling/covariance/oas.py +80 -110
- snowflake/ml/modeling/covariance/shrunk_covariance.py +84 -114
- snowflake/ml/modeling/decomposition/dictionary_learning.py +94 -124
- snowflake/ml/modeling/decomposition/factor_analysis.py +94 -124
- snowflake/ml/modeling/decomposition/fast_ica.py +94 -124
- snowflake/ml/modeling/decomposition/incremental_pca.py +94 -124
- snowflake/ml/modeling/decomposition/kernel_pca.py +94 -124
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +94 -124
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +94 -124
- snowflake/ml/modeling/decomposition/pca.py +94 -124
- snowflake/ml/modeling/decomposition/sparse_pca.py +94 -124
- snowflake/ml/modeling/decomposition/truncated_svd.py +94 -124
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +96 -124
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +91 -119
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/bagging_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/bagging_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/isolation_forest.py +94 -124
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/stacking_regressor.py +96 -124
- snowflake/ml/modeling/ensemble/voting_classifier.py +96 -124
- snowflake/ml/modeling/ensemble/voting_regressor.py +91 -119
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +82 -110
- snowflake/ml/modeling/feature_selection/select_fdr.py +80 -108
- snowflake/ml/modeling/feature_selection/select_fpr.py +80 -108
- snowflake/ml/modeling/feature_selection/select_fwe.py +80 -108
- snowflake/ml/modeling/feature_selection/select_k_best.py +81 -109
- snowflake/ml/modeling/feature_selection/select_percentile.py +80 -108
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +94 -124
- snowflake/ml/modeling/feature_selection/variance_threshold.py +76 -106
- snowflake/ml/modeling/framework/base.py +2 -2
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +96 -124
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +96 -124
- snowflake/ml/modeling/impute/iterative_imputer.py +94 -124
- snowflake/ml/modeling/impute/knn_imputer.py +94 -124
- snowflake/ml/modeling/impute/missing_indicator.py +94 -124
- snowflake/ml/modeling/impute/simple_imputer.py +1 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +77 -107
- snowflake/ml/modeling/kernel_approximation/nystroem.py +94 -124
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +94 -124
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +86 -116
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +84 -114
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +96 -124
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +71 -100
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +71 -100
- snowflake/ml/modeling/linear_model/ard_regression.py +96 -124
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +96 -124
- snowflake/ml/modeling/linear_model/elastic_net.py +96 -124
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +96 -124
- snowflake/ml/modeling/linear_model/gamma_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/huber_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/lars.py +96 -124
- snowflake/ml/modeling/linear_model/lars_cv.py +96 -124
- snowflake/ml/modeling/linear_model/lasso.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_cv.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_lars.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +96 -124
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +96 -124
- snowflake/ml/modeling/linear_model/linear_regression.py +91 -119
- snowflake/ml/modeling/linear_model/logistic_regression.py +96 -124
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +96 -124
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +96 -124
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +96 -124
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +96 -124
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +95 -124
- snowflake/ml/modeling/linear_model/perceptron.py +95 -124
- snowflake/ml/modeling/linear_model/poisson_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/ransac_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/ridge.py +96 -124
- snowflake/ml/modeling/linear_model/ridge_classifier.py +96 -124
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +96 -124
- snowflake/ml/modeling/linear_model/ridge_cv.py +96 -124
- snowflake/ml/modeling/linear_model/sgd_classifier.py +96 -124
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +94 -124
- snowflake/ml/modeling/linear_model/sgd_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +96 -124
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +96 -124
- snowflake/ml/modeling/manifold/isomap.py +94 -124
- snowflake/ml/modeling/manifold/mds.py +94 -124
- snowflake/ml/modeling/manifold/spectral_embedding.py +94 -124
- snowflake/ml/modeling/manifold/tsne.py +94 -124
- snowflake/ml/modeling/metrics/classification.py +187 -52
- snowflake/ml/modeling/metrics/correlation.py +4 -2
- snowflake/ml/modeling/metrics/covariance.py +7 -4
- snowflake/ml/modeling/metrics/ranking.py +32 -16
- snowflake/ml/modeling/metrics/regression.py +60 -32
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +94 -124
- snowflake/ml/modeling/mixture/gaussian_mixture.py +94 -124
- snowflake/ml/modeling/model_selection/grid_search_cv.py +88 -138
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +90 -144
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +86 -114
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +93 -121
- snowflake/ml/modeling/multiclass/output_code_classifier.py +94 -122
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +92 -120
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +96 -124
- snowflake/ml/modeling/naive_bayes/complement_nb.py +92 -120
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -107
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +88 -116
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +96 -124
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +96 -124
- snowflake/ml/modeling/neighbors/kernel_density.py +94 -124
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +94 -124
- snowflake/ml/modeling/neighbors/nearest_centroid.py +89 -117
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +94 -124
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +96 -124
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +96 -124
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +96 -124
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +94 -124
- snowflake/ml/modeling/neural_network/mlp_classifier.py +96 -124
- snowflake/ml/modeling/neural_network/mlp_regressor.py +96 -124
- snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
- snowflake/ml/modeling/preprocessing/binarizer.py +14 -9
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +0 -4
- snowflake/ml/modeling/preprocessing/label_encoder.py +21 -13
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +20 -14
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +35 -19
- snowflake/ml/modeling/preprocessing/normalizer.py +6 -9
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +20 -13
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +25 -13
- snowflake/ml/modeling/preprocessing/polynomial_features.py +94 -124
- snowflake/ml/modeling/preprocessing/robust_scaler.py +28 -14
- snowflake/ml/modeling/preprocessing/standard_scaler.py +25 -13
- snowflake/ml/modeling/semi_supervised/label_propagation.py +96 -124
- snowflake/ml/modeling/semi_supervised/label_spreading.py +96 -124
- snowflake/ml/modeling/svm/linear_svc.py +96 -124
- snowflake/ml/modeling/svm/linear_svr.py +96 -124
- snowflake/ml/modeling/svm/nu_svc.py +96 -124
- snowflake/ml/modeling/svm/nu_svr.py +96 -124
- snowflake/ml/modeling/svm/svc.py +96 -124
- snowflake/ml/modeling/svm/svr.py +96 -124
- snowflake/ml/modeling/tree/decision_tree_classifier.py +96 -124
- snowflake/ml/modeling/tree/decision_tree_regressor.py +96 -124
- snowflake/ml/modeling/tree/extra_tree_classifier.py +96 -124
- snowflake/ml/modeling/tree/extra_tree_regressor.py +96 -124
- snowflake/ml/modeling/xgboost/xgb_classifier.py +96 -125
- snowflake/ml/modeling/xgboost/xgb_regressor.py +96 -125
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +96 -125
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +96 -125
- snowflake/ml/registry/model_registry.py +2 -0
- snowflake/ml/registry/registry.py +215 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.1.1.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +21 -3
- snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
- snowflake_ml_python-1.1.1.dist-info/RECORD +0 -331
- {snowflake_ml_python-1.1.1.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0
@@ -40,10 +40,14 @@ def d2_absolute_error_score(
|
|
40
40
|
gets a :math:`D^2` score of 0.0.
|
41
41
|
|
42
42
|
Args:
|
43
|
-
df:
|
44
|
-
|
45
|
-
|
46
|
-
|
43
|
+
df: snowpark.DataFrame
|
44
|
+
Input dataframe.
|
45
|
+
y_true_col_names: string or list of strings
|
46
|
+
Column name(s) representing actual values.
|
47
|
+
y_pred_col_names: string or list of strings
|
48
|
+
Column name(s) representing predicted values.
|
49
|
+
sample_weight_col_name: string, default=None
|
50
|
+
Column name representing sample weights.
|
47
51
|
multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
|
48
52
|
(n_outputs,), default='uniform_average'
|
49
53
|
Defines aggregating of multiple output values.
|
@@ -128,11 +132,16 @@ def d2_pinball_score(
|
|
128
132
|
gets a :math:`D^2` score of 0.0.
|
129
133
|
|
130
134
|
Args:
|
131
|
-
df:
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
135
|
+
df: snowpark.DataFrame
|
136
|
+
Input dataframe.
|
137
|
+
y_true_col_names: string or list of strings
|
138
|
+
Column name(s) representing actual values.
|
139
|
+
y_pred_col_names: string or list of strings
|
140
|
+
Column name(s) representing predicted values.
|
141
|
+
sample_weight_col_name: string, default=None
|
142
|
+
Column name representing sample weights.
|
143
|
+
alpha: float, default=0.5
|
144
|
+
Slope of the pinball deviance. It determines the quantile level
|
136
145
|
alpha for which the pinball deviance and also D2 are optimal.
|
137
146
|
The default `alpha=0.5` is equivalent to `d2_absolute_error_score`.
|
138
147
|
multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
|
@@ -233,10 +242,14 @@ def explained_variance_score(
|
|
233
242
|
the :func:`R^2 score <r2_score>` should be preferred.
|
234
243
|
|
235
244
|
Args:
|
236
|
-
df:
|
237
|
-
|
238
|
-
|
239
|
-
|
245
|
+
df: snowpark.DataFrame
|
246
|
+
Input dataframe.
|
247
|
+
y_true_col_names: string or list of strings
|
248
|
+
Column name(s) representing actual values.
|
249
|
+
y_pred_col_names: string or list of strings
|
250
|
+
Column name(s) representing predicted values.
|
251
|
+
sample_weight_col_name: string, default=None
|
252
|
+
Column name representing sample weights.
|
240
253
|
multioutput: {'raw_values', 'uniform_average', 'variance_weighted'} or \
|
241
254
|
array-like of shape (n_outputs,), default='uniform_average'
|
242
255
|
Defines aggregating of multiple output values.
|
@@ -248,7 +261,8 @@ def explained_variance_score(
|
|
248
261
|
'variance_weighted':
|
249
262
|
Scores of all outputs are averaged, weighted by the variances
|
250
263
|
of each individual output.
|
251
|
-
force_finite:
|
264
|
+
force_finite: boolean, default=True
|
265
|
+
Flag indicating if ``NaN`` and ``-Inf`` scores resulting
|
252
266
|
from constant data should be replaced with real numbers (``1.0`` if
|
253
267
|
prediction is perfect, ``0.0`` otherwise). Default is ``True``, a
|
254
268
|
convenient setting for hyperparameters' search procedures (e.g. grid
|
@@ -323,10 +337,14 @@ def mean_absolute_error(
|
|
323
337
|
Mean absolute error regression loss.
|
324
338
|
|
325
339
|
Args:
|
326
|
-
df:
|
327
|
-
|
328
|
-
|
329
|
-
|
340
|
+
df: snowpark.DataFrame
|
341
|
+
Input dataframe.
|
342
|
+
y_true_col_names: string or list of strings
|
343
|
+
Column name(s) representing actual values.
|
344
|
+
y_pred_col_names: string or list of strings
|
345
|
+
Column name(s) representing predicted values.
|
346
|
+
sample_weight_col_name: string, default=None
|
347
|
+
Column name representing sample weights.
|
330
348
|
multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
|
331
349
|
(n_outputs,), default='uniform_average'
|
332
350
|
Defines aggregating of multiple output values.
|
@@ -398,10 +416,14 @@ def mean_absolute_percentage_error(
|
|
398
416
|
regression metrics).
|
399
417
|
|
400
418
|
Args:
|
401
|
-
df:
|
402
|
-
|
403
|
-
|
404
|
-
|
419
|
+
df: snowpark.DataFrame
|
420
|
+
Input dataframe.
|
421
|
+
y_true_col_names: string or list of strings
|
422
|
+
Column name(s) representing actual values.
|
423
|
+
y_pred_col_names: string or list of strings
|
424
|
+
Column name(s) representing predicted values.
|
425
|
+
sample_weight_col_name: string, default=None
|
426
|
+
Column name representing sample weights.
|
405
427
|
multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
|
406
428
|
(n_outputs,), default='uniform_average'
|
407
429
|
Defines aggregating of multiple output values.
|
@@ -472,10 +494,14 @@ def mean_squared_error(
|
|
472
494
|
Mean squared error regression loss.
|
473
495
|
|
474
496
|
Args:
|
475
|
-
df:
|
476
|
-
|
477
|
-
|
478
|
-
|
497
|
+
df: snowpark.DataFrame
|
498
|
+
Input dataframe.
|
499
|
+
y_true_col_names: string or list of strings
|
500
|
+
Column name(s) representing actual values.
|
501
|
+
y_pred_col_names: string or list of strings
|
502
|
+
Column name(s) representing predicted values.
|
503
|
+
sample_weight_col_name: string, default=None
|
504
|
+
Column name representing sample weights.
|
479
505
|
multioutput: {'raw_values', 'uniform_average'} or array-like of shape \
|
480
506
|
(n_outputs,), default='uniform_average'
|
481
507
|
Defines aggregating of multiple output values.
|
@@ -484,7 +510,8 @@ def mean_squared_error(
|
|
484
510
|
Returns a full set of errors in case of multioutput input.
|
485
511
|
'uniform_average':
|
486
512
|
Errors of all outputs are averaged with uniform weight.
|
487
|
-
squared:
|
513
|
+
squared: boolean, default=True
|
514
|
+
If True returns MSE value, if False returns RMSE value.
|
488
515
|
|
489
516
|
Returns:
|
490
517
|
loss: float or ndarray of floats
|
@@ -538,12 +565,13 @@ def r2_score(*, df: snowpark.DataFrame, y_true_col_name: str, y_pred_col_name: s
|
|
538
565
|
non-constant, a constant model that always predicts the average y
|
539
566
|
disregarding the input features would get a :math:`R^2` score of 0.0.
|
540
567
|
|
541
|
-
TODO(pdorairaj): Implement other params from sklearn - sample_weight, multi_output, force_finite.
|
542
|
-
|
543
568
|
Args:
|
544
|
-
df:
|
545
|
-
|
546
|
-
|
569
|
+
df: snowpark.DataFrame
|
570
|
+
Input dataframe.
|
571
|
+
y_true_col_name: string
|
572
|
+
Column name representing actual values.
|
573
|
+
y_pred_col_name: string
|
574
|
+
Column name representing predicted values.
|
547
575
|
|
548
576
|
Returns:
|
549
577
|
R squared metric.
|
@@ -22,17 +22,19 @@ from sklearn.utils.metaestimators import available_if
|
|
22
22
|
from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
23
23
|
from snowflake.ml._internal import telemetry
|
24
24
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
25
|
+
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
25
26
|
from snowflake.ml._internal.utils import pkg_version_utils, identifier
|
26
|
-
from snowflake.snowpark import DataFrame
|
27
|
+
from snowflake.snowpark import DataFrame, Session
|
27
28
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
28
29
|
from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
|
30
|
+
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
31
|
+
from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
|
29
32
|
from snowflake.ml.modeling._internal.estimator_utils import (
|
30
33
|
gather_dependencies,
|
31
34
|
original_estimator_has_callable,
|
32
35
|
transform_snowml_obj_to_sklearn_obj,
|
33
36
|
validate_sklearn_args,
|
34
37
|
)
|
35
|
-
from snowflake.ml.modeling._internal.snowpark_handlers import SklearnWrapperProvider
|
36
38
|
from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
|
37
39
|
|
38
40
|
from snowflake.ml.model.model_signature import (
|
@@ -52,7 +54,6 @@ _PROJECT = "ModelDevelopment"
|
|
52
54
|
_SUBPROJECT = "".join([s.capitalize() for s in "sklearn.mixture".replace("sklearn.", "").split("_")])
|
53
55
|
|
54
56
|
|
55
|
-
|
56
57
|
class BayesianGaussianMixture(BaseTransformer):
|
57
58
|
r"""Variational Bayesian estimation of a Gaussian mixture
|
58
59
|
For more details on this class, see [sklearn.mixture.BayesianGaussianMixture]
|
@@ -60,6 +61,49 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
60
61
|
|
61
62
|
Parameters
|
62
63
|
----------
|
64
|
+
|
65
|
+
input_cols: Optional[Union[str, List[str]]]
|
66
|
+
A string or list of strings representing column names that contain features.
|
67
|
+
If this parameter is not specified, all columns in the input DataFrame except
|
68
|
+
the columns specified by label_cols, sample_weight_col, and passthrough_cols
|
69
|
+
parameters are considered input columns. Input columns can also be set after
|
70
|
+
initialization with the `set_input_cols` method.
|
71
|
+
|
72
|
+
label_cols: Optional[Union[str, List[str]]]
|
73
|
+
This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
|
74
|
+
|
75
|
+
output_cols: Optional[Union[str, List[str]]]
|
76
|
+
A string or list of strings representing column names that will store the
|
77
|
+
output of predict and transform operations. The length of output_cols must
|
78
|
+
match the expected number of output columns from the specific predictor or
|
79
|
+
transformer class used.
|
80
|
+
If you omit this parameter, output column names are derived by adding an
|
81
|
+
OUTPUT_ prefix to the label column names for supervised estimators, or
|
82
|
+
OUTPUT_<IDX> for unsupervised estimators. These inferred output column names
|
83
|
+
work for predictors, but output_cols must be set explicitly for transformers.
|
84
|
+
In general, explicitly specifying output column names is clearer, especially
|
85
|
+
if you don’t specify the input column names.
|
86
|
+
To transform in place, pass the same names for input_cols and output_cols.
|
87
|
+
Output columns can also be set after
|
88
|
+
initialization with the `set_output_cols` method.
|
89
|
+
|
90
|
+
sample_weight_col: Optional[str]
|
91
|
+
A string representing the column name containing the sample weights.
|
92
|
+
This argument is only required when working with weighted datasets. Sample
|
93
|
+
weight column can also be set after initialization with the
|
94
|
+
`set_sample_weight_col` method.
|
95
|
+
|
96
|
+
passthrough_cols: Optional[Union[str, List[str]]]
|
97
|
+
A string or a list of strings indicating column names to be excluded from any
|
98
|
+
operations (such as train, transform, or inference). These specified column(s)
|
99
|
+
will remain untouched throughout the process. This option is helpful in scenarios
|
100
|
+
requiring automatic input_cols inference, but need to avoid using specific
|
101
|
+
columns, like index columns, during training or inference. Passthrough columns
|
102
|
+
can also be set after initialization with the `set_passthrough_cols` method.
|
103
|
+
|
104
|
+
drop_input_cols: Optional[bool], default=False
|
105
|
+
If set, the response of predict(), transform() methods will not contain input columns.
|
106
|
+
|
63
107
|
n_components: int, default=1
|
64
108
|
The number of mixture components. Depending on the data and the value
|
65
109
|
of the `weight_concentration_prior` the model can decide to not use
|
@@ -161,42 +205,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
161
205
|
|
162
206
|
verbose_interval: int, default=10
|
163
207
|
Number of iterations done before the next print.
|
164
|
-
|
165
|
-
input_cols: Optional[Union[str, List[str]]]
|
166
|
-
A string or list of strings representing column names that contain features.
|
167
|
-
If this parameter is not specified, all columns in the input DataFrame except
|
168
|
-
the columns specified by label_cols, sample_weight_col, and passthrough_cols
|
169
|
-
parameters are considered input columns.
|
170
|
-
|
171
|
-
label_cols: Optional[Union[str, List[str]]]
|
172
|
-
A string or list of strings representing column names that contain labels.
|
173
|
-
This is a required param for estimators, as there is no way to infer these
|
174
|
-
columns. If this parameter is not specified, then object is fitted without
|
175
|
-
labels (like a transformer).
|
176
|
-
|
177
|
-
output_cols: Optional[Union[str, List[str]]]
|
178
|
-
A string or list of strings representing column names that will store the
|
179
|
-
output of predict and transform operations. The length of output_cols must
|
180
|
-
match the expected number of output columns from the specific estimator or
|
181
|
-
transformer class used.
|
182
|
-
If this parameter is not specified, output column names are derived by
|
183
|
-
adding an OUTPUT_ prefix to the label column names. These inferred output
|
184
|
-
column names work for estimator's predict() method, but output_cols must
|
185
|
-
be set explicitly for transformers.
|
186
|
-
|
187
|
-
sample_weight_col: Optional[str]
|
188
|
-
A string representing the column name containing the sample weights.
|
189
|
-
This argument is only required when working with weighted datasets.
|
190
|
-
|
191
|
-
passthrough_cols: Optional[Union[str, List[str]]]
|
192
|
-
A string or a list of strings indicating column names to be excluded from any
|
193
|
-
operations (such as train, transform, or inference). These specified column(s)
|
194
|
-
will remain untouched throughout the process. This option is helpful in scenarios
|
195
|
-
requiring automatic input_cols inference, but need to avoid using specific
|
196
|
-
columns, like index columns, during training or inference.
|
197
|
-
|
198
|
-
drop_input_cols: Optional[bool], default=False
|
199
|
-
If set, the response of predict(), transform() methods will not contain input columns.
|
200
208
|
"""
|
201
209
|
|
202
210
|
def __init__( # type: ignore[no-untyped-def]
|
@@ -234,7 +242,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
234
242
|
self.set_passthrough_cols(passthrough_cols)
|
235
243
|
self.set_drop_input_cols(drop_input_cols)
|
236
244
|
self.set_sample_weight_col(sample_weight_col)
|
237
|
-
deps = set(
|
245
|
+
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
238
246
|
|
239
247
|
self._deps = list(deps)
|
240
248
|
|
@@ -259,13 +267,14 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
259
267
|
args=init_args,
|
260
268
|
klass=sklearn.mixture.BayesianGaussianMixture
|
261
269
|
)
|
262
|
-
self._sklearn_object = sklearn.mixture.BayesianGaussianMixture(
|
270
|
+
self._sklearn_object: Any = sklearn.mixture.BayesianGaussianMixture(
|
263
271
|
**cleaned_up_init_args,
|
264
272
|
)
|
265
273
|
self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
|
266
274
|
# If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
|
267
275
|
self._snowpark_cols: Optional[List[str]] = self.input_cols
|
268
|
-
self._handlers: FitPredictHandlers = HandlersImpl(class_name=BayesianGaussianMixture.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True
|
276
|
+
self._handlers: FitPredictHandlers = HandlersImpl(class_name=BayesianGaussianMixture.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
|
277
|
+
self._autogenerated = True
|
269
278
|
|
270
279
|
def _get_rand_id(self) -> str:
|
271
280
|
"""
|
@@ -321,54 +330,48 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
321
330
|
self
|
322
331
|
"""
|
323
332
|
self._infer_input_output_cols(dataset)
|
324
|
-
if isinstance(dataset,
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
self.
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
333
|
+
if isinstance(dataset, DataFrame):
|
334
|
+
session = dataset._session
|
335
|
+
assert session is not None # keep mypy happy
|
336
|
+
# Validate that key package version in user workspace are supported in snowflake conda channel
|
337
|
+
# If customer doesn't have package in conda channel, replace the ones have the closest versions
|
338
|
+
self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
339
|
+
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
340
|
+
|
341
|
+
# Specify input columns so column pruning will be enforced
|
342
|
+
selected_cols = self._get_active_columns()
|
343
|
+
if len(selected_cols) > 0:
|
344
|
+
dataset = dataset.select(selected_cols)
|
345
|
+
|
346
|
+
self._snowpark_cols = dataset.select(self.input_cols).columns
|
347
|
+
|
348
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
349
|
+
if SNOWML_SPROC_ENV in os.environ:
|
350
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
351
|
+
project=_PROJECT,
|
352
|
+
subproject=_SUBPROJECT,
|
353
|
+
function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), BayesianGaussianMixture.__class__.__name__),
|
354
|
+
api_calls=[Session.call],
|
355
|
+
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
356
|
+
)
|
357
|
+
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
358
|
+
pd_df.columns = dataset.columns
|
359
|
+
dataset = pd_df
|
360
|
+
|
361
|
+
model_trainer = ModelTrainerBuilder.build(
|
362
|
+
estimator=self._sklearn_object,
|
363
|
+
dataset=dataset,
|
364
|
+
input_cols=self.input_cols,
|
365
|
+
label_cols=self.label_cols,
|
366
|
+
sample_weight_col=self.sample_weight_col,
|
367
|
+
autogenerated=self._autogenerated,
|
368
|
+
subproject=_SUBPROJECT
|
369
|
+
)
|
370
|
+
self._sklearn_object = model_trainer.train()
|
340
371
|
self._is_fitted = True
|
341
372
|
self._get_model_signatures(dataset)
|
342
373
|
return self
|
343
374
|
|
344
|
-
def _fit_snowpark(self, dataset: DataFrame) -> None:
|
345
|
-
session = dataset._session
|
346
|
-
assert session is not None # keep mypy happy
|
347
|
-
# Validate that key package version in user workspace are supported in snowflake conda channel
|
348
|
-
# If customer doesn't have package in conda channel, replace the ones have the closest versions
|
349
|
-
self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
350
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
351
|
-
|
352
|
-
# Specify input columns so column pruning will be enforced
|
353
|
-
selected_cols = self._get_active_columns()
|
354
|
-
if len(selected_cols) > 0:
|
355
|
-
dataset = dataset.select(selected_cols)
|
356
|
-
|
357
|
-
estimator = self._sklearn_object
|
358
|
-
assert estimator is not None # Keep mypy happy
|
359
|
-
|
360
|
-
self._snowpark_cols = dataset.select(self.input_cols).columns
|
361
|
-
|
362
|
-
self._sklearn_object = self._handlers.fit_snowpark(
|
363
|
-
dataset,
|
364
|
-
session,
|
365
|
-
estimator,
|
366
|
-
["snowflake-snowpark-python"] + self._get_dependencies(),
|
367
|
-
self.input_cols,
|
368
|
-
self.label_cols,
|
369
|
-
self.sample_weight_col,
|
370
|
-
)
|
371
|
-
|
372
375
|
def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
|
373
376
|
if self._drop_input_cols:
|
374
377
|
return []
|
@@ -556,11 +559,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
556
559
|
subproject=_SUBPROJECT,
|
557
560
|
custom_tags=dict([("autogen", True)]),
|
558
561
|
)
|
559
|
-
@telemetry.add_stmt_params_to_df(
|
560
|
-
project=_PROJECT,
|
561
|
-
subproject=_SUBPROJECT,
|
562
|
-
custom_tags=dict([("autogen", True)]),
|
563
|
-
)
|
564
562
|
def predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
|
565
563
|
"""Predict the labels for the data samples in X using trained model
|
566
564
|
For more details on this function, see [sklearn.mixture.BayesianGaussianMixture.predict]
|
@@ -614,11 +612,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
614
612
|
subproject=_SUBPROJECT,
|
615
613
|
custom_tags=dict([("autogen", True)]),
|
616
614
|
)
|
617
|
-
@telemetry.add_stmt_params_to_df(
|
618
|
-
project=_PROJECT,
|
619
|
-
subproject=_SUBPROJECT,
|
620
|
-
custom_tags=dict([("autogen", True)]),
|
621
|
-
)
|
622
615
|
def transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
|
623
616
|
"""Method not supported for this class.
|
624
617
|
|
@@ -677,7 +670,8 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
677
670
|
if False:
|
678
671
|
self.fit(dataset)
|
679
672
|
assert self._sklearn_object is not None
|
680
|
-
|
673
|
+
labels : npt.NDArray[Any] = self._sklearn_object.labels_
|
674
|
+
return labels
|
681
675
|
else:
|
682
676
|
raise NotImplementedError
|
683
677
|
|
@@ -713,6 +707,7 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
713
707
|
output_cols = []
|
714
708
|
|
715
709
|
# Make sure column names are valid snowflake identifiers.
|
710
|
+
assert output_cols is not None # Make MyPy happy
|
716
711
|
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
717
712
|
|
718
713
|
return rv
|
@@ -723,11 +718,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
723
718
|
subproject=_SUBPROJECT,
|
724
719
|
custom_tags=dict([("autogen", True)]),
|
725
720
|
)
|
726
|
-
@telemetry.add_stmt_params_to_df(
|
727
|
-
project=_PROJECT,
|
728
|
-
subproject=_SUBPROJECT,
|
729
|
-
custom_tags=dict([("autogen", True)]),
|
730
|
-
)
|
731
721
|
def predict_proba(
|
732
722
|
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_proba_"
|
733
723
|
) -> Union[DataFrame, pd.DataFrame]:
|
@@ -770,11 +760,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
770
760
|
subproject=_SUBPROJECT,
|
771
761
|
custom_tags=dict([("autogen", True)]),
|
772
762
|
)
|
773
|
-
@telemetry.add_stmt_params_to_df(
|
774
|
-
project=_PROJECT,
|
775
|
-
subproject=_SUBPROJECT,
|
776
|
-
custom_tags=dict([("autogen", True)]),
|
777
|
-
)
|
778
763
|
def predict_log_proba(
|
779
764
|
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_log_proba_"
|
780
765
|
) -> Union[DataFrame, pd.DataFrame]:
|
@@ -813,16 +798,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
813
798
|
return output_df
|
814
799
|
|
815
800
|
@available_if(original_estimator_has_callable("decision_function")) # type: ignore[misc]
|
816
|
-
@telemetry.send_api_usage_telemetry(
|
817
|
-
project=_PROJECT,
|
818
|
-
subproject=_SUBPROJECT,
|
819
|
-
custom_tags=dict([("autogen", True)]),
|
820
|
-
)
|
821
|
-
@telemetry.add_stmt_params_to_df(
|
822
|
-
project=_PROJECT,
|
823
|
-
subproject=_SUBPROJECT,
|
824
|
-
custom_tags=dict([("autogen", True)]),
|
825
|
-
)
|
826
801
|
def decision_function(
|
827
802
|
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "decision_function_"
|
828
803
|
) -> Union[DataFrame, pd.DataFrame]:
|
@@ -923,11 +898,6 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
923
898
|
subproject=_SUBPROJECT,
|
924
899
|
custom_tags=dict([("autogen", True)]),
|
925
900
|
)
|
926
|
-
@telemetry.add_stmt_params_to_df(
|
927
|
-
project=_PROJECT,
|
928
|
-
subproject=_SUBPROJECT,
|
929
|
-
custom_tags=dict([("autogen", True)]),
|
930
|
-
)
|
931
901
|
def kneighbors(
|
932
902
|
self,
|
933
903
|
dataset: Union[DataFrame, pd.DataFrame],
|
@@ -987,9 +957,9 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
987
957
|
# For classifier, the type of predict is the same as the type of label
|
988
958
|
if self._sklearn_object._estimator_type == 'classifier':
|
989
959
|
# label columns is the desired type for output
|
990
|
-
outputs = _infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True)
|
960
|
+
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
991
961
|
# rename the output columns
|
992
|
-
outputs = model_signature_utils.rename_features(outputs, self.output_cols)
|
962
|
+
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
993
963
|
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
994
964
|
([] if self._drop_input_cols else inputs)
|
995
965
|
+ outputs)
|