snowflake-ml-python 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +1 -1
- snowflake/cortex/_extract_answer.py +1 -1
- snowflake/cortex/_sentiment.py +1 -1
- snowflake/cortex/_summarize.py +1 -1
- snowflake/cortex/_translate.py +1 -1
- snowflake/ml/_internal/env_utils.py +68 -6
- snowflake/ml/_internal/file_utils.py +34 -4
- snowflake/ml/_internal/telemetry.py +79 -91
- snowflake/ml/_internal/utils/identifier.py +78 -72
- snowflake/ml/_internal/utils/retryable_http.py +16 -4
- snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
- snowflake/ml/dataset/dataset.py +1 -1
- snowflake/ml/model/_api.py +21 -14
- snowflake/ml/model/_client/model/model_impl.py +176 -0
- snowflake/ml/model/_client/model/model_method_info.py +19 -0
- snowflake/ml/model/_client/model/model_version_impl.py +291 -0
- snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
- snowflake/ml/model/_client/ops/model_ops.py +308 -0
- snowflake/ml/model/_client/sql/model.py +75 -0
- snowflake/ml/model/_client/sql/model_version.py +213 -0
- snowflake/ml/model/_client/sql/stage.py +40 -0
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
- snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
- snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
- snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
- snowflake/ml/model/_model_composer/model_composer.py +31 -9
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
- snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
- snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
- snowflake/ml/model/model_signature.py +108 -53
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
- snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
- snowflake/ml/modeling/_internal/model_specifications.py +146 -0
- snowflake/ml/modeling/_internal/model_trainer.py +13 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
- snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
- snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
- snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +108 -135
- snowflake/ml/modeling/cluster/affinity_propagation.py +106 -135
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +106 -135
- snowflake/ml/modeling/cluster/birch.py +106 -135
- snowflake/ml/modeling/cluster/bisecting_k_means.py +106 -135
- snowflake/ml/modeling/cluster/dbscan.py +106 -135
- snowflake/ml/modeling/cluster/feature_agglomeration.py +106 -135
- snowflake/ml/modeling/cluster/k_means.py +105 -135
- snowflake/ml/modeling/cluster/mean_shift.py +106 -135
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +105 -135
- snowflake/ml/modeling/cluster/optics.py +106 -135
- snowflake/ml/modeling/cluster/spectral_biclustering.py +106 -135
- snowflake/ml/modeling/cluster/spectral_clustering.py +106 -135
- snowflake/ml/modeling/cluster/spectral_coclustering.py +106 -135
- snowflake/ml/modeling/compose/column_transformer.py +106 -135
- snowflake/ml/modeling/compose/transformed_target_regressor.py +108 -135
- snowflake/ml/modeling/covariance/elliptic_envelope.py +106 -135
- snowflake/ml/modeling/covariance/empirical_covariance.py +99 -128
- snowflake/ml/modeling/covariance/graphical_lasso.py +106 -135
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +106 -135
- snowflake/ml/modeling/covariance/ledoit_wolf.py +104 -133
- snowflake/ml/modeling/covariance/min_cov_det.py +106 -135
- snowflake/ml/modeling/covariance/oas.py +99 -128
- snowflake/ml/modeling/covariance/shrunk_covariance.py +103 -132
- snowflake/ml/modeling/decomposition/dictionary_learning.py +106 -135
- snowflake/ml/modeling/decomposition/factor_analysis.py +106 -135
- snowflake/ml/modeling/decomposition/fast_ica.py +106 -135
- snowflake/ml/modeling/decomposition/incremental_pca.py +106 -135
- snowflake/ml/modeling/decomposition/kernel_pca.py +106 -135
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +106 -135
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +106 -135
- snowflake/ml/modeling/decomposition/pca.py +106 -135
- snowflake/ml/modeling/decomposition/sparse_pca.py +106 -135
- snowflake/ml/modeling/decomposition/truncated_svd.py +106 -135
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +108 -135
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +108 -135
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +108 -135
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +108 -135
- snowflake/ml/modeling/ensemble/bagging_classifier.py +108 -135
- snowflake/ml/modeling/ensemble/bagging_regressor.py +108 -135
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +108 -135
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +108 -135
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +108 -135
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +108 -135
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +108 -135
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +108 -135
- snowflake/ml/modeling/ensemble/isolation_forest.py +106 -135
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +108 -135
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +108 -135
- snowflake/ml/modeling/ensemble/stacking_regressor.py +108 -135
- snowflake/ml/modeling/ensemble/voting_classifier.py +108 -135
- snowflake/ml/modeling/ensemble/voting_regressor.py +108 -135
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +101 -128
- snowflake/ml/modeling/feature_selection/select_fdr.py +99 -126
- snowflake/ml/modeling/feature_selection/select_fpr.py +99 -126
- snowflake/ml/modeling/feature_selection/select_fwe.py +99 -126
- snowflake/ml/modeling/feature_selection/select_k_best.py +100 -127
- snowflake/ml/modeling/feature_selection/select_percentile.py +99 -126
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +106 -135
- snowflake/ml/modeling/feature_selection/variance_threshold.py +95 -124
- snowflake/ml/modeling/framework/base.py +83 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +108 -135
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +108 -135
- snowflake/ml/modeling/impute/iterative_imputer.py +106 -135
- snowflake/ml/modeling/impute/knn_imputer.py +106 -135
- snowflake/ml/modeling/impute/missing_indicator.py +106 -135
- snowflake/ml/modeling/impute/simple_imputer.py +9 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +96 -125
- snowflake/ml/modeling/kernel_approximation/nystroem.py +106 -135
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +106 -135
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +105 -134
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +103 -132
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +108 -135
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +90 -118
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +90 -118
- snowflake/ml/modeling/linear_model/ard_regression.py +108 -135
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +108 -135
- snowflake/ml/modeling/linear_model/elastic_net.py +108 -135
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +108 -135
- snowflake/ml/modeling/linear_model/gamma_regressor.py +108 -135
- snowflake/ml/modeling/linear_model/huber_regressor.py +108 -135
- snowflake/ml/modeling/linear_model/lars.py +108 -135
- snowflake/ml/modeling/linear_model/lars_cv.py +108 -135
- snowflake/ml/modeling/linear_model/lasso.py +108 -135
- snowflake/ml/modeling/linear_model/lasso_cv.py +108 -135
- snowflake/ml/modeling/linear_model/lasso_lars.py +108 -135
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +108 -135
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +108 -135
- snowflake/ml/modeling/linear_model/linear_regression.py +108 -135
- snowflake/ml/modeling/linear_model/logistic_regression.py +108 -135
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +108 -135
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +108 -135
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +108 -135
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +108 -135
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +108 -135
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +108 -135
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +108 -135
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +107 -135
- snowflake/ml/modeling/linear_model/perceptron.py +107 -135
- snowflake/ml/modeling/linear_model/poisson_regressor.py +108 -135
- snowflake/ml/modeling/linear_model/ransac_regressor.py +108 -135
- snowflake/ml/modeling/linear_model/ridge.py +108 -135
- snowflake/ml/modeling/linear_model/ridge_classifier.py +108 -135
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +108 -135
- snowflake/ml/modeling/linear_model/ridge_cv.py +108 -135
- snowflake/ml/modeling/linear_model/sgd_classifier.py +108 -135
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +106 -135
- snowflake/ml/modeling/linear_model/sgd_regressor.py +108 -135
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +108 -135
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +108 -135
- snowflake/ml/modeling/manifold/isomap.py +106 -135
- snowflake/ml/modeling/manifold/mds.py +106 -135
- snowflake/ml/modeling/manifold/spectral_embedding.py +106 -135
- snowflake/ml/modeling/manifold/tsne.py +106 -135
- snowflake/ml/modeling/metrics/classification.py +196 -55
- snowflake/ml/modeling/metrics/correlation.py +4 -2
- snowflake/ml/modeling/metrics/covariance.py +7 -4
- snowflake/ml/modeling/metrics/ranking.py +32 -16
- snowflake/ml/modeling/metrics/regression.py +60 -32
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +106 -135
- snowflake/ml/modeling/mixture/gaussian_mixture.py +106 -135
- snowflake/ml/modeling/model_selection/grid_search_cv.py +91 -148
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +93 -154
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +105 -132
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +108 -135
- snowflake/ml/modeling/multiclass/output_code_classifier.py +108 -135
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +108 -135
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +108 -135
- snowflake/ml/modeling/naive_bayes/complement_nb.py +108 -135
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +98 -125
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +107 -134
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +108 -135
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +108 -135
- snowflake/ml/modeling/neighbors/kernel_density.py +106 -135
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +106 -135
- snowflake/ml/modeling/neighbors/nearest_centroid.py +108 -135
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +106 -135
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +108 -135
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +108 -135
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +108 -135
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +106 -135
- snowflake/ml/modeling/neural_network/mlp_classifier.py +108 -135
- snowflake/ml/modeling/neural_network/mlp_regressor.py +108 -135
- snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
- snowflake/ml/modeling/preprocessing/binarizer.py +25 -8
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +9 -4
- snowflake/ml/modeling/preprocessing/label_encoder.py +31 -11
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +27 -9
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +42 -14
- snowflake/ml/modeling/preprocessing/normalizer.py +9 -4
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +26 -10
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +37 -13
- snowflake/ml/modeling/preprocessing/polynomial_features.py +106 -135
- snowflake/ml/modeling/preprocessing/robust_scaler.py +39 -13
- snowflake/ml/modeling/preprocessing/standard_scaler.py +36 -12
- snowflake/ml/modeling/semi_supervised/label_propagation.py +108 -135
- snowflake/ml/modeling/semi_supervised/label_spreading.py +108 -135
- snowflake/ml/modeling/svm/linear_svc.py +108 -135
- snowflake/ml/modeling/svm/linear_svr.py +108 -135
- snowflake/ml/modeling/svm/nu_svc.py +108 -135
- snowflake/ml/modeling/svm/nu_svr.py +108 -135
- snowflake/ml/modeling/svm/svc.py +108 -135
- snowflake/ml/modeling/svm/svr.py +108 -135
- snowflake/ml/modeling/tree/decision_tree_classifier.py +108 -135
- snowflake/ml/modeling/tree/decision_tree_regressor.py +108 -135
- snowflake/ml/modeling/tree/extra_tree_classifier.py +108 -135
- snowflake/ml/modeling/tree/extra_tree_regressor.py +108 -135
- snowflake/ml/modeling/xgboost/xgb_classifier.py +108 -136
- snowflake/ml/modeling/xgboost/xgb_regressor.py +108 -136
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +108 -136
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +108 -136
- snowflake/ml/registry/model_registry.py +2 -0
- snowflake/ml/registry/registry.py +215 -0
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +34 -1
- snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
- snowflake_ml_python-1.1.0.dist-info/RECORD +0 -331
- {snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0
@@ -22,17 +22,19 @@ from sklearn.utils.metaestimators import available_if
|
|
22
22
|
from snowflake.ml.modeling.framework.base import BaseTransformer, _process_cols
|
23
23
|
from snowflake.ml._internal import telemetry
|
24
24
|
from snowflake.ml._internal.exceptions import error_codes, exceptions, modeling_error_messages
|
25
|
+
from snowflake.ml._internal.env_utils import SNOWML_SPROC_ENV
|
25
26
|
from snowflake.ml._internal.utils import pkg_version_utils, identifier
|
26
|
-
from snowflake.snowpark import DataFrame
|
27
|
+
from snowflake.snowpark import DataFrame, Session
|
27
28
|
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
|
28
29
|
from snowflake.ml.modeling._internal.snowpark_handlers import SnowparkHandlers as HandlersImpl
|
30
|
+
from snowflake.ml.modeling._internal.model_trainer_builder import ModelTrainerBuilder
|
31
|
+
from snowflake.ml.modeling._internal.model_trainer import ModelTrainer
|
29
32
|
from snowflake.ml.modeling._internal.estimator_utils import (
|
30
33
|
gather_dependencies,
|
31
34
|
original_estimator_has_callable,
|
32
35
|
transform_snowml_obj_to_sklearn_obj,
|
33
36
|
validate_sklearn_args,
|
34
37
|
)
|
35
|
-
from snowflake.ml.modeling._internal.snowpark_handlers import SklearnWrapperProvider
|
36
38
|
from snowflake.ml.modeling._internal.estimator_protocols import FitPredictHandlers
|
37
39
|
|
38
40
|
from snowflake.ml.model.model_signature import (
|
@@ -52,7 +54,6 @@ _PROJECT = "ModelDevelopment"
|
|
52
54
|
_SUBPROJECT = "".join([s.capitalize() for s in "sklearn.linear_model".replace("sklearn.", "").split("_")])
|
53
55
|
|
54
56
|
|
55
|
-
|
56
57
|
class BayesianRidge(BaseTransformer):
|
57
58
|
r"""Bayesian ridge regression
|
58
59
|
For more details on this class, see [sklearn.linear_model.BayesianRidge]
|
@@ -60,6 +61,51 @@ class BayesianRidge(BaseTransformer):
|
|
60
61
|
|
61
62
|
Parameters
|
62
63
|
----------
|
64
|
+
|
65
|
+
input_cols: Optional[Union[str, List[str]]]
|
66
|
+
A string or list of strings representing column names that contain features.
|
67
|
+
If this parameter is not specified, all columns in the input DataFrame except
|
68
|
+
the columns specified by label_cols, sample_weight_col, and passthrough_cols
|
69
|
+
parameters are considered input columns. Input columns can also be set after
|
70
|
+
initialization with the `set_input_cols` method.
|
71
|
+
|
72
|
+
label_cols: Optional[Union[str, List[str]]]
|
73
|
+
A string or list of strings representing column names that contain labels.
|
74
|
+
Label columns must be specified with this parameter during initialization
|
75
|
+
or with the `set_label_cols` method before fitting.
|
76
|
+
|
77
|
+
output_cols: Optional[Union[str, List[str]]]
|
78
|
+
A string or list of strings representing column names that will store the
|
79
|
+
output of predict and transform operations. The length of output_cols must
|
80
|
+
match the expected number of output columns from the specific predictor or
|
81
|
+
transformer class used.
|
82
|
+
If you omit this parameter, output column names are derived by adding an
|
83
|
+
OUTPUT_ prefix to the label column names for supervised estimators, or
|
84
|
+
OUTPUT_<IDX> for unsupervised estimators. These inferred output column names
|
85
|
+
work for predictors, but output_cols must be set explicitly for transformers.
|
86
|
+
In general, explicitly specifying output column names is clearer, especially
|
87
|
+
if you don’t specify the input column names.
|
88
|
+
To transform in place, pass the same names for input_cols and output_cols.
|
89
|
+
Output columns can also be set after
|
90
|
+
initialization with the `set_output_cols` method.
|
91
|
+
|
92
|
+
sample_weight_col: Optional[str]
|
93
|
+
A string representing the column name containing the sample weights.
|
94
|
+
This argument is only required when working with weighted datasets. Sample
|
95
|
+
weight column can also be set after initialization with the
|
96
|
+
`set_sample_weight_col` method.
|
97
|
+
|
98
|
+
passthrough_cols: Optional[Union[str, List[str]]]
|
99
|
+
A string or a list of strings indicating column names to be excluded from any
|
100
|
+
operations (such as train, transform, or inference). These specified column(s)
|
101
|
+
will remain untouched throughout the process. This option is helpful in scenarios
|
102
|
+
requiring automatic input_cols inference, but need to avoid using specific
|
103
|
+
columns, like index columns, during training or inference. Passthrough columns
|
104
|
+
can also be set after initialization with the `set_passthrough_cols` method.
|
105
|
+
|
106
|
+
drop_input_cols: Optional[bool], default=False
|
107
|
+
If set, the response of predict(), transform() methods will not contain input columns.
|
108
|
+
|
63
109
|
max_iter: int, default=None
|
64
110
|
Maximum number of iterations over the complete dataset before
|
65
111
|
stopping independently of any early stopping criterion. If `None`, it
|
@@ -111,35 +157,6 @@ class BayesianRidge(BaseTransformer):
|
|
111
157
|
|
112
158
|
n_iter: int
|
113
159
|
Maximum number of iterations. Should be greater than or equal to 1.
|
114
|
-
|
115
|
-
input_cols: Optional[Union[str, List[str]]]
|
116
|
-
A string or list of strings representing column names that contain features.
|
117
|
-
If this parameter is not specified, all columns in the input DataFrame except
|
118
|
-
the columns specified by label_cols and sample_weight_col parameters are
|
119
|
-
considered input columns.
|
120
|
-
|
121
|
-
label_cols: Optional[Union[str, List[str]]]
|
122
|
-
A string or list of strings representing column names that contain labels.
|
123
|
-
This is a required param for estimators, as there is no way to infer these
|
124
|
-
columns. If this parameter is not specified, then object is fitted without
|
125
|
-
labels (like a transformer).
|
126
|
-
|
127
|
-
output_cols: Optional[Union[str, List[str]]]
|
128
|
-
A string or list of strings representing column names that will store the
|
129
|
-
output of predict and transform operations. The length of output_cols must
|
130
|
-
match the expected number of output columns from the specific estimator or
|
131
|
-
transformer class used.
|
132
|
-
If this parameter is not specified, output column names are derived by
|
133
|
-
adding an OUTPUT_ prefix to the label column names. These inferred output
|
134
|
-
column names work for estimator's predict() method, but output_cols must
|
135
|
-
be set explicitly for transformers.
|
136
|
-
|
137
|
-
sample_weight_col: Optional[str]
|
138
|
-
A string representing the column name containing the sample weights.
|
139
|
-
This argument is only required when working with weighted datasets.
|
140
|
-
|
141
|
-
drop_input_cols: Optional[bool], default=False
|
142
|
-
If set, the response of predict(), transform() methods will not contain input columns.
|
143
160
|
"""
|
144
161
|
|
145
162
|
def __init__( # type: ignore[no-untyped-def]
|
@@ -161,6 +178,7 @@ class BayesianRidge(BaseTransformer):
|
|
161
178
|
input_cols: Optional[Union[str, Iterable[str]]] = None,
|
162
179
|
output_cols: Optional[Union[str, Iterable[str]]] = None,
|
163
180
|
label_cols: Optional[Union[str, Iterable[str]]] = None,
|
181
|
+
passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
|
164
182
|
drop_input_cols: Optional[bool] = False,
|
165
183
|
sample_weight_col: Optional[str] = None,
|
166
184
|
) -> None:
|
@@ -169,9 +187,10 @@ class BayesianRidge(BaseTransformer):
|
|
169
187
|
self.set_input_cols(input_cols)
|
170
188
|
self.set_output_cols(output_cols)
|
171
189
|
self.set_label_cols(label_cols)
|
190
|
+
self.set_passthrough_cols(passthrough_cols)
|
172
191
|
self.set_drop_input_cols(drop_input_cols)
|
173
192
|
self.set_sample_weight_col(sample_weight_col)
|
174
|
-
deps = set(
|
193
|
+
deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
|
175
194
|
|
176
195
|
self._deps = list(deps)
|
177
196
|
|
@@ -192,13 +211,14 @@ class BayesianRidge(BaseTransformer):
|
|
192
211
|
args=init_args,
|
193
212
|
klass=sklearn.linear_model.BayesianRidge
|
194
213
|
)
|
195
|
-
self._sklearn_object = sklearn.linear_model.BayesianRidge(
|
214
|
+
self._sklearn_object: Any = sklearn.linear_model.BayesianRidge(
|
196
215
|
**cleaned_up_init_args,
|
197
216
|
)
|
198
217
|
self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
|
199
218
|
# If user used snowpark dataframe during fit, here it stores the snowpark input_cols, otherwise the processed input_cols
|
200
219
|
self._snowpark_cols: Optional[List[str]] = self.input_cols
|
201
|
-
self._handlers: FitPredictHandlers = HandlersImpl(class_name=BayesianRidge.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True
|
220
|
+
self._handlers: FitPredictHandlers = HandlersImpl(class_name=BayesianRidge.__class__.__name__, subproject=_SUBPROJECT, autogenerated=True)
|
221
|
+
self._autogenerated = True
|
202
222
|
|
203
223
|
def _get_rand_id(self) -> str:
|
204
224
|
"""
|
@@ -209,24 +229,6 @@ class BayesianRidge(BaseTransformer):
|
|
209
229
|
"""
|
210
230
|
return str(uuid4()).replace("-", "_").upper()
|
211
231
|
|
212
|
-
def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
213
|
-
"""
|
214
|
-
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
|
215
|
-
|
216
|
-
Args:
|
217
|
-
dataset: Input dataset.
|
218
|
-
"""
|
219
|
-
if not self.input_cols:
|
220
|
-
cols = [
|
221
|
-
c for c in dataset.columns
|
222
|
-
if c not in self.get_label_cols() and c != self.sample_weight_col
|
223
|
-
]
|
224
|
-
self.set_input_cols(input_cols=cols)
|
225
|
-
|
226
|
-
if not self.output_cols:
|
227
|
-
cols = [identifier.concat_names(ids=['OUTPUT_', c]) for c in self.label_cols]
|
228
|
-
self.set_output_cols(output_cols=cols)
|
229
|
-
|
230
232
|
def set_input_cols(self, input_cols: Optional[Union[str, Iterable[str]]]) -> "BayesianRidge":
|
231
233
|
"""
|
232
234
|
Input columns setter.
|
@@ -272,54 +274,48 @@ class BayesianRidge(BaseTransformer):
|
|
272
274
|
self
|
273
275
|
"""
|
274
276
|
self._infer_input_output_cols(dataset)
|
275
|
-
if isinstance(dataset,
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
self.
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
277
|
+
if isinstance(dataset, DataFrame):
|
278
|
+
session = dataset._session
|
279
|
+
assert session is not None # keep mypy happy
|
280
|
+
# Validate that the key package versions in the user workspace are supported in the Snowflake conda channel
|
281
|
+
# If the customer's package version is not available in the conda channel, replace it with the closest available version
|
282
|
+
self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
283
|
+
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
284
|
+
|
285
|
+
# Specify input columns so column pruning will be enforced
|
286
|
+
selected_cols = self._get_active_columns()
|
287
|
+
if len(selected_cols) > 0:
|
288
|
+
dataset = dataset.select(selected_cols)
|
289
|
+
|
290
|
+
self._snowpark_cols = dataset.select(self.input_cols).columns
|
291
|
+
|
292
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
293
|
+
if SNOWML_SPROC_ENV in os.environ:
|
294
|
+
statement_params = telemetry.get_function_usage_statement_params(
|
295
|
+
project=_PROJECT,
|
296
|
+
subproject=_SUBPROJECT,
|
297
|
+
function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), BayesianRidge.__class__.__name__),
|
298
|
+
api_calls=[Session.call],
|
299
|
+
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
300
|
+
)
|
301
|
+
pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
|
302
|
+
pd_df.columns = dataset.columns
|
303
|
+
dataset = pd_df
|
304
|
+
|
305
|
+
model_trainer = ModelTrainerBuilder.build(
|
306
|
+
estimator=self._sklearn_object,
|
307
|
+
dataset=dataset,
|
308
|
+
input_cols=self.input_cols,
|
309
|
+
label_cols=self.label_cols,
|
310
|
+
sample_weight_col=self.sample_weight_col,
|
311
|
+
autogenerated=self._autogenerated,
|
312
|
+
subproject=_SUBPROJECT
|
313
|
+
)
|
314
|
+
self._sklearn_object = model_trainer.train()
|
291
315
|
self._is_fitted = True
|
292
316
|
self._get_model_signatures(dataset)
|
293
317
|
return self
|
294
318
|
|
295
|
-
def _fit_snowpark(self, dataset: DataFrame) -> None:
|
296
|
-
session = dataset._session
|
297
|
-
assert session is not None # keep mypy happy
|
298
|
-
# Validate that key package version in user workspace are supported in snowflake conda channel
|
299
|
-
# If customer doesn't have package in conda channel, replace the ones have the closest versions
|
300
|
-
self._deps = pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
301
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
302
|
-
|
303
|
-
# Specify input columns so column pruning will be enforced
|
304
|
-
selected_cols = self._get_active_columns()
|
305
|
-
if len(selected_cols) > 0:
|
306
|
-
dataset = dataset.select(selected_cols)
|
307
|
-
|
308
|
-
estimator = self._sklearn_object
|
309
|
-
assert estimator is not None # Keep mypy happy
|
310
|
-
|
311
|
-
self._snowpark_cols = dataset.select(self.input_cols).columns
|
312
|
-
|
313
|
-
self._sklearn_object = self._handlers.fit_snowpark(
|
314
|
-
dataset,
|
315
|
-
session,
|
316
|
-
estimator,
|
317
|
-
["snowflake-snowpark-python"] + self._get_dependencies(),
|
318
|
-
self.input_cols,
|
319
|
-
self.label_cols,
|
320
|
-
self.sample_weight_col,
|
321
|
-
)
|
322
|
-
|
323
319
|
def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
|
324
320
|
if self._drop_input_cols:
|
325
321
|
return []
|
@@ -507,11 +503,6 @@ class BayesianRidge(BaseTransformer):
|
|
507
503
|
subproject=_SUBPROJECT,
|
508
504
|
custom_tags=dict([("autogen", True)]),
|
509
505
|
)
|
510
|
-
@telemetry.add_stmt_params_to_df(
|
511
|
-
project=_PROJECT,
|
512
|
-
subproject=_SUBPROJECT,
|
513
|
-
custom_tags=dict([("autogen", True)]),
|
514
|
-
)
|
515
506
|
def predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
|
516
507
|
"""Predict using the linear model
|
517
508
|
For more details on this function, see [sklearn.linear_model.BayesianRidge.predict]
|
@@ -565,11 +556,6 @@ class BayesianRidge(BaseTransformer):
|
|
565
556
|
subproject=_SUBPROJECT,
|
566
557
|
custom_tags=dict([("autogen", True)]),
|
567
558
|
)
|
568
|
-
@telemetry.add_stmt_params_to_df(
|
569
|
-
project=_PROJECT,
|
570
|
-
subproject=_SUBPROJECT,
|
571
|
-
custom_tags=dict([("autogen", True)]),
|
572
|
-
)
|
573
559
|
def transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
|
574
560
|
"""Method not supported for this class.
|
575
561
|
|
@@ -626,7 +612,8 @@ class BayesianRidge(BaseTransformer):
|
|
626
612
|
if False:
|
627
613
|
self.fit(dataset)
|
628
614
|
assert self._sklearn_object is not None
|
629
|
-
|
615
|
+
labels : npt.NDArray[Any] = self._sklearn_object.labels_
|
616
|
+
return labels
|
630
617
|
else:
|
631
618
|
raise NotImplementedError
|
632
619
|
|
@@ -662,6 +649,7 @@ class BayesianRidge(BaseTransformer):
|
|
662
649
|
output_cols = []
|
663
650
|
|
664
651
|
# Make sure column names are valid snowflake identifiers.
|
652
|
+
assert output_cols is not None # Make MyPy happy
|
665
653
|
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
666
654
|
|
667
655
|
return rv
|
@@ -672,11 +660,6 @@ class BayesianRidge(BaseTransformer):
|
|
672
660
|
subproject=_SUBPROJECT,
|
673
661
|
custom_tags=dict([("autogen", True)]),
|
674
662
|
)
|
675
|
-
@telemetry.add_stmt_params_to_df(
|
676
|
-
project=_PROJECT,
|
677
|
-
subproject=_SUBPROJECT,
|
678
|
-
custom_tags=dict([("autogen", True)]),
|
679
|
-
)
|
680
663
|
def predict_proba(
|
681
664
|
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_proba_"
|
682
665
|
) -> Union[DataFrame, pd.DataFrame]:
|
@@ -717,11 +700,6 @@ class BayesianRidge(BaseTransformer):
|
|
717
700
|
subproject=_SUBPROJECT,
|
718
701
|
custom_tags=dict([("autogen", True)]),
|
719
702
|
)
|
720
|
-
@telemetry.add_stmt_params_to_df(
|
721
|
-
project=_PROJECT,
|
722
|
-
subproject=_SUBPROJECT,
|
723
|
-
custom_tags=dict([("autogen", True)]),
|
724
|
-
)
|
725
703
|
def predict_log_proba(
|
726
704
|
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_log_proba_"
|
727
705
|
) -> Union[DataFrame, pd.DataFrame]:
|
@@ -758,16 +736,6 @@ class BayesianRidge(BaseTransformer):
|
|
758
736
|
return output_df
|
759
737
|
|
760
738
|
@available_if(original_estimator_has_callable("decision_function")) # type: ignore[misc]
|
761
|
-
@telemetry.send_api_usage_telemetry(
|
762
|
-
project=_PROJECT,
|
763
|
-
subproject=_SUBPROJECT,
|
764
|
-
custom_tags=dict([("autogen", True)]),
|
765
|
-
)
|
766
|
-
@telemetry.add_stmt_params_to_df(
|
767
|
-
project=_PROJECT,
|
768
|
-
subproject=_SUBPROJECT,
|
769
|
-
custom_tags=dict([("autogen", True)]),
|
770
|
-
)
|
771
739
|
def decision_function(
|
772
740
|
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "decision_function_"
|
773
741
|
) -> Union[DataFrame, pd.DataFrame]:
|
@@ -868,11 +836,6 @@ class BayesianRidge(BaseTransformer):
|
|
868
836
|
subproject=_SUBPROJECT,
|
869
837
|
custom_tags=dict([("autogen", True)]),
|
870
838
|
)
|
871
|
-
@telemetry.add_stmt_params_to_df(
|
872
|
-
project=_PROJECT,
|
873
|
-
subproject=_SUBPROJECT,
|
874
|
-
custom_tags=dict([("autogen", True)]),
|
875
|
-
)
|
876
839
|
def kneighbors(
|
877
840
|
self,
|
878
841
|
dataset: Union[DataFrame, pd.DataFrame],
|
@@ -932,18 +895,28 @@ class BayesianRidge(BaseTransformer):
|
|
932
895
|
# For classifier, the type of predict is the same as the type of label
|
933
896
|
if self._sklearn_object._estimator_type == 'classifier':
|
934
897
|
# label columns is the desired type for output
|
935
|
-
outputs = _infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True)
|
898
|
+
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
936
899
|
# rename the output columns
|
937
|
-
outputs = model_signature_utils.rename_features(outputs, self.output_cols)
|
900
|
+
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
938
901
|
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
939
902
|
([] if self._drop_input_cols else inputs)
|
940
903
|
+ outputs)
|
904
|
+
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
905
|
+
# For outlier models, returns -1 for outliers and 1 for inliers.
|
906
|
+
# Clusterer returns int64 cluster labels.
|
907
|
+
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
908
|
+
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
909
|
+
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
910
|
+
([] if self._drop_input_cols else inputs)
|
911
|
+
+ outputs)
|
912
|
+
|
941
913
|
# For regressor, the type of predict is float64
|
942
914
|
elif self._sklearn_object._estimator_type == 'regressor':
|
943
915
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
944
916
|
self._model_signature_dict["predict"] = ModelSignature(inputs,
|
945
917
|
([] if self._drop_input_cols else inputs)
|
946
918
|
+ outputs)
|
919
|
+
|
947
920
|
for prob_func in PROB_FUNCTIONS:
|
948
921
|
if hasattr(self, prob_func):
|
949
922
|
output_cols_prefix: str = f"{prob_func}_"
|