snowflake-ml-python 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +11 -1
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/feature_store/feature_store.py +166 -184
- snowflake/ml/feature_store/feature_view.py +12 -24
- snowflake/ml/fileset/sfcfs.py +56 -50
- snowflake/ml/fileset/stage_fs.py +48 -13
- snowflake/ml/model/_client/model/model_version_impl.py +6 -49
- snowflake/ml/model/_client/ops/model_ops.py +78 -29
- snowflake/ml/model/_client/sql/model.py +23 -2
- snowflake/ml/model/_client/sql/model_version.py +22 -1
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +1 -3
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +5 -2
- snowflake/ml/model/_model_composer/model_composer.py +7 -5
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +13 -1
- snowflake/ml/model/_packager/model_handlers/xgboost.py +1 -1
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -2
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +545 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -5
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
- snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
- snowflake/ml/modeling/cluster/birch.py +195 -123
- snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
- snowflake/ml/modeling/cluster/dbscan.py +195 -123
- snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
- snowflake/ml/modeling/cluster/k_means.py +195 -123
- snowflake/ml/modeling/cluster/mean_shift.py +195 -123
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
- snowflake/ml/modeling/cluster/optics.py +195 -123
- snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
- snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
- snowflake/ml/modeling/compose/column_transformer.py +195 -123
- snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
- snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
- snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
- snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
- snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
- snowflake/ml/modeling/covariance/oas.py +195 -123
- snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
- snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
- snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
- snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
- snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/pca.py +195 -123
- snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
- snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
- snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
- snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
- snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
- snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
- snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
- snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +24 -6
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
- snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
- snowflake/ml/modeling/impute/knn_imputer.py +195 -123
- snowflake/ml/modeling/impute/missing_indicator.py +195 -123
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +198 -125
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +198 -125
- snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/lars.py +195 -123
- snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
- snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/perceptron.py +195 -123
- snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/ridge.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
- snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
- snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
- snowflake/ml/modeling/manifold/isomap.py +195 -123
- snowflake/ml/modeling/manifold/mds.py +195 -123
- snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
- snowflake/ml/modeling/manifold/tsne.py +195 -123
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
- snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
- snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
- snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
- snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
- snowflake/ml/modeling/pipeline/pipeline.py +4 -4
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
- snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
- snowflake/ml/modeling/svm/linear_svc.py +195 -123
- snowflake/ml/modeling/svm/linear_svr.py +195 -123
- snowflake/ml/modeling/svm/nu_svc.py +195 -123
- snowflake/ml/modeling/svm/nu_svr.py +195 -123
- snowflake/ml/modeling/svm/svc.py +195 -123
- snowflake/ml/modeling/svm/svr.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
- snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
- snowflake/ml/registry/_manager/model_manager.py +5 -1
- snowflake/ml/registry/model_registry.py +99 -26
- snowflake/ml/registry/registry.py +3 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +94 -55
- {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +218 -212
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.1.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/preprocessing/min_max_scaler.py

@@ -138,11 +138,7 @@ class MinMaxScaler(base.BaseTransformer):
             ),
         )
 
-    @telemetry.send_api_usage_telemetry(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
-    def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "MinMaxScaler":
+    def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "MinMaxScaler":
         """
         Compute min and max values of the dataset.
 
@@ -175,11 +171,13 @@ class MinMaxScaler(base.BaseTransformer):
         sklearn_scaler.fit(dataset[self.input_cols])
 
         for i, input_col in enumerate(self.input_cols):
-            self.min_[input_col] =
-            self.scale_[input_col] =
-            self.data_min_[input_col] =
-            self.data_max_[input_col] =
-            self.data_range_[input_col] =
+            self.min_[input_col] = _utils.to_float_if_valid(sklearn_scaler.min_[i], input_col, "min_")
+            self.scale_[input_col] = _utils.to_float_if_valid(sklearn_scaler.scale_[i], input_col, "scale_")
+            self.data_min_[input_col] = _utils.to_float_if_valid(sklearn_scaler.data_min_[i], input_col, "data_min_")
+            self.data_max_[input_col] = _utils.to_float_if_valid(sklearn_scaler.data_max_[i], input_col, "data_max_")
+            self.data_range_[input_col] = _utils.to_float_if_valid(
+                sklearn_scaler.data_range_[i], input_col, "data_range_"
+            )
 
     def _fit_snowpark(self, dataset: snowpark.DataFrame) -> None:
         self._check_input_column_types(dataset)
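Every overwrite of the fitted state now goes through `_utils.to_float_if_valid(value, input_col, attribute_name)` instead of a bare assignment, so a NaN or infinite statistic fails with the offending column named. The helper's body is not part of this diff; the following is only a plausible sketch of such a guard, with the signature taken from the call sites above:

    import math

    def to_float_if_valid(val, col, stat_name):
        # Assumed behavior: cast a fitted statistic to float and reject
        # non-finite values so bad state fails loudly, naming the column.
        v = float(val)
        if math.isnan(v) or math.isinf(v):
            raise ValueError(f"Invalid value {val!r} for {stat_name} of column {col}.")
        return v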
@@ -189,8 +187,8 @@ class MinMaxScaler(base.BaseTransformer):
         for input_col in self.input_cols:
             numeric_stats = computed_states[input_col]
 
-            data_min =
-            data_max =
+            data_min = _utils.to_float_if_valid(numeric_stats[_utils.NumericStatistics.MIN], input_col, "data_min_")
+            data_max = _utils.to_float_if_valid(numeric_stats[_utils.NumericStatistics.MAX], input_col, "data_max_")
             data_range = data_max - data_min
             self.scale_[input_col] = (
                 self.feature_range[1] - self.feature_range[0]
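Despite the rename, `fit` remains the public entry point: the telemetry decorator stripped from each subclass moves into the shared base class, which dispatches to the `_fit` hooks shown above. A minimal usage sketch on pandas data, assuming the public fit/transform surface is otherwise unchanged by this release:

    import pandas as pd
    from snowflake.ml.modeling.preprocessing import MinMaxScaler

    df = pd.DataFrame({"AMOUNT": [1.0, 5.0, 10.0]})
    scaler = MinMaxScaler(input_cols=["AMOUNT"], output_cols=["AMOUNT_SCALED"])
    scaler.fit(df)               # public fit; delegates internally to _fit
    print(scaler.transform(df))  # adds AMOUNT_SCALED, scaled into [0, 1]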
snowflake/ml/modeling/preprocessing/normalizer.py

@@ -70,11 +70,7 @@ class Normalizer(base.BaseTransformer):
         """
         pass
 
-    @telemetry.send_api_usage_telemetry(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
-    def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "Normalizer":
+    def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "Normalizer":
         """
         Does nothing, because the normalizer is a stateless transformer.
 
snowflake/ml/modeling/preprocessing/one_hot_encoder.py

@@ -267,11 +267,7 @@ class OneHotEncoder(base.BaseTransformer):
         if hasattr(self, "_state_pandas"):
             del self._state_pandas
 
-    @telemetry.send_api_usage_telemetry(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
-    def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "OneHotEncoder":
+    def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "OneHotEncoder":
         """
         Fit OneHotEncoder to dataset.
 
snowflake/ml/modeling/preprocessing/ordinal_encoder.py

@@ -171,11 +171,7 @@ class OrdinalEncoder(base.BaseTransformer):
         if hasattr(self, "_state_pandas"):
             del self._state_pandas
 
-    @telemetry.send_api_usage_telemetry(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
-    def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "OrdinalEncoder":
+    def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "OrdinalEncoder":
         """
         Fit the OrdinalEncoder to dataset.
 
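The same `fit` → `_fit` rewrite lands in Binarizer, KBinsDiscretizer, LabelEncoder, MaxAbsScaler, Normalizer, OneHotEncoder, OrdinalEncoder, and RobustScaler (the `+1 -5` preprocessing entries in the file list above). It is a textbook template method: the public wrapper owns telemetry once, and subclasses own only the fitting logic. A hypothetical, self-contained sketch of the shape of that wrapper (the real one lives in `snowflake/ml/modeling/framework/base.py`, `+24 -6` in this release, and is not shown in this diff):

    from typing import Any, Callable

    def send_api_usage_telemetry(project: str) -> Callable:
        # Stand-in for snowflake.ml._internal.telemetry's decorator.
        def wrap(fn: Callable) -> Callable:
            def inner(*args: Any, **kwargs: Any) -> Any:
                # ... record API usage for `project` here ...
                return fn(*args, **kwargs)
            return inner
        return wrap

    class BaseTransformer:
        @send_api_usage_telemetry(project="MLOps")
        def fit(self, dataset: Any) -> "BaseTransformer":
            # Telemetry is applied once here; fitting is delegated to the
            # subclass hook, which no longer needs its own decorator.
            return self._fit(dataset)

        def _fit(self, dataset: Any) -> "BaseTransformer":
            raise NotImplementedError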
snowflake/ml/modeling/preprocessing/polynomial_features.py

@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
+from snowflake.ml.model._signatures import utils as model_signature_utils
+from snowflake.ml.model.model_signature import (
+    BaseFeatureSpec,
+    DataType,
+    FeatureSpec,
+    ModelSignature,
+    _infer_signature,
+    _rename_signature_with_snowflake_identifiers,
+)
 
 from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
 
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     validate_sklearn_args,
 )
 
-from snowflake.ml.model.model_signature import (
-    DataType,
-    FeatureSpec,
-    ModelSignature,
-    _infer_signature,
-    _rename_signature_with_snowflake_identifiers,
-    BaseFeatureSpec,
-)
-from snowflake.ml.model._signatures import utils as model_signature_utils
-
 _PROJECT = "ModelDevelopment"
 # Derive subproject from module name by removing "sklearn"
 # and converting module name from underscore to CamelCase
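The retained comment describes how `_SUBPROJECT` is derived for telemetry. An illustrative rendering of that underscore-to-CamelCase rule (the derivation code itself is not shown in this hunk, only the comment; the function below is hypothetical):

    def derive_subproject(sklearn_module: str) -> str:
        # "sklearn.preprocessing" -> "Preprocessing"
        tail = sklearn_module.replace("sklearn.", "")
        return "".join(part.title() for part in tail.split("_"))

    print(derive_subproject("sklearn.preprocessing"))     # Preprocessing
    print(derive_subproject("sklearn.gaussian_process"))  # GaussianProcess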
@@ -222,12 +221,7 @@ class PolynomialFeatures(BaseTransformer):
         )
         return selected_cols
 
-    @telemetry.send_api_usage_telemetry(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-        custom_tags=dict([("autogen", True)]),
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PolynomialFeatures":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PolynomialFeatures":
         """Compute number of output features
         For more details on this function, see [sklearn.preprocessing.PolynomialFeatures.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html#sklearn.preprocessing.PolynomialFeatures.fit)
@@ -254,12 +248,14 @@ class PolynomialFeatures(BaseTransformer):
 
         self._snowpark_cols = dataset.select(self.input_cols).columns
 
-
+        # If we are already in a stored procedure, no need to kick off another one.
         if SNOWML_SPROC_ENV in os.environ:
             statement_params = telemetry.get_function_usage_statement_params(
                 project=_PROJECT,
                 subproject=_SUBPROJECT,
-                function_name=telemetry.get_statement_params_full_func_name(
+                function_name=telemetry.get_statement_params_full_func_name(
+                    inspect.currentframe(), PolynomialFeatures.__class__.__name__
+                ),
                 api_calls=[Session.call],
                 custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
             )
@@ -280,7 +276,7 @@ class PolynomialFeatures(BaseTransformer):
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
-        self.
+        self._generate_model_signatures(dataset)
         return self
 
     def _batch_inference_validate_snowpark(
@@ -354,7 +350,9 @@ class PolynomialFeatures(BaseTransformer):
             # when it is classifier, infer the datatype from label columns
             if expected_type_inferred == "" and 'predict' in self.model_signatures:
                 # Batch inference takes a single expected output column type. Use the first columns type for now.
-                label_cols_signatures = [
+                label_cols_signatures = [
+                    row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
+                ]
                 if len(label_cols_signatures) == 0:
                     error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
                     raise exceptions.SnowflakeMLException(
@@ -362,25 +360,22 @@ class PolynomialFeatures(BaseTransformer):
                         original_exception=ValueError(error_str),
                     )
 
-                expected_type_inferred = convert_sp_to_sf_type(
-                    label_cols_signatures[0].as_snowpark_type()
-                )
+                expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
 
             self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
-            assert isinstance(
+            assert isinstance(
+                dataset._session, Session
+            )  # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session
-                dependencies
-                drop_input_cols
-                expected_output_cols_type
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_type_inferred,
             )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -422,7 +417,7 @@ class PolynomialFeatures(BaseTransformer):
             Transformed dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="transform"
+        inference_method = "transform"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
@@ -459,17 +454,14 @@ class PolynomialFeatures(BaseTransformer):
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session
-                dependencies
-                drop_input_cols
-                expected_output_cols_type
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_dtype,
            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -488,7 +480,11 @@ class PolynomialFeatures(BaseTransformer):
         return output_df
 
     @available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
-    def fit_predict(
+    def fit_predict(
+        self,
+        dataset: Union[DataFrame, pd.DataFrame],
+        output_cols_prefix: str = "fit_predict_",
+    ) -> Union[DataFrame, pd.DataFrame]:
         """ Method not supported for this class.
 
 
@@ -513,7 +509,9 @@ class PolynomialFeatures(BaseTransformer):
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
             drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=
+            expected_output_cols_list=(
+                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
+            ),
         )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
@@ -530,6 +528,62 @@ class PolynomialFeatures(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
+
+    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
+        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
+        """
+        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
+        # The following condition is introduced for kneighbors methods, and not used in other methods
+        if output_cols:
+            output_cols = [
+                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
+                for c in output_cols
+            ]
+        elif getattr(self._sklearn_object, "classes_", None) is None:
+            output_cols = [output_cols_prefix]
+        elif self._sklearn_object is not None:
+            classes = self._sklearn_object.classes_
+            if isinstance(classes, numpy.ndarray):
+                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
+            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
+                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
+                output_cols = []
+                for i, cl in enumerate(classes):
+                    # For binary classification, there is only one output column for each class
+                    # ndarray as the two classes are complementary.
+                    if len(cl) == 2:
+                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
+                    else:
+                        output_cols.extend([
+                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
+                        ])
+            else:
+                output_cols = []
+
+        # Make sure column names are valid snowflake identifiers.
+        assert output_cols is not None  # Make MyPy happy
+        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
+
+        return rv
+
+    def _align_expected_output_names(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
+    ) -> List[str]:
+        # in case the inferred output column names dimension is different
+        # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
+        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        output_df_columns = list(output_df_pd.columns)
+        output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
+        if self.sample_weight_col:
+            output_df_columns_set -= set(self.sample_weight_col)
+        # if the dimension of inferred output column names is correct; use it
+        if len(expected_output_cols_list) == len(output_df_columns_set):
+            return expected_output_cols_list
+        # otherwise, use the sklearn estimator's output
+        else:
+            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
+
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
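Concretely, for a three-class classifier with `classes_ == array([0, 1, 2])` and the prefix `predict_proba_`, `_get_output_column_names` yields `predict_proba_0`, `predict_proba_1`, `predict_proba_2`; `_align_expected_output_names` then runs the method on a single pandas row and falls back to sklearn's own column set whenever the inferred count disagrees. A standalone rendering of the naming rule (Snowflake identifier resolution omitted for brevity):

    import numpy

    def output_column_names(prefix, classes):
        # Mirrors the branches added above, minus identifier handling.
        if classes is None:  # not a classifier
            return [prefix]
        if isinstance(classes, numpy.ndarray):  # single-output classifier
            return [f"{prefix}{c}" for c in classes.tolist()]
        # Multioutput: classes_ is a list of ndarrays; binary targets
        # collapse to one column since the two classes are complementary.
        cols = []
        for i, cl in enumerate(classes):
            if len(cl) == 2:
                cols.append(f"{prefix}{i}_{cl[0]}")
            else:
                cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
        return cols

    print(output_column_names("predict_proba_", numpy.array([0, 1, 2])))
    # ['predict_proba_0', 'predict_proba_1', 'predict_proba_2']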
@@ -560,24 +614,28 @@ class PolynomialFeatures(BaseTransformer):
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
             self._deps = self._batch_inference_validate_snowpark(
                 dataset=dataset,
                 inference_method=inference_method,
             )
-            assert isinstance(
+            assert isinstance(
+                dataset._session, Session
+            )  # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -589,7 +647,7 @@ class PolynomialFeatures(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -619,7 +677,8 @@ class PolynomialFeatures(BaseTransformer):
             Output dataset with log probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="predict_log_proba"
+        inference_method = "predict_log_proba"
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
@@ -630,18 +689,20 @@ class PolynomialFeatures(BaseTransformer):
                 dataset=dataset,
                 inference_method=inference_method,
             )
-            assert isinstance(
+            assert isinstance(
+                dataset._session, Session
+            )  # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -654,7 +715,7 @@ class PolynomialFeatures(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -680,30 +741,34 @@ class PolynomialFeatures(BaseTransformer):
             Output dataset with results of the decision function for the samples in input dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="decision_function"
+        inference_method = "decision_function"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
             self._deps = self._batch_inference_validate_snowpark(
                 dataset=dataset,
                 inference_method=inference_method,
             )
-            assert isinstance(
+            assert isinstance(
+                dataset._session, Session
+            )  # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -716,7 +781,7 @@ class PolynomialFeatures(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -745,12 +810,14 @@ class PolynomialFeatures(BaseTransformer):
             Output dataset with probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="score_samples"
+        inference_method = "score_samples"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
             self._deps = self._batch_inference_validate_snowpark(
                 dataset=dataset,
@@ -763,6 +830,9 @@ class PolynomialFeatures(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
             transform_kwargs = dict(
@@ -781,7 +851,7 @@ class PolynomialFeatures(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -926,50 +996,84 @@ class PolynomialFeatures(BaseTransformer):
         )
         return output_df
 
+
+
+    def to_sklearn(self) -> Any:
+        """Get sklearn.preprocessing.PolynomialFeatures object.
+        """
+        if self._sklearn_object is None:
+            self._sklearn_object = self._create_sklearn_object()
+        return self._sklearn_object
+
+    def to_xgboost(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_xgboost()",
+                    "to_sklearn()"
+                )
+            ),
+        )
+
+    def to_lightgbm(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_lightgbm()",
+                    "to_sklearn()"
+                )
+            ),
+        )
 
-    def
+    def _get_dependencies(self) -> List[str]:
+        return self._deps
+
+
+    def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         self._model_signature_dict = dict()
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input"))
+        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
-            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
+            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
             # For classifier, the type of predict is the same as the type of label
-            if self._sklearn_object._estimator_type ==
-
+            if self._sklearn_object._estimator_type == "classifier":
+                # label columns is the desired type for output
                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
             # For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
             # For outlier models, returns -1 for outliers and 1 for inliers.
-            # Clusterer returns int64 cluster labels.
+            # Clusterer returns int64 cluster labels.
             elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
                 outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
             # For regressor, the type of predict is float64
-            elif self._sklearn_object._estimator_type ==
+            elif self._sklearn_object._estimator_type == "regressor":
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(
-
-
+                self._model_signature_dict[prob_func] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
 
         # Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
         items = list(self._model_signature_dict.items())
@@ -982,10 +1086,10 @@ class PolynomialFeatures(BaseTransformer):
         """Returns model signature of current class.
 
         Raises:
-
+            SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
 
         Returns:
-            Dict
+            Dict with each method and its input output signature
         """
         if self._model_signature_dict is None:
             raise exceptions.SnowflakeMLException(
@@ -993,35 +1097,3 @@ class PolynomialFeatures(BaseTransformer):
                 original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
             )
         return self._model_signature_dict
-
-    def to_sklearn(self) -> Any:
-        """Get sklearn.preprocessing.PolynomialFeatures object.
-        """
-        if self._sklearn_object is None:
-            self._sklearn_object = self._create_sklearn_object()
-        return self._sklearn_object
-
-    def to_xgboost(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_xgboost()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def to_lightgbm(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_lightgbm()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def _get_dependencies(self) -> List[str]:
-        return self._deps
snowflake/ml/modeling/preprocessing/robust_scaler.py

@@ -144,11 +144,7 @@ class RobustScaler(base.BaseTransformer):
     def scale_(self) -> Optional[Dict[str, float]]:
         return None if (not self.with_scaling or not self._state_is_set) else self._scale
 
-    @telemetry.send_api_usage_telemetry(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
-    def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "RobustScaler":
+    def _fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "RobustScaler":
         """
         Compute center, scale and quantile values of the dataset.
 