snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.preprocessing".replace("
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class PolynomialFeatures(BaseTransformer):
|
71
64
|
r"""Generate polynomial and interaction features
|
72
65
|
For more details on this class, see [sklearn.preprocessing.PolynomialFeatures]
|
@@ -222,12 +215,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
222
215
|
)
|
223
216
|
return selected_cols
|
224
217
|
|
225
|
-
|
226
|
-
project=_PROJECT,
|
227
|
-
subproject=_SUBPROJECT,
|
228
|
-
custom_tags=dict([("autogen", True)]),
|
229
|
-
)
|
230
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PolynomialFeatures":
|
218
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "PolynomialFeatures":
|
231
219
|
"""Compute number of output features
|
232
220
|
For more details on this function, see [sklearn.preprocessing.PolynomialFeatures.fit]
|
233
221
|
(https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html#sklearn.preprocessing.PolynomialFeatures.fit)
|
@@ -254,12 +242,14 @@ class PolynomialFeatures(BaseTransformer):
|
|
254
242
|
|
255
243
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
256
244
|
|
257
|
-
|
245
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
258
246
|
if SNOWML_SPROC_ENV in os.environ:
|
259
247
|
statement_params = telemetry.get_function_usage_statement_params(
|
260
248
|
project=_PROJECT,
|
261
249
|
subproject=_SUBPROJECT,
|
262
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
250
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
251
|
+
inspect.currentframe(), PolynomialFeatures.__class__.__name__
|
252
|
+
),
|
263
253
|
api_calls=[Session.call],
|
264
254
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
265
255
|
)
|
@@ -280,27 +270,24 @@ class PolynomialFeatures(BaseTransformer):
|
|
280
270
|
)
|
281
271
|
self._sklearn_object = model_trainer.train()
|
282
272
|
self._is_fitted = True
|
283
|
-
self.
|
273
|
+
self._generate_model_signatures(dataset)
|
284
274
|
return self
|
285
275
|
|
286
276
|
def _batch_inference_validate_snowpark(
|
287
277
|
self,
|
288
278
|
dataset: DataFrame,
|
289
279
|
inference_method: str,
|
290
|
-
) ->
|
291
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
292
|
-
return the available package that exists in the snowflake anaconda channel
|
280
|
+
) -> None:
|
281
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
293
282
|
|
294
283
|
Args:
|
295
284
|
dataset: snowpark dataframe
|
296
285
|
inference_method: the inference method such as predict, score...
|
297
|
-
|
286
|
+
|
298
287
|
Raises:
|
299
288
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
300
289
|
SnowflakeMLException: If the session is None, raise error
|
301
290
|
|
302
|
-
Returns:
|
303
|
-
A list of available package that exists in the snowflake anaconda channel
|
304
291
|
"""
|
305
292
|
if not self._is_fitted:
|
306
293
|
raise exceptions.SnowflakeMLException(
|
@@ -318,9 +305,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
318
305
|
"Session must not specified for snowpark dataset."
|
319
306
|
),
|
320
307
|
)
|
321
|
-
|
322
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
323
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
308
|
+
|
324
309
|
|
325
310
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
326
311
|
@telemetry.send_api_usage_telemetry(
|
@@ -354,7 +339,9 @@ class PolynomialFeatures(BaseTransformer):
|
|
354
339
|
# when it is classifier, infer the datatype from label columns
|
355
340
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
356
341
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
357
|
-
label_cols_signatures = [
|
342
|
+
label_cols_signatures = [
|
343
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
344
|
+
]
|
358
345
|
if len(label_cols_signatures) == 0:
|
359
346
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
360
347
|
raise exceptions.SnowflakeMLException(
|
@@ -362,25 +349,23 @@ class PolynomialFeatures(BaseTransformer):
|
|
362
349
|
original_exception=ValueError(error_str),
|
363
350
|
)
|
364
351
|
|
365
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
366
|
-
label_cols_signatures[0].as_snowpark_type()
|
367
|
-
)
|
352
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
368
353
|
|
369
|
-
self.
|
370
|
-
|
354
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
355
|
+
self._deps = self._get_dependencies()
|
356
|
+
assert isinstance(
|
357
|
+
dataset._session, Session
|
358
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
371
359
|
|
372
360
|
transform_kwargs = dict(
|
373
|
-
session
|
374
|
-
dependencies
|
375
|
-
drop_input_cols
|
376
|
-
expected_output_cols_type
|
361
|
+
session=dataset._session,
|
362
|
+
dependencies=self._deps,
|
363
|
+
drop_input_cols=self._drop_input_cols,
|
364
|
+
expected_output_cols_type=expected_type_inferred,
|
377
365
|
)
|
378
366
|
|
379
367
|
elif isinstance(dataset, pd.DataFrame):
|
380
|
-
transform_kwargs = dict(
|
381
|
-
snowpark_input_cols = self._snowpark_cols,
|
382
|
-
drop_input_cols = self._drop_input_cols
|
383
|
-
)
|
368
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
384
369
|
|
385
370
|
transform_handlers = ModelTransformerBuilder.build(
|
386
371
|
dataset=dataset,
|
@@ -422,7 +407,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
422
407
|
Transformed dataset.
|
423
408
|
"""
|
424
409
|
super()._check_dataset_type(dataset)
|
425
|
-
inference_method="transform"
|
410
|
+
inference_method = "transform"
|
426
411
|
|
427
412
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
428
413
|
# are specific to the type of dataset used.
|
@@ -452,24 +437,19 @@ class PolynomialFeatures(BaseTransformer):
|
|
452
437
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
453
438
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
454
439
|
|
455
|
-
self.
|
456
|
-
|
457
|
-
inference_method=inference_method,
|
458
|
-
)
|
440
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
441
|
+
self._deps = self._get_dependencies()
|
459
442
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
460
443
|
|
461
444
|
transform_kwargs = dict(
|
462
|
-
session
|
463
|
-
dependencies
|
464
|
-
drop_input_cols
|
465
|
-
expected_output_cols_type
|
445
|
+
session=dataset._session,
|
446
|
+
dependencies=self._deps,
|
447
|
+
drop_input_cols=self._drop_input_cols,
|
448
|
+
expected_output_cols_type=expected_dtype,
|
466
449
|
)
|
467
450
|
|
468
451
|
elif isinstance(dataset, pd.DataFrame):
|
469
|
-
transform_kwargs = dict(
|
470
|
-
snowpark_input_cols = self._snowpark_cols,
|
471
|
-
drop_input_cols = self._drop_input_cols
|
472
|
-
)
|
452
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
473
453
|
|
474
454
|
transform_handlers = ModelTransformerBuilder.build(
|
475
455
|
dataset=dataset,
|
@@ -488,7 +468,11 @@ class PolynomialFeatures(BaseTransformer):
|
|
488
468
|
return output_df
|
489
469
|
|
490
470
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
491
|
-
def fit_predict(
|
471
|
+
def fit_predict(
|
472
|
+
self,
|
473
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
474
|
+
output_cols_prefix: str = "fit_predict_",
|
475
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
492
476
|
""" Method not supported for this class.
|
493
477
|
|
494
478
|
|
@@ -513,22 +497,106 @@ class PolynomialFeatures(BaseTransformer):
|
|
513
497
|
)
|
514
498
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
515
499
|
drop_input_cols=self._drop_input_cols,
|
516
|
-
expected_output_cols_list=
|
500
|
+
expected_output_cols_list=(
|
501
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
502
|
+
),
|
517
503
|
)
|
518
504
|
self._sklearn_object = fitted_estimator
|
519
505
|
self._is_fitted = True
|
520
506
|
return output_result
|
521
507
|
|
508
|
+
|
509
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
510
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
511
|
+
""" Fit to data, then transform it
|
512
|
+
For more details on this function, see [sklearn.preprocessing.PolynomialFeatures.fit_transform]
|
513
|
+
(https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html#sklearn.preprocessing.PolynomialFeatures.fit_transform)
|
514
|
+
|
515
|
+
|
516
|
+
Raises:
|
517
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
522
518
|
|
523
|
-
|
524
|
-
|
525
|
-
|
519
|
+
Args:
|
520
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
521
|
+
Snowpark or Pandas DataFrame.
|
522
|
+
output_cols_prefix: Prefix for the response columns
|
526
523
|
Returns:
|
527
524
|
Transformed dataset.
|
528
525
|
"""
|
529
|
-
self.
|
530
|
-
|
531
|
-
|
526
|
+
self._infer_input_output_cols(dataset)
|
527
|
+
super()._check_dataset_type(dataset)
|
528
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
529
|
+
estimator=self._sklearn_object,
|
530
|
+
dataset=dataset,
|
531
|
+
input_cols=self.input_cols,
|
532
|
+
label_cols=self.label_cols,
|
533
|
+
sample_weight_col=self.sample_weight_col,
|
534
|
+
autogenerated=self._autogenerated,
|
535
|
+
subproject=_SUBPROJECT,
|
536
|
+
)
|
537
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
538
|
+
drop_input_cols=self._drop_input_cols,
|
539
|
+
expected_output_cols_list=self.output_cols,
|
540
|
+
)
|
541
|
+
self._sklearn_object = fitted_estimator
|
542
|
+
self._is_fitted = True
|
543
|
+
return output_result
|
544
|
+
|
545
|
+
|
546
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
547
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
548
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
549
|
+
"""
|
550
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
551
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
552
|
+
if output_cols:
|
553
|
+
output_cols = [
|
554
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
555
|
+
for c in output_cols
|
556
|
+
]
|
557
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
558
|
+
output_cols = [output_cols_prefix]
|
559
|
+
elif self._sklearn_object is not None:
|
560
|
+
classes = self._sklearn_object.classes_
|
561
|
+
if isinstance(classes, numpy.ndarray):
|
562
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
563
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
564
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
565
|
+
output_cols = []
|
566
|
+
for i, cl in enumerate(classes):
|
567
|
+
# For binary classification, there is only one output column for each class
|
568
|
+
# ndarray as the two classes are complementary.
|
569
|
+
if len(cl) == 2:
|
570
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
571
|
+
else:
|
572
|
+
output_cols.extend([
|
573
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
574
|
+
])
|
575
|
+
else:
|
576
|
+
output_cols = []
|
577
|
+
|
578
|
+
# Make sure column names are valid snowflake identifiers.
|
579
|
+
assert output_cols is not None # Make MyPy happy
|
580
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
581
|
+
|
582
|
+
return rv
|
583
|
+
|
584
|
+
def _align_expected_output_names(
|
585
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
586
|
+
) -> List[str]:
|
587
|
+
# in case the inferred output column names dimension is different
|
588
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
589
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
590
|
+
output_df_columns = list(output_df_pd.columns)
|
591
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
592
|
+
if self.sample_weight_col:
|
593
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
594
|
+
# if the dimension of inferred output column names is correct; use it
|
595
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
596
|
+
return expected_output_cols_list
|
597
|
+
# otherwise, use the sklearn estimator's output
|
598
|
+
else:
|
599
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
532
600
|
|
533
601
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
534
602
|
@telemetry.send_api_usage_telemetry(
|
@@ -560,24 +628,26 @@ class PolynomialFeatures(BaseTransformer):
|
|
560
628
|
# are specific to the type of dataset used.
|
561
629
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
562
630
|
|
631
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
632
|
+
|
563
633
|
if isinstance(dataset, DataFrame):
|
564
|
-
self.
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
634
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
635
|
+
self._deps = self._get_dependencies()
|
636
|
+
assert isinstance(
|
637
|
+
dataset._session, Session
|
638
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
569
639
|
transform_kwargs = dict(
|
570
640
|
session=dataset._session,
|
571
641
|
dependencies=self._deps,
|
572
|
-
drop_input_cols
|
642
|
+
drop_input_cols=self._drop_input_cols,
|
573
643
|
expected_output_cols_type="float",
|
574
644
|
)
|
645
|
+
expected_output_cols = self._align_expected_output_names(
|
646
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
647
|
+
)
|
575
648
|
|
576
649
|
elif isinstance(dataset, pd.DataFrame):
|
577
|
-
transform_kwargs = dict(
|
578
|
-
snowpark_input_cols = self._snowpark_cols,
|
579
|
-
drop_input_cols = self._drop_input_cols
|
580
|
-
)
|
650
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
581
651
|
|
582
652
|
transform_handlers = ModelTransformerBuilder.build(
|
583
653
|
dataset=dataset,
|
@@ -589,7 +659,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
589
659
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
590
660
|
inference_method=inference_method,
|
591
661
|
input_cols=self.input_cols,
|
592
|
-
expected_output_cols=
|
662
|
+
expected_output_cols=expected_output_cols,
|
593
663
|
**transform_kwargs
|
594
664
|
)
|
595
665
|
return output_df
|
@@ -619,29 +689,30 @@ class PolynomialFeatures(BaseTransformer):
|
|
619
689
|
Output dataset with log probability of the sample for each class in the model.
|
620
690
|
"""
|
621
691
|
super()._check_dataset_type(dataset)
|
622
|
-
inference_method="predict_log_proba"
|
692
|
+
inference_method = "predict_log_proba"
|
693
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
623
694
|
|
624
695
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
625
696
|
# are specific to the type of dataset used.
|
626
697
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
627
698
|
|
628
699
|
if isinstance(dataset, DataFrame):
|
629
|
-
self.
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
700
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
701
|
+
self._deps = self._get_dependencies()
|
702
|
+
assert isinstance(
|
703
|
+
dataset._session, Session
|
704
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
634
705
|
transform_kwargs = dict(
|
635
706
|
session=dataset._session,
|
636
707
|
dependencies=self._deps,
|
637
|
-
drop_input_cols
|
708
|
+
drop_input_cols=self._drop_input_cols,
|
638
709
|
expected_output_cols_type="float",
|
639
710
|
)
|
711
|
+
expected_output_cols = self._align_expected_output_names(
|
712
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
713
|
+
)
|
640
714
|
elif isinstance(dataset, pd.DataFrame):
|
641
|
-
transform_kwargs = dict(
|
642
|
-
snowpark_input_cols = self._snowpark_cols,
|
643
|
-
drop_input_cols = self._drop_input_cols
|
644
|
-
)
|
715
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
645
716
|
|
646
717
|
transform_handlers = ModelTransformerBuilder.build(
|
647
718
|
dataset=dataset,
|
@@ -654,7 +725,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
654
725
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
655
726
|
inference_method=inference_method,
|
656
727
|
input_cols=self.input_cols,
|
657
|
-
expected_output_cols=
|
728
|
+
expected_output_cols=expected_output_cols,
|
658
729
|
**transform_kwargs
|
659
730
|
)
|
660
731
|
return output_df
|
@@ -680,30 +751,32 @@ class PolynomialFeatures(BaseTransformer):
|
|
680
751
|
Output dataset with results of the decision function for the samples in input dataset.
|
681
752
|
"""
|
682
753
|
super()._check_dataset_type(dataset)
|
683
|
-
inference_method="decision_function"
|
754
|
+
inference_method = "decision_function"
|
684
755
|
|
685
756
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
686
757
|
# are specific to the type of dataset used.
|
687
758
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
688
759
|
|
760
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
761
|
+
|
689
762
|
if isinstance(dataset, DataFrame):
|
690
|
-
self.
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
763
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
764
|
+
self._deps = self._get_dependencies()
|
765
|
+
assert isinstance(
|
766
|
+
dataset._session, Session
|
767
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
695
768
|
transform_kwargs = dict(
|
696
769
|
session=dataset._session,
|
697
770
|
dependencies=self._deps,
|
698
|
-
drop_input_cols
|
771
|
+
drop_input_cols=self._drop_input_cols,
|
699
772
|
expected_output_cols_type="float",
|
700
773
|
)
|
774
|
+
expected_output_cols = self._align_expected_output_names(
|
775
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
776
|
+
)
|
701
777
|
|
702
778
|
elif isinstance(dataset, pd.DataFrame):
|
703
|
-
transform_kwargs = dict(
|
704
|
-
snowpark_input_cols = self._snowpark_cols,
|
705
|
-
drop_input_cols = self._drop_input_cols
|
706
|
-
)
|
779
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
707
780
|
|
708
781
|
transform_handlers = ModelTransformerBuilder.build(
|
709
782
|
dataset=dataset,
|
@@ -716,7 +789,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
716
789
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
717
790
|
inference_method=inference_method,
|
718
791
|
input_cols=self.input_cols,
|
719
|
-
expected_output_cols=
|
792
|
+
expected_output_cols=expected_output_cols,
|
720
793
|
**transform_kwargs
|
721
794
|
)
|
722
795
|
return output_df
|
@@ -745,17 +818,17 @@ class PolynomialFeatures(BaseTransformer):
|
|
745
818
|
Output dataset with probability of the sample for each class in the model.
|
746
819
|
"""
|
747
820
|
super()._check_dataset_type(dataset)
|
748
|
-
inference_method="score_samples"
|
821
|
+
inference_method = "score_samples"
|
749
822
|
|
750
823
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
751
824
|
# are specific to the type of dataset used.
|
752
825
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
753
826
|
|
827
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
828
|
+
|
754
829
|
if isinstance(dataset, DataFrame):
|
755
|
-
self.
|
756
|
-
|
757
|
-
inference_method=inference_method,
|
758
|
-
)
|
830
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
831
|
+
self._deps = self._get_dependencies()
|
759
832
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
760
833
|
transform_kwargs = dict(
|
761
834
|
session=dataset._session,
|
@@ -763,6 +836,9 @@ class PolynomialFeatures(BaseTransformer):
|
|
763
836
|
drop_input_cols = self._drop_input_cols,
|
764
837
|
expected_output_cols_type="float",
|
765
838
|
)
|
839
|
+
expected_output_cols = self._align_expected_output_names(
|
840
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
841
|
+
)
|
766
842
|
|
767
843
|
elif isinstance(dataset, pd.DataFrame):
|
768
844
|
transform_kwargs = dict(
|
@@ -781,7 +857,7 @@ class PolynomialFeatures(BaseTransformer):
|
|
781
857
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
782
858
|
inference_method=inference_method,
|
783
859
|
input_cols=self.input_cols,
|
784
|
-
expected_output_cols=
|
860
|
+
expected_output_cols=expected_output_cols,
|
785
861
|
**transform_kwargs
|
786
862
|
)
|
787
863
|
return output_df
|
@@ -814,17 +890,15 @@ class PolynomialFeatures(BaseTransformer):
|
|
814
890
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
815
891
|
|
816
892
|
if isinstance(dataset, DataFrame):
|
817
|
-
self.
|
818
|
-
|
819
|
-
inference_method="score",
|
820
|
-
)
|
893
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
894
|
+
self._deps = self._get_dependencies()
|
821
895
|
selected_cols = self._get_active_columns()
|
822
896
|
if len(selected_cols) > 0:
|
823
897
|
dataset = dataset.select(selected_cols)
|
824
898
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
825
899
|
transform_kwargs = dict(
|
826
900
|
session=dataset._session,
|
827
|
-
dependencies=
|
901
|
+
dependencies=self._deps,
|
828
902
|
score_sproc_imports=['sklearn'],
|
829
903
|
)
|
830
904
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -889,11 +963,8 @@ class PolynomialFeatures(BaseTransformer):
|
|
889
963
|
|
890
964
|
if isinstance(dataset, DataFrame):
|
891
965
|
|
892
|
-
self.
|
893
|
-
|
894
|
-
inference_method=inference_method,
|
895
|
-
|
896
|
-
)
|
966
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
967
|
+
self._deps = self._get_dependencies()
|
897
968
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
898
969
|
transform_kwargs = dict(
|
899
970
|
session = dataset._session,
|
@@ -926,50 +997,84 @@ class PolynomialFeatures(BaseTransformer):
|
|
926
997
|
)
|
927
998
|
return output_df
|
928
999
|
|
1000
|
+
|
1001
|
+
|
1002
|
+
def to_sklearn(self) -> Any:
|
1003
|
+
"""Get sklearn.preprocessing.PolynomialFeatures object.
|
1004
|
+
"""
|
1005
|
+
if self._sklearn_object is None:
|
1006
|
+
self._sklearn_object = self._create_sklearn_object()
|
1007
|
+
return self._sklearn_object
|
1008
|
+
|
1009
|
+
def to_xgboost(self) -> Any:
|
1010
|
+
raise exceptions.SnowflakeMLException(
|
1011
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1012
|
+
original_exception=AttributeError(
|
1013
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1014
|
+
"to_xgboost()",
|
1015
|
+
"to_sklearn()"
|
1016
|
+
)
|
1017
|
+
),
|
1018
|
+
)
|
929
1019
|
|
930
|
-
def
|
1020
|
+
def to_lightgbm(self) -> Any:
|
1021
|
+
raise exceptions.SnowflakeMLException(
|
1022
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1023
|
+
original_exception=AttributeError(
|
1024
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1025
|
+
"to_lightgbm()",
|
1026
|
+
"to_sklearn()"
|
1027
|
+
)
|
1028
|
+
),
|
1029
|
+
)
|
1030
|
+
|
1031
|
+
def _get_dependencies(self) -> List[str]:
|
1032
|
+
return self._deps
|
1033
|
+
|
1034
|
+
|
1035
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
931
1036
|
self._model_signature_dict = dict()
|
932
1037
|
|
933
1038
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
934
1039
|
|
935
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1040
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
936
1041
|
outputs: List[BaseFeatureSpec] = []
|
937
1042
|
if hasattr(self, "predict"):
|
938
1043
|
# keep mypy happy
|
939
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1044
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
940
1045
|
# For classifier, the type of predict is the same as the type of label
|
941
|
-
if self._sklearn_object._estimator_type ==
|
942
|
-
|
1046
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1047
|
+
# label columns is the desired type for output
|
943
1048
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
944
1049
|
# rename the output columns
|
945
1050
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
946
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
947
|
-
|
948
|
-
|
1051
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1052
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1053
|
+
)
|
949
1054
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
950
1055
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
951
|
-
# Clusterer returns int64 cluster labels.
|
1056
|
+
# Clusterer returns int64 cluster labels.
|
952
1057
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
953
1058
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
954
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
955
|
-
|
956
|
-
|
957
|
-
|
1059
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1060
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1061
|
+
)
|
1062
|
+
|
958
1063
|
# For regressor, the type of predict is float64
|
959
|
-
elif self._sklearn_object._estimator_type ==
|
1064
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
960
1065
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
961
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
962
|
-
|
963
|
-
|
964
|
-
|
1066
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1067
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1068
|
+
)
|
1069
|
+
|
965
1070
|
for prob_func in PROB_FUNCTIONS:
|
966
1071
|
if hasattr(self, prob_func):
|
967
1072
|
output_cols_prefix: str = f"{prob_func}_"
|
968
1073
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
969
1074
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
970
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
971
|
-
|
972
|
-
|
1075
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1076
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1077
|
+
)
|
973
1078
|
|
974
1079
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
975
1080
|
items = list(self._model_signature_dict.items())
|
@@ -982,10 +1087,10 @@ class PolynomialFeatures(BaseTransformer):
|
|
982
1087
|
"""Returns model signature of current class.
|
983
1088
|
|
984
1089
|
Raises:
|
985
|
-
|
1090
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
986
1091
|
|
987
1092
|
Returns:
|
988
|
-
Dict
|
1093
|
+
Dict with each method and its input output signature
|
989
1094
|
"""
|
990
1095
|
if self._model_signature_dict is None:
|
991
1096
|
raise exceptions.SnowflakeMLException(
|
@@ -993,35 +1098,3 @@ class PolynomialFeatures(BaseTransformer):
|
|
993
1098
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
994
1099
|
)
|
995
1100
|
return self._model_signature_dict
|
996
|
-
|
997
|
-
def to_sklearn(self) -> Any:
|
998
|
-
"""Get sklearn.preprocessing.PolynomialFeatures object.
|
999
|
-
"""
|
1000
|
-
if self._sklearn_object is None:
|
1001
|
-
self._sklearn_object = self._create_sklearn_object()
|
1002
|
-
return self._sklearn_object
|
1003
|
-
|
1004
|
-
def to_xgboost(self) -> Any:
|
1005
|
-
raise exceptions.SnowflakeMLException(
|
1006
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1007
|
-
original_exception=AttributeError(
|
1008
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1009
|
-
"to_xgboost()",
|
1010
|
-
"to_sklearn()"
|
1011
|
-
)
|
1012
|
-
),
|
1013
|
-
)
|
1014
|
-
|
1015
|
-
def to_lightgbm(self) -> Any:
|
1016
|
-
raise exceptions.SnowflakeMLException(
|
1017
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1018
|
-
original_exception=AttributeError(
|
1019
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1020
|
-
"to_lightgbm()",
|
1021
|
-
"to_sklearn()"
|
1022
|
-
)
|
1023
|
-
),
|
1024
|
-
)
|
1025
|
-
|
1026
|
-
def _get_dependencies(self) -> List[str]:
|
1027
|
-
return self._deps
|