snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
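Per the file list, the headline changes in 1.5.0 are a rebuilt `snowflake.ml.dataset` package (factory, reader, and metadata modules), new CatBoost and LightGBM packager handlers, a model runtime relocated from `_model_composer` into `_packager`, removal of the registry artifact manager, and one sweeping template change (+246/-175 or +248/-175) applied across the roughly 160 autogenerated `snowflake.ml.modeling` estimator wrappers. The hunks that follow show that template change as it lands in `snowflake/ml/modeling/ensemble/stacking_regressor.py`. For the dataset package, a hedged sketch of the intended workflow; the factory functions and the `read` accessor are inferred from the new `dataset_factory.py` and `dataset_reader.py` files above, not from documented API:

```python
# Hedged sketch only: create_from_dataframe/load_dataset and the .read accessor
# are assumptions based on the new dataset_factory.py / dataset_reader.py modules
# in this diff, and `session` is a pre-existing snowflake.snowpark.Session.
from snowflake.ml import dataset

ds = dataset.create_from_dataframe(
    session, "MY_DATASET", "v1", input_dataframe=session.table("TRAINING_DATA")
)
ds2 = dataset.load_dataset(session, "MY_DATASET", "v1")  # reopen by name/version
pdf = ds2.read.to_pandas()  # DatasetReader materializes the versioned snapshot
```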
```diff
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
     BatchInferenceKwargsTypedDict,
     ScoreKwargsTypedDict
 )
+from snowflake.ml.model._signatures import utils as model_signature_utils
+from snowflake.ml.model.model_signature import (
+    BaseFeatureSpec,
+    DataType,
+    FeatureSpec,
+    ModelSignature,
+    _infer_signature,
+    _rename_signature_with_snowflake_identifiers,
+)
 
 from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
 
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
     validate_sklearn_args,
 )
 
-from snowflake.ml.model.model_signature import (
-    DataType,
-    FeatureSpec,
-    ModelSignature,
-    _infer_signature,
-    _rename_signature_with_snowflake_identifiers,
-    BaseFeatureSpec,
-)
-from snowflake.ml.model._signatures import utils as model_signature_utils
-
 _PROJECT = "ModelDevelopment"
 # Derive subproject from module name by removing "sklearn"
 # and converting module name from underscore to CamelCase
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
 
 DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
-def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
-    def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
-        return False and callable(getattr(self._sklearn_object, "fit_transform", None))
-    return check
-
-
 class StackingRegressor(BaseTransformer):
     r"""Stack of estimators with a final regressor
     For more details on this class, see [sklearn.ensemble.StackingRegressor]
```
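Worth noting: the `_is_fit_transform_method_enabled` helper removed above was dead code. Its inner check never ran because of the `False and ...` short circuit, so 1.4.0 reported `fit_transform` as unavailable on every autogenerated estimator. A two-line demonstration:

```python
# The deleted gate, reduced to its essence: `False and <anything>` short-circuits,
# so the callable() probe on the wrapped sklearn estimator was never evaluated.
def is_fit_transform_enabled(estimator: object) -> bool:
    return False and callable(getattr(estimator, "fit_transform", None))

assert is_fit_transform_enabled(object()) is False  # always False, for any estimator
```

1.5.0 deletes the gate and adds a real `fit_transform` implementation later in this file.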
```diff
@@ -255,12 +248,7 @@ class StackingRegressor(BaseTransformer):
         )
         return selected_cols
 
-    @telemetry.send_api_usage_telemetry(
-        project=_PROJECT,
-        subproject=_SUBPROJECT,
-        custom_tags=dict([("autogen", True)]),
-    )
-    def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "StackingRegressor":
+    def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "StackingRegressor":
         """Fit the estimators
         For more details on this function, see [sklearn.ensemble.StackingRegressor.fit]
         (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingRegressor.html#sklearn.ensemble.StackingRegressor.fit)
```
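The telemetry decorator and the public `fit` disappear from the generated class; only `_fit` remains. The likely reading (an assumption, consistent with the `framework/base.py` change listed above, +72/-37) is that `BaseTransformer.fit` now owns telemetry and dispatches to the subclass hook, so the instrumentation lives in one place instead of ~160 generated copies. A minimal sketch of that pattern, not the actual base-class code:

```python
# Sketch under the assumption above; only fit/_fit mirror names from this diff.
class BaseTransformerSketch:
    def fit(self, dataset):
        # telemetry bookkeeping would happen here, once for every estimator
        return self._fit(dataset)

    def _fit(self, dataset):
        raise NotImplementedError


class StackingRegressorSketch(BaseTransformerSketch):
    def _fit(self, dataset):
        self._fitted_on = dataset  # the real class trains via ModelTrainerBuilder
        return self


est = StackingRegressorSketch().fit([1, 2, 3])  # caller-facing API is unchanged
assert est._fitted_on == [1, 2, 3]
```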
```diff
@@ -287,12 +275,14 @@ class StackingRegressor(BaseTransformer):
 
         self._snowpark_cols = dataset.select(self.input_cols).columns
 
-
+        # If we are already in a stored procedure, no need to kick off another one.
         if SNOWML_SPROC_ENV in os.environ:
             statement_params = telemetry.get_function_usage_statement_params(
                 project=_PROJECT,
                 subproject=_SUBPROJECT,
-                function_name=telemetry.get_statement_params_full_func_name(inspect.currentframe(), StackingRegressor.__class__.__name__),
+                function_name=telemetry.get_statement_params_full_func_name(
+                    inspect.currentframe(), StackingRegressor.__class__.__name__
+                ),
                 api_calls=[Session.call],
                 custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
             )
@@ -313,27 +303,24 @@ class StackingRegressor(BaseTransformer):
         )
         self._sklearn_object = model_trainer.train()
         self._is_fitted = True
-        self.
+        self._generate_model_signatures(dataset)
         return self
 
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
-    ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
-        return the available package that exists in the snowflake anaconda channel
+    ) -> None:
+        """Util method to run validate that batch inference can be run on a snowpark dataframe.
 
         Args:
             dataset: snowpark dataframe
             inference_method: the inference method such as predict, score...
-
+
         Raises:
             SnowflakeMLException: If the estimator is not fitted, raise error
             SnowflakeMLException: If the session is None, raise error
 
-        Returns:
-            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
            raise exceptions.SnowflakeMLException(
```
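Two contract changes land here. `_fit` now finishes by calling `_generate_model_signatures`, and `_batch_inference_validate_snowpark` stops returning the list of conda packages resolvable in the Snowflake Anaconda channel: it returns `None`, and every inference path instead pairs it with `self._deps = self._get_dependencies()`. Since the new `_get_dependencies` (added near the end of this diff) just returns `self._deps`, package resolution has presumably moved earlier in the lifecycle; that is an inference from this diff, not from release notes. A runnable sketch of the new contract:

```python
# Sketch of the 1.5.0 calling convention; names mirror this diff, bodies are stand-ins.
from typing import List

class EstimatorSketch:
    def __init__(self, deps: List[str]) -> None:
        self._deps = deps  # assumed to be resolved before inference time

    def _batch_inference_validate_snowpark(self, dataset: object, inference_method: str) -> None:
        if dataset is None:  # validate only; no package list is returned anymore
            raise ValueError("dataset is required")

    def _get_dependencies(self) -> List[str]:
        return self._deps  # matches the accessor added at the end of this diff

est = EstimatorSketch(["scikit-learn==1.3.0"])
est._batch_inference_validate_snowpark(dataset=object(), inference_method="predict")
assert est._get_dependencies() == ["scikit-learn==1.3.0"]
```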
```diff
@@ -351,9 +338,7 @@ class StackingRegressor(BaseTransformer):
                     "Session must not specified for snowpark dataset."
                 ),
             )
-
-        return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
-            pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
+
 
     @available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
@@ -389,7 +374,9 @@ class StackingRegressor(BaseTransformer):
             # when it is classifier, infer the datatype from label columns
             if expected_type_inferred == "" and 'predict' in self.model_signatures:
                 # Batch inference takes a single expected output column type. Use the first columns type for now.
-                label_cols_signatures = [row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols]
+                label_cols_signatures = [
+                    row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
+                ]
                 if len(label_cols_signatures) == 0:
                     error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
                     raise exceptions.SnowflakeMLException(
@@ -397,25 +384,23 @@ class StackingRegressor(BaseTransformer):
                         original_exception=ValueError(error_str),
                     )
 
-                expected_type_inferred = convert_sp_to_sf_type(
-                    label_cols_signatures[0].as_snowpark_type()
-                )
+                expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
 
-            self.
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session
-                dependencies
-                drop_input_cols
-                expected_output_cols_type
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_type_inferred,
             )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -457,7 +442,7 @@ class StackingRegressor(BaseTransformer):
             Transformed dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="transform"
+        inference_method = "transform"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
@@ -487,24 +472,19 @@ class StackingRegressor(BaseTransformer):
             if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
                 expected_dtype = convert_sp_to_sf_type(output_types[0])
 
-            self.
-
-                inference_method=inference_method,
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
 
             transform_kwargs = dict(
-                session
-                dependencies
-                drop_input_cols
-                expected_output_cols_type
+                session=dataset._session,
+                dependencies=self._deps,
+                drop_input_cols=self._drop_input_cols,
+                expected_output_cols_type=expected_dtype,
             )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -523,7 +503,11 @@ class StackingRegressor(BaseTransformer):
         return output_df
 
     @available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
-    def fit_predict(
+    def fit_predict(
+        self,
+        dataset: Union[DataFrame, pd.DataFrame],
+        output_cols_prefix: str = "fit_predict_",
+    ) -> Union[DataFrame, pd.DataFrame]:
         """ Method not supported for this class.
 
 
```
```diff
@@ -548,22 +532,106 @@ class StackingRegressor(BaseTransformer):
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
             drop_input_cols=self._drop_input_cols,
-            expected_output_cols_list=
+            expected_output_cols_list=(
+                self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
+            ),
         )
         self._sklearn_object = fitted_estimator
         self._is_fitted = True
         return output_result
 
+
+    @available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
+    def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
+        """ Fit the estimators and return the predictions for X for each estimator
+        For more details on this function, see [sklearn.ensemble.StackingRegressor.fit_transform]
+        (https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingRegressor.html#sklearn.ensemble.StackingRegressor.fit_transform)
+
+
+        Raises:
+            TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
 
-
-
-
+        Args:
+            dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
+                Snowpark or Pandas DataFrame.
+            output_cols_prefix: Prefix for the response columns
         Returns:
             Transformed dataset.
         """
-        self.
-
-
+        self._infer_input_output_cols(dataset)
+        super()._check_dataset_type(dataset)
+        model_trainer = ModelTrainerBuilder.build_fit_transform(
+            estimator=self._sklearn_object,
+            dataset=dataset,
+            input_cols=self.input_cols,
+            label_cols=self.label_cols,
+            sample_weight_col=self.sample_weight_col,
+            autogenerated=self._autogenerated,
+            subproject=_SUBPROJECT,
+        )
+        output_result, fitted_estimator = model_trainer.train_fit_transform(
+            drop_input_cols=self._drop_input_cols,
+            expected_output_cols_list=self.output_cols,
+        )
+        self._sklearn_object = fitted_estimator
+        self._is_fitted = True
+        return output_result
+
+
+    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
+        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
+        """
+        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
+        # The following condition is introduced for kneighbors methods, and not used in other methods
+        if output_cols:
+            output_cols = [
+                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
+                for c in output_cols
+            ]
+        elif getattr(self._sklearn_object, "classes_", None) is None:
+            output_cols = [output_cols_prefix]
+        elif self._sklearn_object is not None:
+            classes = self._sklearn_object.classes_
+            if isinstance(classes, numpy.ndarray):
+                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
+            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
+                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
+                output_cols = []
+                for i, cl in enumerate(classes):
+                    # For binary classification, there is only one output column for each class
+                    # ndarray as the two classes are complementary.
+                    if len(cl) == 2:
+                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
+                    else:
+                        output_cols.extend([
+                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
+                        ])
+        else:
+            output_cols = []
+
+        # Make sure column names are valid snowflake identifiers.
+        assert output_cols is not None # Make MyPy happy
+        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
+
+        return rv
+
+    def _align_expected_output_names(
+        self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
+    ) -> List[str]:
+        # in case the inferred output column names dimension is different
+        # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
+        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        output_df_columns = list(output_df_pd.columns)
+        output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
+        if self.sample_weight_col:
+            output_df_columns_set -= set(self.sample_weight_col)
+        # if the dimension of inferred output column names is correct; use it
+        if len(expected_output_cols_list) == len(output_df_columns_set):
+            return expected_output_cols_list
+        # otherwise, use the sklearn estimator's output
+        else:
+            return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
 
 
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
```
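This hunk is the bulk of the per-estimator template change: `fit_predict` gains an `output_cols_prefix` fallback, `fit_transform` becomes a real method trained through `ModelTrainerBuilder.build_fit_transform`, and two new helpers own output-column naming. `_align_expected_output_names` guards against dimension mismatches by running the same method on a single row locally in pandas and falling back to sklearn's own column order when the counts disagree. A usage sketch for the new `fit_transform` (hypothetical data; assumes snowflake-ml-python 1.5.0 and scikit-learn installed; exact default output column names may differ):

```python
# Hedged usage sketch; the pandas path trains locally, while a Snowpark
# DataFrame would train through a stored procedure instead.
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from snowflake.ml.modeling.ensemble import StackingRegressor

train_df = pd.DataFrame({
    "FEATURE1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
    "FEATURE2": [0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
    "TARGET": [1.1, 2.9, 5.2, 6.8, 9.1, 10.9],
})
reg = StackingRegressor(
    estimators=[("ridge", Ridge()), ("ols", LinearRegression())],
    input_cols=["FEATURE1", "FEATURE2"],
    label_cols=["TARGET"],
)
out = reg.fit_transform(train_df)  # per-estimator predictions, "fit_transform_"-prefixed by default
```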
```diff
@@ -595,24 +663,26 @@ class StackingRegressor(BaseTransformer):
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
-            self.
-
-
-
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
```
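`predict_proba` now computes `expected_output_cols` up front via `_get_output_column_names` and, on the Snowpark path, reconciles them with `_align_expected_output_names`; the identical pattern repeats below for `predict_log_proba`, `decision_function`, and `score_samples`. A worked illustration of the naming scheme (simplified: the real helper also routes every name through Snowflake identifier resolution):

```python
# For a classifier with classes_ = [0, 1, 2], predict_proba gets one column per class:
prefix = "PREDICT_PROBA_"
classes = [0, 1, 2]
cols = [f"{prefix}{c}" for c in classes]
assert cols == ["PREDICT_PROBA_0", "PREDICT_PROBA_1", "PREDICT_PROBA_2"]
# An estimator without classes_ (like this regressor) gets just [prefix].
```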
```diff
@@ -624,7 +694,7 @@ class StackingRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -654,29 +724,30 @@ class StackingRegressor(BaseTransformer):
             Output dataset with log probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="predict_log_proba"
+        inference_method = "predict_log_proba"
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
-            self.
-
-
-
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -689,7 +760,7 @@ class StackingRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -715,30 +786,32 @@ class StackingRegressor(BaseTransformer):
             Output dataset with results of the decision function for the samples in input dataset.
         """
         super()._check_dataset_type(dataset)
-        inference_method="decision_function"
+        inference_method = "decision_function"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
-            self.
-
-
-
-
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
+            assert isinstance(
+                dataset._session, Session
+            ) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-                drop_input_cols
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
-            transform_kwargs = dict(
-                snowpark_input_cols = self._snowpark_cols,
-                drop_input_cols = self._drop_input_cols
-            )
+            transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
 
         transform_handlers = ModelTransformerBuilder.build(
             dataset=dataset,
@@ -751,7 +824,7 @@ class StackingRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -780,17 +853,17 @@ class StackingRegressor(BaseTransformer):
             Output dataset with probability of the sample for each class in the model.
         """
         super()._check_dataset_type(dataset)
-        inference_method="score_samples"
+        inference_method = "score_samples"
 
         # This dictionary contains optional kwargs for batch inference. These kwargs
         # are specific to the type of dataset used.
         transform_kwargs: BatchInferenceKwargsTypedDict = dict()
 
+        expected_output_cols = self._get_output_column_names(output_cols_prefix)
+
         if isinstance(dataset, DataFrame):
-            self.
-
-                inference_method=inference_method,
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session=dataset._session,
@@ -798,6 +871,9 @@ class StackingRegressor(BaseTransformer):
                 drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
+            expected_output_cols = self._align_expected_output_names(
+                inference_method, dataset, expected_output_cols, output_cols_prefix
+            )
 
         elif isinstance(dataset, pd.DataFrame):
             transform_kwargs = dict(
@@ -816,7 +892,7 @@ class StackingRegressor(BaseTransformer):
         output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
             inference_method=inference_method,
             input_cols=self.input_cols,
-            expected_output_cols=
+            expected_output_cols=expected_output_cols,
             **transform_kwargs
         )
         return output_df
@@ -851,17 +927,15 @@ class StackingRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
-            self.
-
-                inference_method="score",
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
+            self._deps = self._get_dependencies()
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=
+                dependencies=self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -926,11 +1000,8 @@ class StackingRegressor(BaseTransformer):
 
         if isinstance(dataset, DataFrame):
 
-            self.
-
-                inference_method=inference_method,
-
-            )
+            self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
+            self._deps = self._get_dependencies()
             assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
             transform_kwargs = dict(
                 session = dataset._session,
```
```diff
@@ -963,50 +1034,84 @@ class StackingRegressor(BaseTransformer):
         )
         return output_df
 
+
+
+    def to_sklearn(self) -> Any:
+        """Get sklearn.ensemble.StackingRegressor object.
+        """
+        if self._sklearn_object is None:
+            self._sklearn_object = self._create_sklearn_object()
+        return self._sklearn_object
+
+    def to_xgboost(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_xgboost()",
+                    "to_sklearn()"
+                )
+            ),
+        )
 
-    def
+    def to_lightgbm(self) -> Any:
+        raise exceptions.SnowflakeMLException(
+            error_code=error_codes.METHOD_NOT_ALLOWED,
+            original_exception=AttributeError(
+                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
+                    "to_lightgbm()",
+                    "to_sklearn()"
+                )
+            ),
+        )
+
+    def _get_dependencies(self) -> List[str]:
+        return self._deps
+
+
+    def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         self._model_signature_dict = dict()
 
         PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
 
-        inputs = list(_infer_signature(dataset[self.input_cols], "input"))
+        inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
         outputs: List[BaseFeatureSpec] = []
         if hasattr(self, "predict"):
             # keep mypy happy
-            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
+            assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
             # For classifier, the type of predict is the same as the type of label
-            if self._sklearn_object._estimator_type ==
-
+            if self._sklearn_object._estimator_type == "classifier":
+                # label columns is the desired type for output
                 outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
                 # rename the output columns
                 outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
             # For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
             # For outlier models, returns -1 for outliers and 1 for inliers.
-            # Clusterer returns int64 cluster labels.
+            # Clusterer returns int64 cluster labels.
             elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
                 outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
             # For regressor, the type of predict is float64
-            elif self._sklearn_object._estimator_type ==
+            elif self._sklearn_object._estimator_type == "regressor":
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-                self._model_signature_dict["predict"] = ModelSignature(
-
-
-
+                self._model_signature_dict["predict"] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
+
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(
-
-
+                self._model_signature_dict[prob_func] = ModelSignature(
+                    inputs, ([] if self._drop_input_cols else inputs) + outputs
+                )
 
         # Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
         items = list(self._model_signature_dict.items())
```
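The conversion helpers (`to_sklearn`, `to_xgboost`, `to_lightgbm`) move up from the bottom of the file (see the matching deletion in the final hunk) and gain a `_get_dependencies` sibling, while the signature generator is named `_generate_model_signatures` and now infers input signatures with `use_snowflake_identifiers=True`. Usage of the converters is unchanged:

```python
# `reg` is a trained snowflake.ml StackingRegressor as in the earlier sketch.
skl_model = reg.to_sklearn()    # underlying sklearn.ensemble.StackingRegressor
print(type(skl_model))

try:
    reg.to_xgboost()            # cross-framework conversion is rejected
except Exception as exc:        # SnowflakeMLException(METHOD_NOT_ALLOWED, AttributeError)
    print(exc)
```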
```diff
@@ -1019,10 +1124,10 @@ class StackingRegressor(BaseTransformer):
         """Returns model signature of current class.
 
         Raises:
-
+            SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
 
         Returns:
-            Dict
+            Dict with each method and its input output signature
         """
         if self._model_signature_dict is None:
             raise exceptions.SnowflakeMLException(
```
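Only the docstring of the `model_signatures` property changes; after training it still maps each inference method to its `ModelSignature`:

```python
# Sketch: available keys are "predict" plus whichever of predict_proba /
# predict_log_proba / decision_function the wrapped estimator exposes.
sigs = reg.model_signatures
print([feat.name for feat in sigs["predict"].outputs])
```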
```diff
@@ -1030,35 +1135,3 @@ class StackingRegressor(BaseTransformer):
             original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
         )
         return self._model_signature_dict
-
-    def to_sklearn(self) -> Any:
-        """Get sklearn.ensemble.StackingRegressor object.
-        """
-        if self._sklearn_object is None:
-            self._sklearn_object = self._create_sklearn_object()
-        return self._sklearn_object
-
-    def to_xgboost(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_xgboost()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def to_lightgbm(self) -> Any:
-        raise exceptions.SnowflakeMLException(
-            error_code=error_codes.METHOD_NOT_ALLOWED,
-            original_exception=AttributeError(
-                modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
-                    "to_lightgbm()",
-                    "to_sklearn()"
-                )
-            ),
-        )
-
-    def _get_dependencies(self) -> List[str]:
-        return self._deps
```