snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.ensemble".replace("sklea
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class GradientBoostingRegressor(BaseTransformer):
|
71
64
|
r"""Gradient Boosting for regression
|
72
65
|
For more details on this class, see [sklearn.ensemble.GradientBoostingRegressor]
|
@@ -400,12 +393,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
400
393
|
)
|
401
394
|
return selected_cols
|
402
395
|
|
403
|
-
|
404
|
-
project=_PROJECT,
|
405
|
-
subproject=_SUBPROJECT,
|
406
|
-
custom_tags=dict([("autogen", True)]),
|
407
|
-
)
|
408
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GradientBoostingRegressor":
|
396
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "GradientBoostingRegressor":
|
409
397
|
"""Fit the gradient boosting model
|
410
398
|
For more details on this function, see [sklearn.ensemble.GradientBoostingRegressor.fit]
|
411
399
|
(https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html#sklearn.ensemble.GradientBoostingRegressor.fit)
|
@@ -432,12 +420,14 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
432
420
|
|
433
421
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
434
422
|
|
435
|
-
|
423
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
436
424
|
if SNOWML_SPROC_ENV in os.environ:
|
437
425
|
statement_params = telemetry.get_function_usage_statement_params(
|
438
426
|
project=_PROJECT,
|
439
427
|
subproject=_SUBPROJECT,
|
440
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
428
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
429
|
+
inspect.currentframe(), GradientBoostingRegressor.__class__.__name__
|
430
|
+
),
|
441
431
|
api_calls=[Session.call],
|
442
432
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
443
433
|
)
|
@@ -458,27 +448,24 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
458
448
|
)
|
459
449
|
self._sklearn_object = model_trainer.train()
|
460
450
|
self._is_fitted = True
|
461
|
-
self.
|
451
|
+
self._generate_model_signatures(dataset)
|
462
452
|
return self
|
463
453
|
|
464
454
|
def _batch_inference_validate_snowpark(
|
465
455
|
self,
|
466
456
|
dataset: DataFrame,
|
467
457
|
inference_method: str,
|
468
|
-
) ->
|
469
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
470
|
-
return the available package that exists in the snowflake anaconda channel
|
458
|
+
) -> None:
|
459
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
471
460
|
|
472
461
|
Args:
|
473
462
|
dataset: snowpark dataframe
|
474
463
|
inference_method: the inference method such as predict, score...
|
475
|
-
|
464
|
+
|
476
465
|
Raises:
|
477
466
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
478
467
|
SnowflakeMLException: If the session is None, raise error
|
479
468
|
|
480
|
-
Returns:
|
481
|
-
A list of available package that exists in the snowflake anaconda channel
|
482
469
|
"""
|
483
470
|
if not self._is_fitted:
|
484
471
|
raise exceptions.SnowflakeMLException(
|
@@ -496,9 +483,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
496
483
|
"Session must not specified for snowpark dataset."
|
497
484
|
),
|
498
485
|
)
|
499
|
-
|
500
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
501
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
486
|
+
|
502
487
|
|
503
488
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
504
489
|
@telemetry.send_api_usage_telemetry(
|
@@ -534,7 +519,9 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
534
519
|
# when it is classifier, infer the datatype from label columns
|
535
520
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
536
521
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
537
|
-
label_cols_signatures = [
|
522
|
+
label_cols_signatures = [
|
523
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
524
|
+
]
|
538
525
|
if len(label_cols_signatures) == 0:
|
539
526
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
540
527
|
raise exceptions.SnowflakeMLException(
|
@@ -542,25 +529,23 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
542
529
|
original_exception=ValueError(error_str),
|
543
530
|
)
|
544
531
|
|
545
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
546
|
-
label_cols_signatures[0].as_snowpark_type()
|
547
|
-
)
|
532
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
548
533
|
|
549
|
-
self.
|
550
|
-
|
534
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
535
|
+
self._deps = self._get_dependencies()
|
536
|
+
assert isinstance(
|
537
|
+
dataset._session, Session
|
538
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
551
539
|
|
552
540
|
transform_kwargs = dict(
|
553
|
-
session
|
554
|
-
dependencies
|
555
|
-
drop_input_cols
|
556
|
-
expected_output_cols_type
|
541
|
+
session=dataset._session,
|
542
|
+
dependencies=self._deps,
|
543
|
+
drop_input_cols=self._drop_input_cols,
|
544
|
+
expected_output_cols_type=expected_type_inferred,
|
557
545
|
)
|
558
546
|
|
559
547
|
elif isinstance(dataset, pd.DataFrame):
|
560
|
-
transform_kwargs = dict(
|
561
|
-
snowpark_input_cols = self._snowpark_cols,
|
562
|
-
drop_input_cols = self._drop_input_cols
|
563
|
-
)
|
548
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
564
549
|
|
565
550
|
transform_handlers = ModelTransformerBuilder.build(
|
566
551
|
dataset=dataset,
|
@@ -600,7 +585,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
600
585
|
Transformed dataset.
|
601
586
|
"""
|
602
587
|
super()._check_dataset_type(dataset)
|
603
|
-
inference_method="transform"
|
588
|
+
inference_method = "transform"
|
604
589
|
|
605
590
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
606
591
|
# are specific to the type of dataset used.
|
@@ -630,24 +615,19 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
630
615
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
631
616
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
632
617
|
|
633
|
-
self.
|
634
|
-
|
635
|
-
inference_method=inference_method,
|
636
|
-
)
|
618
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
619
|
+
self._deps = self._get_dependencies()
|
637
620
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
638
621
|
|
639
622
|
transform_kwargs = dict(
|
640
|
-
session
|
641
|
-
dependencies
|
642
|
-
drop_input_cols
|
643
|
-
expected_output_cols_type
|
623
|
+
session=dataset._session,
|
624
|
+
dependencies=self._deps,
|
625
|
+
drop_input_cols=self._drop_input_cols,
|
626
|
+
expected_output_cols_type=expected_dtype,
|
644
627
|
)
|
645
628
|
|
646
629
|
elif isinstance(dataset, pd.DataFrame):
|
647
|
-
transform_kwargs = dict(
|
648
|
-
snowpark_input_cols = self._snowpark_cols,
|
649
|
-
drop_input_cols = self._drop_input_cols
|
650
|
-
)
|
630
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
651
631
|
|
652
632
|
transform_handlers = ModelTransformerBuilder.build(
|
653
633
|
dataset=dataset,
|
@@ -666,7 +646,11 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
666
646
|
return output_df
|
667
647
|
|
668
648
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
669
|
-
def fit_predict(
|
649
|
+
def fit_predict(
|
650
|
+
self,
|
651
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
652
|
+
output_cols_prefix: str = "fit_predict_",
|
653
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
670
654
|
""" Method not supported for this class.
|
671
655
|
|
672
656
|
|
@@ -691,22 +675,104 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
691
675
|
)
|
692
676
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
693
677
|
drop_input_cols=self._drop_input_cols,
|
694
|
-
expected_output_cols_list=
|
678
|
+
expected_output_cols_list=(
|
679
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
680
|
+
),
|
695
681
|
)
|
696
682
|
self._sklearn_object = fitted_estimator
|
697
683
|
self._is_fitted = True
|
698
684
|
return output_result
|
699
685
|
|
686
|
+
|
687
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
688
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
689
|
+
""" Method not supported for this class.
|
690
|
+
|
700
691
|
|
701
|
-
|
702
|
-
|
703
|
-
|
692
|
+
Raises:
|
693
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
694
|
+
|
695
|
+
Args:
|
696
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
697
|
+
Snowpark or Pandas DataFrame.
|
698
|
+
output_cols_prefix: Prefix for the response columns
|
704
699
|
Returns:
|
705
700
|
Transformed dataset.
|
706
701
|
"""
|
707
|
-
self.
|
708
|
-
|
709
|
-
|
702
|
+
self._infer_input_output_cols(dataset)
|
703
|
+
super()._check_dataset_type(dataset)
|
704
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
705
|
+
estimator=self._sklearn_object,
|
706
|
+
dataset=dataset,
|
707
|
+
input_cols=self.input_cols,
|
708
|
+
label_cols=self.label_cols,
|
709
|
+
sample_weight_col=self.sample_weight_col,
|
710
|
+
autogenerated=self._autogenerated,
|
711
|
+
subproject=_SUBPROJECT,
|
712
|
+
)
|
713
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
714
|
+
drop_input_cols=self._drop_input_cols,
|
715
|
+
expected_output_cols_list=self.output_cols,
|
716
|
+
)
|
717
|
+
self._sklearn_object = fitted_estimator
|
718
|
+
self._is_fitted = True
|
719
|
+
return output_result
|
720
|
+
|
721
|
+
|
722
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
723
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
724
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
725
|
+
"""
|
726
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
727
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
728
|
+
if output_cols:
|
729
|
+
output_cols = [
|
730
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
731
|
+
for c in output_cols
|
732
|
+
]
|
733
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
734
|
+
output_cols = [output_cols_prefix]
|
735
|
+
elif self._sklearn_object is not None:
|
736
|
+
classes = self._sklearn_object.classes_
|
737
|
+
if isinstance(classes, numpy.ndarray):
|
738
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
739
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
740
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
741
|
+
output_cols = []
|
742
|
+
for i, cl in enumerate(classes):
|
743
|
+
# For binary classification, there is only one output column for each class
|
744
|
+
# ndarray as the two classes are complementary.
|
745
|
+
if len(cl) == 2:
|
746
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
747
|
+
else:
|
748
|
+
output_cols.extend([
|
749
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
750
|
+
])
|
751
|
+
else:
|
752
|
+
output_cols = []
|
753
|
+
|
754
|
+
# Make sure column names are valid snowflake identifiers.
|
755
|
+
assert output_cols is not None # Make MyPy happy
|
756
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
757
|
+
|
758
|
+
return rv
|
759
|
+
|
760
|
+
def _align_expected_output_names(
|
761
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
762
|
+
) -> List[str]:
|
763
|
+
# in case the inferred output column names dimension is different
|
764
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
765
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
766
|
+
output_df_columns = list(output_df_pd.columns)
|
767
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
768
|
+
if self.sample_weight_col:
|
769
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
770
|
+
# if the dimension of inferred output column names is correct; use it
|
771
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
772
|
+
return expected_output_cols_list
|
773
|
+
# otherwise, use the sklearn estimator's output
|
774
|
+
else:
|
775
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
710
776
|
|
711
777
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
712
778
|
@telemetry.send_api_usage_telemetry(
|
@@ -738,24 +804,26 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
738
804
|
# are specific to the type of dataset used.
|
739
805
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
740
806
|
|
807
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
808
|
+
|
741
809
|
if isinstance(dataset, DataFrame):
|
742
|
-
self.
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
810
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
811
|
+
self._deps = self._get_dependencies()
|
812
|
+
assert isinstance(
|
813
|
+
dataset._session, Session
|
814
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
747
815
|
transform_kwargs = dict(
|
748
816
|
session=dataset._session,
|
749
817
|
dependencies=self._deps,
|
750
|
-
drop_input_cols
|
818
|
+
drop_input_cols=self._drop_input_cols,
|
751
819
|
expected_output_cols_type="float",
|
752
820
|
)
|
821
|
+
expected_output_cols = self._align_expected_output_names(
|
822
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
823
|
+
)
|
753
824
|
|
754
825
|
elif isinstance(dataset, pd.DataFrame):
|
755
|
-
transform_kwargs = dict(
|
756
|
-
snowpark_input_cols = self._snowpark_cols,
|
757
|
-
drop_input_cols = self._drop_input_cols
|
758
|
-
)
|
826
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
759
827
|
|
760
828
|
transform_handlers = ModelTransformerBuilder.build(
|
761
829
|
dataset=dataset,
|
@@ -767,7 +835,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
767
835
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
768
836
|
inference_method=inference_method,
|
769
837
|
input_cols=self.input_cols,
|
770
|
-
expected_output_cols=
|
838
|
+
expected_output_cols=expected_output_cols,
|
771
839
|
**transform_kwargs
|
772
840
|
)
|
773
841
|
return output_df
|
@@ -797,29 +865,30 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
797
865
|
Output dataset with log probability of the sample for each class in the model.
|
798
866
|
"""
|
799
867
|
super()._check_dataset_type(dataset)
|
800
|
-
inference_method="predict_log_proba"
|
868
|
+
inference_method = "predict_log_proba"
|
869
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
801
870
|
|
802
871
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
803
872
|
# are specific to the type of dataset used.
|
804
873
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
805
874
|
|
806
875
|
if isinstance(dataset, DataFrame):
|
807
|
-
self.
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
876
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
877
|
+
self._deps = self._get_dependencies()
|
878
|
+
assert isinstance(
|
879
|
+
dataset._session, Session
|
880
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
812
881
|
transform_kwargs = dict(
|
813
882
|
session=dataset._session,
|
814
883
|
dependencies=self._deps,
|
815
|
-
drop_input_cols
|
884
|
+
drop_input_cols=self._drop_input_cols,
|
816
885
|
expected_output_cols_type="float",
|
817
886
|
)
|
887
|
+
expected_output_cols = self._align_expected_output_names(
|
888
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
889
|
+
)
|
818
890
|
elif isinstance(dataset, pd.DataFrame):
|
819
|
-
transform_kwargs = dict(
|
820
|
-
snowpark_input_cols = self._snowpark_cols,
|
821
|
-
drop_input_cols = self._drop_input_cols
|
822
|
-
)
|
891
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
823
892
|
|
824
893
|
transform_handlers = ModelTransformerBuilder.build(
|
825
894
|
dataset=dataset,
|
@@ -832,7 +901,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
832
901
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
833
902
|
inference_method=inference_method,
|
834
903
|
input_cols=self.input_cols,
|
835
|
-
expected_output_cols=
|
904
|
+
expected_output_cols=expected_output_cols,
|
836
905
|
**transform_kwargs
|
837
906
|
)
|
838
907
|
return output_df
|
@@ -858,30 +927,32 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
858
927
|
Output dataset with results of the decision function for the samples in input dataset.
|
859
928
|
"""
|
860
929
|
super()._check_dataset_type(dataset)
|
861
|
-
inference_method="decision_function"
|
930
|
+
inference_method = "decision_function"
|
862
931
|
|
863
932
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
864
933
|
# are specific to the type of dataset used.
|
865
934
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
866
935
|
|
936
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
937
|
+
|
867
938
|
if isinstance(dataset, DataFrame):
|
868
|
-
self.
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
939
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
940
|
+
self._deps = self._get_dependencies()
|
941
|
+
assert isinstance(
|
942
|
+
dataset._session, Session
|
943
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
873
944
|
transform_kwargs = dict(
|
874
945
|
session=dataset._session,
|
875
946
|
dependencies=self._deps,
|
876
|
-
drop_input_cols
|
947
|
+
drop_input_cols=self._drop_input_cols,
|
877
948
|
expected_output_cols_type="float",
|
878
949
|
)
|
950
|
+
expected_output_cols = self._align_expected_output_names(
|
951
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
952
|
+
)
|
879
953
|
|
880
954
|
elif isinstance(dataset, pd.DataFrame):
|
881
|
-
transform_kwargs = dict(
|
882
|
-
snowpark_input_cols = self._snowpark_cols,
|
883
|
-
drop_input_cols = self._drop_input_cols
|
884
|
-
)
|
955
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
885
956
|
|
886
957
|
transform_handlers = ModelTransformerBuilder.build(
|
887
958
|
dataset=dataset,
|
@@ -894,7 +965,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
894
965
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
895
966
|
inference_method=inference_method,
|
896
967
|
input_cols=self.input_cols,
|
897
|
-
expected_output_cols=
|
968
|
+
expected_output_cols=expected_output_cols,
|
898
969
|
**transform_kwargs
|
899
970
|
)
|
900
971
|
return output_df
|
@@ -923,17 +994,17 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
923
994
|
Output dataset with probability of the sample for each class in the model.
|
924
995
|
"""
|
925
996
|
super()._check_dataset_type(dataset)
|
926
|
-
inference_method="score_samples"
|
997
|
+
inference_method = "score_samples"
|
927
998
|
|
928
999
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
929
1000
|
# are specific to the type of dataset used.
|
930
1001
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
931
1002
|
|
1003
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
1004
|
+
|
932
1005
|
if isinstance(dataset, DataFrame):
|
933
|
-
self.
|
934
|
-
|
935
|
-
inference_method=inference_method,
|
936
|
-
)
|
1006
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1007
|
+
self._deps = self._get_dependencies()
|
937
1008
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
938
1009
|
transform_kwargs = dict(
|
939
1010
|
session=dataset._session,
|
@@ -941,6 +1012,9 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
941
1012
|
drop_input_cols = self._drop_input_cols,
|
942
1013
|
expected_output_cols_type="float",
|
943
1014
|
)
|
1015
|
+
expected_output_cols = self._align_expected_output_names(
|
1016
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1017
|
+
)
|
944
1018
|
|
945
1019
|
elif isinstance(dataset, pd.DataFrame):
|
946
1020
|
transform_kwargs = dict(
|
@@ -959,7 +1033,7 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
959
1033
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
960
1034
|
inference_method=inference_method,
|
961
1035
|
input_cols=self.input_cols,
|
962
|
-
expected_output_cols=
|
1036
|
+
expected_output_cols=expected_output_cols,
|
963
1037
|
**transform_kwargs
|
964
1038
|
)
|
965
1039
|
return output_df
|
@@ -994,17 +1068,15 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
994
1068
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
995
1069
|
|
996
1070
|
if isinstance(dataset, DataFrame):
|
997
|
-
self.
|
998
|
-
|
999
|
-
inference_method="score",
|
1000
|
-
)
|
1071
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
1072
|
+
self._deps = self._get_dependencies()
|
1001
1073
|
selected_cols = self._get_active_columns()
|
1002
1074
|
if len(selected_cols) > 0:
|
1003
1075
|
dataset = dataset.select(selected_cols)
|
1004
1076
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
1005
1077
|
transform_kwargs = dict(
|
1006
1078
|
session=dataset._session,
|
1007
|
-
dependencies=
|
1079
|
+
dependencies=self._deps,
|
1008
1080
|
score_sproc_imports=['sklearn'],
|
1009
1081
|
)
|
1010
1082
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -1069,11 +1141,8 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1069
1141
|
|
1070
1142
|
if isinstance(dataset, DataFrame):
|
1071
1143
|
|
1072
|
-
self.
|
1073
|
-
|
1074
|
-
inference_method=inference_method,
|
1075
|
-
|
1076
|
-
)
|
1144
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1145
|
+
self._deps = self._get_dependencies()
|
1077
1146
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
1078
1147
|
transform_kwargs = dict(
|
1079
1148
|
session = dataset._session,
|
@@ -1106,50 +1175,84 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1106
1175
|
)
|
1107
1176
|
return output_df
|
1108
1177
|
|
1178
|
+
|
1179
|
+
|
1180
|
+
def to_sklearn(self) -> Any:
|
1181
|
+
"""Get sklearn.ensemble.GradientBoostingRegressor object.
|
1182
|
+
"""
|
1183
|
+
if self._sklearn_object is None:
|
1184
|
+
self._sklearn_object = self._create_sklearn_object()
|
1185
|
+
return self._sklearn_object
|
1186
|
+
|
1187
|
+
def to_xgboost(self) -> Any:
|
1188
|
+
raise exceptions.SnowflakeMLException(
|
1189
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1190
|
+
original_exception=AttributeError(
|
1191
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1192
|
+
"to_xgboost()",
|
1193
|
+
"to_sklearn()"
|
1194
|
+
)
|
1195
|
+
),
|
1196
|
+
)
|
1197
|
+
|
1198
|
+
def to_lightgbm(self) -> Any:
|
1199
|
+
raise exceptions.SnowflakeMLException(
|
1200
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1201
|
+
original_exception=AttributeError(
|
1202
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1203
|
+
"to_lightgbm()",
|
1204
|
+
"to_sklearn()"
|
1205
|
+
)
|
1206
|
+
),
|
1207
|
+
)
|
1208
|
+
|
1209
|
+
def _get_dependencies(self) -> List[str]:
|
1210
|
+
return self._deps
|
1211
|
+
|
1109
1212
|
|
1110
|
-
def
|
1213
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1111
1214
|
self._model_signature_dict = dict()
|
1112
1215
|
|
1113
1216
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1114
1217
|
|
1115
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1218
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1116
1219
|
outputs: List[BaseFeatureSpec] = []
|
1117
1220
|
if hasattr(self, "predict"):
|
1118
1221
|
# keep mypy happy
|
1119
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1222
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1120
1223
|
# For classifier, the type of predict is the same as the type of label
|
1121
|
-
if self._sklearn_object._estimator_type ==
|
1122
|
-
|
1224
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1225
|
+
# label columns is the desired type for output
|
1123
1226
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1124
1227
|
# rename the output columns
|
1125
1228
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1126
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1127
|
-
|
1128
|
-
|
1229
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1230
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1231
|
+
)
|
1129
1232
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1130
1233
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1131
|
-
# Clusterer returns int64 cluster labels.
|
1234
|
+
# Clusterer returns int64 cluster labels.
|
1132
1235
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1133
1236
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1134
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1237
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1238
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1239
|
+
)
|
1240
|
+
|
1138
1241
|
# For regressor, the type of predict is float64
|
1139
|
-
elif self._sklearn_object._estimator_type ==
|
1242
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1140
1243
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1141
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1244
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1245
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1246
|
+
)
|
1247
|
+
|
1145
1248
|
for prob_func in PROB_FUNCTIONS:
|
1146
1249
|
if hasattr(self, prob_func):
|
1147
1250
|
output_cols_prefix: str = f"{prob_func}_"
|
1148
1251
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1149
1252
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1150
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1151
|
-
|
1152
|
-
|
1253
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1254
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1255
|
+
)
|
1153
1256
|
|
1154
1257
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1155
1258
|
items = list(self._model_signature_dict.items())
|
@@ -1162,10 +1265,10 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1162
1265
|
"""Returns model signature of current class.
|
1163
1266
|
|
1164
1267
|
Raises:
|
1165
|
-
|
1268
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1166
1269
|
|
1167
1270
|
Returns:
|
1168
|
-
Dict
|
1271
|
+
Dict with each method and its input output signature
|
1169
1272
|
"""
|
1170
1273
|
if self._model_signature_dict is None:
|
1171
1274
|
raise exceptions.SnowflakeMLException(
|
@@ -1173,35 +1276,3 @@ class GradientBoostingRegressor(BaseTransformer):
|
|
1173
1276
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1174
1277
|
)
|
1175
1278
|
return self._model_signature_dict
|
1176
|
-
|
1177
|
-
def to_sklearn(self) -> Any:
|
1178
|
-
"""Get sklearn.ensemble.GradientBoostingRegressor object.
|
1179
|
-
"""
|
1180
|
-
if self._sklearn_object is None:
|
1181
|
-
self._sklearn_object = self._create_sklearn_object()
|
1182
|
-
return self._sklearn_object
|
1183
|
-
|
1184
|
-
def to_xgboost(self) -> Any:
|
1185
|
-
raise exceptions.SnowflakeMLException(
|
1186
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1187
|
-
original_exception=AttributeError(
|
1188
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1189
|
-
"to_xgboost()",
|
1190
|
-
"to_sklearn()"
|
1191
|
-
)
|
1192
|
-
),
|
1193
|
-
)
|
1194
|
-
|
1195
|
-
def to_lightgbm(self) -> Any:
|
1196
|
-
raise exceptions.SnowflakeMLException(
|
1197
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1198
|
-
original_exception=AttributeError(
|
1199
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1200
|
-
"to_lightgbm()",
|
1201
|
-
"to_sklearn()"
|
1202
|
-
)
|
1203
|
-
),
|
1204
|
-
)
|
1205
|
-
|
1206
|
-
def _get_dependencies(self) -> List[str]:
|
1207
|
-
return self._deps
|