snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -32,6 +32,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
32
32
|
BatchInferenceKwargsTypedDict,
|
33
33
|
ScoreKwargsTypedDict
|
34
34
|
)
|
35
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
36
|
+
from snowflake.ml.model.model_signature import (
|
37
|
+
BaseFeatureSpec,
|
38
|
+
DataType,
|
39
|
+
FeatureSpec,
|
40
|
+
ModelSignature,
|
41
|
+
_infer_signature,
|
42
|
+
_rename_signature_with_snowflake_identifiers,
|
43
|
+
)
|
35
44
|
|
36
45
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
37
46
|
|
@@ -42,16 +51,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
42
51
|
validate_sklearn_args,
|
43
52
|
)
|
44
53
|
|
45
|
-
from snowflake.ml.model.model_signature import (
|
46
|
-
DataType,
|
47
|
-
FeatureSpec,
|
48
|
-
ModelSignature,
|
49
|
-
_infer_signature,
|
50
|
-
_rename_signature_with_snowflake_identifiers,
|
51
|
-
BaseFeatureSpec,
|
52
|
-
)
|
53
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
54
|
-
|
55
54
|
_PROJECT = "ModelDevelopment"
|
56
55
|
# Derive subproject from module name by removing "sklearn"
|
57
56
|
# and converting module name from underscore to CamelCase
|
@@ -60,12 +59,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "xgboost".replace("sklearn.", "")
|
|
60
59
|
|
61
60
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
62
61
|
|
63
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
64
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
65
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
66
|
-
return check
|
67
|
-
|
68
|
-
|
69
62
|
class XGBRFClassifier(BaseTransformer):
|
70
63
|
r"""scikit-learn API for XGBoost random forest classification
|
71
64
|
For more details on this class, see [xgboost.XGBRFClassifier]
|
@@ -426,12 +419,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
426
419
|
)
|
427
420
|
return selected_cols
|
428
421
|
|
429
|
-
|
430
|
-
project=_PROJECT,
|
431
|
-
subproject=_SUBPROJECT,
|
432
|
-
custom_tags=dict([("autogen", True)]),
|
433
|
-
)
|
434
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "XGBRFClassifier":
|
422
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "XGBRFClassifier":
|
435
423
|
"""Fit gradient boosting classifier
|
436
424
|
For more details on this function, see [xgboost.XGBRFClassifier.fit]
|
437
425
|
(https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.XGBRFClassifier.fit)
|
@@ -458,12 +446,14 @@ class XGBRFClassifier(BaseTransformer):
|
|
458
446
|
|
459
447
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
460
448
|
|
461
|
-
|
449
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
462
450
|
if SNOWML_SPROC_ENV in os.environ:
|
463
451
|
statement_params = telemetry.get_function_usage_statement_params(
|
464
452
|
project=_PROJECT,
|
465
453
|
subproject=_SUBPROJECT,
|
466
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
454
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
455
|
+
inspect.currentframe(), XGBRFClassifier.__class__.__name__
|
456
|
+
),
|
467
457
|
api_calls=[Session.call],
|
468
458
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
469
459
|
)
|
@@ -484,27 +474,24 @@ class XGBRFClassifier(BaseTransformer):
|
|
484
474
|
)
|
485
475
|
self._sklearn_object = model_trainer.train()
|
486
476
|
self._is_fitted = True
|
487
|
-
self.
|
477
|
+
self._generate_model_signatures(dataset)
|
488
478
|
return self
|
489
479
|
|
490
480
|
def _batch_inference_validate_snowpark(
|
491
481
|
self,
|
492
482
|
dataset: DataFrame,
|
493
483
|
inference_method: str,
|
494
|
-
) ->
|
495
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
496
|
-
return the available package that exists in the snowflake anaconda channel
|
484
|
+
) -> None:
|
485
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
497
486
|
|
498
487
|
Args:
|
499
488
|
dataset: snowpark dataframe
|
500
489
|
inference_method: the inference method such as predict, score...
|
501
|
-
|
490
|
+
|
502
491
|
Raises:
|
503
492
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
504
493
|
SnowflakeMLException: If the session is None, raise error
|
505
494
|
|
506
|
-
Returns:
|
507
|
-
A list of available package that exists in the snowflake anaconda channel
|
508
495
|
"""
|
509
496
|
if not self._is_fitted:
|
510
497
|
raise exceptions.SnowflakeMLException(
|
@@ -522,9 +509,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
522
509
|
"Session must not specified for snowpark dataset."
|
523
510
|
),
|
524
511
|
)
|
525
|
-
|
526
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
527
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
512
|
+
|
528
513
|
|
529
514
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
530
515
|
@telemetry.send_api_usage_telemetry(
|
@@ -560,7 +545,9 @@ class XGBRFClassifier(BaseTransformer):
|
|
560
545
|
# when it is classifier, infer the datatype from label columns
|
561
546
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
562
547
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
563
|
-
label_cols_signatures = [
|
548
|
+
label_cols_signatures = [
|
549
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
550
|
+
]
|
564
551
|
if len(label_cols_signatures) == 0:
|
565
552
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
566
553
|
raise exceptions.SnowflakeMLException(
|
@@ -568,25 +555,23 @@ class XGBRFClassifier(BaseTransformer):
|
|
568
555
|
original_exception=ValueError(error_str),
|
569
556
|
)
|
570
557
|
|
571
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
572
|
-
label_cols_signatures[0].as_snowpark_type()
|
573
|
-
)
|
558
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
574
559
|
|
575
|
-
self.
|
576
|
-
|
560
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
561
|
+
self._deps = self._get_dependencies()
|
562
|
+
assert isinstance(
|
563
|
+
dataset._session, Session
|
564
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
577
565
|
|
578
566
|
transform_kwargs = dict(
|
579
|
-
session
|
580
|
-
dependencies
|
581
|
-
drop_input_cols
|
582
|
-
expected_output_cols_type
|
567
|
+
session=dataset._session,
|
568
|
+
dependencies=self._deps,
|
569
|
+
drop_input_cols=self._drop_input_cols,
|
570
|
+
expected_output_cols_type=expected_type_inferred,
|
583
571
|
)
|
584
572
|
|
585
573
|
elif isinstance(dataset, pd.DataFrame):
|
586
|
-
transform_kwargs = dict(
|
587
|
-
snowpark_input_cols = self._snowpark_cols,
|
588
|
-
drop_input_cols = self._drop_input_cols
|
589
|
-
)
|
574
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
590
575
|
|
591
576
|
transform_handlers = ModelTransformerBuilder.build(
|
592
577
|
dataset=dataset,
|
@@ -626,7 +611,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
626
611
|
Transformed dataset.
|
627
612
|
"""
|
628
613
|
super()._check_dataset_type(dataset)
|
629
|
-
inference_method="transform"
|
614
|
+
inference_method = "transform"
|
630
615
|
|
631
616
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
632
617
|
# are specific to the type of dataset used.
|
@@ -656,24 +641,19 @@ class XGBRFClassifier(BaseTransformer):
|
|
656
641
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
657
642
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
658
643
|
|
659
|
-
self.
|
660
|
-
|
661
|
-
inference_method=inference_method,
|
662
|
-
)
|
644
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
645
|
+
self._deps = self._get_dependencies()
|
663
646
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
664
647
|
|
665
648
|
transform_kwargs = dict(
|
666
|
-
session
|
667
|
-
dependencies
|
668
|
-
drop_input_cols
|
669
|
-
expected_output_cols_type
|
649
|
+
session=dataset._session,
|
650
|
+
dependencies=self._deps,
|
651
|
+
drop_input_cols=self._drop_input_cols,
|
652
|
+
expected_output_cols_type=expected_dtype,
|
670
653
|
)
|
671
654
|
|
672
655
|
elif isinstance(dataset, pd.DataFrame):
|
673
|
-
transform_kwargs = dict(
|
674
|
-
snowpark_input_cols = self._snowpark_cols,
|
675
|
-
drop_input_cols = self._drop_input_cols
|
676
|
-
)
|
656
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
677
657
|
|
678
658
|
transform_handlers = ModelTransformerBuilder.build(
|
679
659
|
dataset=dataset,
|
@@ -692,7 +672,11 @@ class XGBRFClassifier(BaseTransformer):
|
|
692
672
|
return output_df
|
693
673
|
|
694
674
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
695
|
-
def fit_predict(
|
675
|
+
def fit_predict(
|
676
|
+
self,
|
677
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
678
|
+
output_cols_prefix: str = "fit_predict_",
|
679
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
696
680
|
""" Method not supported for this class.
|
697
681
|
|
698
682
|
|
@@ -717,22 +701,104 @@ class XGBRFClassifier(BaseTransformer):
|
|
717
701
|
)
|
718
702
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
719
703
|
drop_input_cols=self._drop_input_cols,
|
720
|
-
expected_output_cols_list=
|
704
|
+
expected_output_cols_list=(
|
705
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
706
|
+
),
|
721
707
|
)
|
722
708
|
self._sklearn_object = fitted_estimator
|
723
709
|
self._is_fitted = True
|
724
710
|
return output_result
|
725
711
|
|
712
|
+
|
713
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
714
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
715
|
+
""" Method not supported for this class.
|
716
|
+
|
726
717
|
|
727
|
-
|
728
|
-
|
729
|
-
|
718
|
+
Raises:
|
719
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
720
|
+
|
721
|
+
Args:
|
722
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
723
|
+
Snowpark or Pandas DataFrame.
|
724
|
+
output_cols_prefix: Prefix for the response columns
|
730
725
|
Returns:
|
731
726
|
Transformed dataset.
|
732
727
|
"""
|
733
|
-
self.
|
734
|
-
|
735
|
-
|
728
|
+
self._infer_input_output_cols(dataset)
|
729
|
+
super()._check_dataset_type(dataset)
|
730
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
731
|
+
estimator=self._sklearn_object,
|
732
|
+
dataset=dataset,
|
733
|
+
input_cols=self.input_cols,
|
734
|
+
label_cols=self.label_cols,
|
735
|
+
sample_weight_col=self.sample_weight_col,
|
736
|
+
autogenerated=self._autogenerated,
|
737
|
+
subproject=_SUBPROJECT,
|
738
|
+
)
|
739
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
740
|
+
drop_input_cols=self._drop_input_cols,
|
741
|
+
expected_output_cols_list=self.output_cols,
|
742
|
+
)
|
743
|
+
self._sklearn_object = fitted_estimator
|
744
|
+
self._is_fitted = True
|
745
|
+
return output_result
|
746
|
+
|
747
|
+
|
748
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
749
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
750
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
751
|
+
"""
|
752
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
753
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
754
|
+
if output_cols:
|
755
|
+
output_cols = [
|
756
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
757
|
+
for c in output_cols
|
758
|
+
]
|
759
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
760
|
+
output_cols = [output_cols_prefix]
|
761
|
+
elif self._sklearn_object is not None:
|
762
|
+
classes = self._sklearn_object.classes_
|
763
|
+
if isinstance(classes, numpy.ndarray):
|
764
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
765
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
766
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
767
|
+
output_cols = []
|
768
|
+
for i, cl in enumerate(classes):
|
769
|
+
# For binary classification, there is only one output column for each class
|
770
|
+
# ndarray as the two classes are complementary.
|
771
|
+
if len(cl) == 2:
|
772
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
773
|
+
else:
|
774
|
+
output_cols.extend([
|
775
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
776
|
+
])
|
777
|
+
else:
|
778
|
+
output_cols = []
|
779
|
+
|
780
|
+
# Make sure column names are valid snowflake identifiers.
|
781
|
+
assert output_cols is not None # Make MyPy happy
|
782
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
783
|
+
|
784
|
+
return rv
|
785
|
+
|
786
|
+
def _align_expected_output_names(
|
787
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
788
|
+
) -> List[str]:
|
789
|
+
# in case the inferred output column names dimension is different
|
790
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
791
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
792
|
+
output_df_columns = list(output_df_pd.columns)
|
793
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
794
|
+
if self.sample_weight_col:
|
795
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
796
|
+
# if the dimension of inferred output column names is correct; use it
|
797
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
798
|
+
return expected_output_cols_list
|
799
|
+
# otherwise, use the sklearn estimator's output
|
800
|
+
else:
|
801
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
736
802
|
|
737
803
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
738
804
|
@telemetry.send_api_usage_telemetry(
|
@@ -766,24 +832,26 @@ class XGBRFClassifier(BaseTransformer):
|
|
766
832
|
# are specific to the type of dataset used.
|
767
833
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
768
834
|
|
835
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
836
|
+
|
769
837
|
if isinstance(dataset, DataFrame):
|
770
|
-
self.
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
838
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
839
|
+
self._deps = self._get_dependencies()
|
840
|
+
assert isinstance(
|
841
|
+
dataset._session, Session
|
842
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
775
843
|
transform_kwargs = dict(
|
776
844
|
session=dataset._session,
|
777
845
|
dependencies=self._deps,
|
778
|
-
drop_input_cols
|
846
|
+
drop_input_cols=self._drop_input_cols,
|
779
847
|
expected_output_cols_type="float",
|
780
848
|
)
|
849
|
+
expected_output_cols = self._align_expected_output_names(
|
850
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
851
|
+
)
|
781
852
|
|
782
853
|
elif isinstance(dataset, pd.DataFrame):
|
783
|
-
transform_kwargs = dict(
|
784
|
-
snowpark_input_cols = self._snowpark_cols,
|
785
|
-
drop_input_cols = self._drop_input_cols
|
786
|
-
)
|
854
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
787
855
|
|
788
856
|
transform_handlers = ModelTransformerBuilder.build(
|
789
857
|
dataset=dataset,
|
@@ -795,7 +863,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
795
863
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
796
864
|
inference_method=inference_method,
|
797
865
|
input_cols=self.input_cols,
|
798
|
-
expected_output_cols=
|
866
|
+
expected_output_cols=expected_output_cols,
|
799
867
|
**transform_kwargs
|
800
868
|
)
|
801
869
|
return output_df
|
@@ -827,29 +895,30 @@ class XGBRFClassifier(BaseTransformer):
|
|
827
895
|
Output dataset with log probability of the sample for each class in the model.
|
828
896
|
"""
|
829
897
|
super()._check_dataset_type(dataset)
|
830
|
-
inference_method="predict_log_proba"
|
898
|
+
inference_method = "predict_log_proba"
|
899
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
831
900
|
|
832
901
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
833
902
|
# are specific to the type of dataset used.
|
834
903
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
835
904
|
|
836
905
|
if isinstance(dataset, DataFrame):
|
837
|
-
self.
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
906
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
907
|
+
self._deps = self._get_dependencies()
|
908
|
+
assert isinstance(
|
909
|
+
dataset._session, Session
|
910
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
842
911
|
transform_kwargs = dict(
|
843
912
|
session=dataset._session,
|
844
913
|
dependencies=self._deps,
|
845
|
-
drop_input_cols
|
914
|
+
drop_input_cols=self._drop_input_cols,
|
846
915
|
expected_output_cols_type="float",
|
847
916
|
)
|
917
|
+
expected_output_cols = self._align_expected_output_names(
|
918
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
919
|
+
)
|
848
920
|
elif isinstance(dataset, pd.DataFrame):
|
849
|
-
transform_kwargs = dict(
|
850
|
-
snowpark_input_cols = self._snowpark_cols,
|
851
|
-
drop_input_cols = self._drop_input_cols
|
852
|
-
)
|
921
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
853
922
|
|
854
923
|
transform_handlers = ModelTransformerBuilder.build(
|
855
924
|
dataset=dataset,
|
@@ -862,7 +931,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
862
931
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
863
932
|
inference_method=inference_method,
|
864
933
|
input_cols=self.input_cols,
|
865
|
-
expected_output_cols=
|
934
|
+
expected_output_cols=expected_output_cols,
|
866
935
|
**transform_kwargs
|
867
936
|
)
|
868
937
|
return output_df
|
@@ -888,30 +957,32 @@ class XGBRFClassifier(BaseTransformer):
|
|
888
957
|
Output dataset with results of the decision function for the samples in input dataset.
|
889
958
|
"""
|
890
959
|
super()._check_dataset_type(dataset)
|
891
|
-
inference_method="decision_function"
|
960
|
+
inference_method = "decision_function"
|
892
961
|
|
893
962
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
894
963
|
# are specific to the type of dataset used.
|
895
964
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
896
965
|
|
966
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
967
|
+
|
897
968
|
if isinstance(dataset, DataFrame):
|
898
|
-
self.
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
969
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
970
|
+
self._deps = self._get_dependencies()
|
971
|
+
assert isinstance(
|
972
|
+
dataset._session, Session
|
973
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
903
974
|
transform_kwargs = dict(
|
904
975
|
session=dataset._session,
|
905
976
|
dependencies=self._deps,
|
906
|
-
drop_input_cols
|
977
|
+
drop_input_cols=self._drop_input_cols,
|
907
978
|
expected_output_cols_type="float",
|
908
979
|
)
|
980
|
+
expected_output_cols = self._align_expected_output_names(
|
981
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
982
|
+
)
|
909
983
|
|
910
984
|
elif isinstance(dataset, pd.DataFrame):
|
911
|
-
transform_kwargs = dict(
|
912
|
-
snowpark_input_cols = self._snowpark_cols,
|
913
|
-
drop_input_cols = self._drop_input_cols
|
914
|
-
)
|
985
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
915
986
|
|
916
987
|
transform_handlers = ModelTransformerBuilder.build(
|
917
988
|
dataset=dataset,
|
@@ -924,7 +995,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
924
995
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
925
996
|
inference_method=inference_method,
|
926
997
|
input_cols=self.input_cols,
|
927
|
-
expected_output_cols=
|
998
|
+
expected_output_cols=expected_output_cols,
|
928
999
|
**transform_kwargs
|
929
1000
|
)
|
930
1001
|
return output_df
|
@@ -953,17 +1024,17 @@ class XGBRFClassifier(BaseTransformer):
|
|
953
1024
|
Output dataset with probability of the sample for each class in the model.
|
954
1025
|
"""
|
955
1026
|
super()._check_dataset_type(dataset)
|
956
|
-
inference_method="score_samples"
|
1027
|
+
inference_method = "score_samples"
|
957
1028
|
|
958
1029
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
959
1030
|
# are specific to the type of dataset used.
|
960
1031
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
961
1032
|
|
1033
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
1034
|
+
|
962
1035
|
if isinstance(dataset, DataFrame):
|
963
|
-
self.
|
964
|
-
|
965
|
-
inference_method=inference_method,
|
966
|
-
)
|
1036
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1037
|
+
self._deps = self._get_dependencies()
|
967
1038
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
968
1039
|
transform_kwargs = dict(
|
969
1040
|
session=dataset._session,
|
@@ -971,6 +1042,9 @@ class XGBRFClassifier(BaseTransformer):
|
|
971
1042
|
drop_input_cols = self._drop_input_cols,
|
972
1043
|
expected_output_cols_type="float",
|
973
1044
|
)
|
1045
|
+
expected_output_cols = self._align_expected_output_names(
|
1046
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
1047
|
+
)
|
974
1048
|
|
975
1049
|
elif isinstance(dataset, pd.DataFrame):
|
976
1050
|
transform_kwargs = dict(
|
@@ -989,7 +1063,7 @@ class XGBRFClassifier(BaseTransformer):
|
|
989
1063
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
990
1064
|
inference_method=inference_method,
|
991
1065
|
input_cols=self.input_cols,
|
992
|
-
expected_output_cols=
|
1066
|
+
expected_output_cols=expected_output_cols,
|
993
1067
|
**transform_kwargs
|
994
1068
|
)
|
995
1069
|
return output_df
|
@@ -1024,17 +1098,15 @@ class XGBRFClassifier(BaseTransformer):
|
|
1024
1098
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
1025
1099
|
|
1026
1100
|
if isinstance(dataset, DataFrame):
|
1027
|
-
self.
|
1028
|
-
|
1029
|
-
inference_method="score",
|
1030
|
-
)
|
1101
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
1102
|
+
self._deps = self._get_dependencies()
|
1031
1103
|
selected_cols = self._get_active_columns()
|
1032
1104
|
if len(selected_cols) > 0:
|
1033
1105
|
dataset = dataset.select(selected_cols)
|
1034
1106
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
1035
1107
|
transform_kwargs = dict(
|
1036
1108
|
session=dataset._session,
|
1037
|
-
dependencies=
|
1109
|
+
dependencies=self._deps,
|
1038
1110
|
score_sproc_imports=['xgboost'],
|
1039
1111
|
)
|
1040
1112
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -1099,11 +1171,8 @@ class XGBRFClassifier(BaseTransformer):
|
|
1099
1171
|
|
1100
1172
|
if isinstance(dataset, DataFrame):
|
1101
1173
|
|
1102
|
-
self.
|
1103
|
-
|
1104
|
-
inference_method=inference_method,
|
1105
|
-
|
1106
|
-
)
|
1174
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1175
|
+
self._deps = self._get_dependencies()
|
1107
1176
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
1108
1177
|
transform_kwargs = dict(
|
1109
1178
|
session = dataset._session,
|
@@ -1136,50 +1205,84 @@ class XGBRFClassifier(BaseTransformer):
|
|
1136
1205
|
)
|
1137
1206
|
return output_df
|
1138
1207
|
|
1208
|
+
|
1209
|
+
|
1210
|
+
def to_xgboost(self) -> Any:
|
1211
|
+
"""Get xgboost.XGBRFClassifier object.
|
1212
|
+
"""
|
1213
|
+
if self._sklearn_object is None:
|
1214
|
+
self._sklearn_object = self._create_sklearn_object()
|
1215
|
+
return self._sklearn_object
|
1216
|
+
|
1217
|
+
def to_sklearn(self) -> Any:
|
1218
|
+
raise exceptions.SnowflakeMLException(
|
1219
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1220
|
+
original_exception=AttributeError(
|
1221
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1222
|
+
"to_sklearn()",
|
1223
|
+
"to_xgboost()"
|
1224
|
+
)
|
1225
|
+
),
|
1226
|
+
)
|
1227
|
+
|
1228
|
+
def to_lightgbm(self) -> Any:
|
1229
|
+
raise exceptions.SnowflakeMLException(
|
1230
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1231
|
+
original_exception=AttributeError(
|
1232
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1233
|
+
"to_lightgbm()",
|
1234
|
+
"to_xgboost()"
|
1235
|
+
)
|
1236
|
+
),
|
1237
|
+
)
|
1238
|
+
|
1239
|
+
def _get_dependencies(self) -> List[str]:
|
1240
|
+
return self._deps
|
1241
|
+
|
1139
1242
|
|
1140
|
-
def
|
1243
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1141
1244
|
self._model_signature_dict = dict()
|
1142
1245
|
|
1143
1246
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1144
1247
|
|
1145
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1248
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1146
1249
|
outputs: List[BaseFeatureSpec] = []
|
1147
1250
|
if hasattr(self, "predict"):
|
1148
1251
|
# keep mypy happy
|
1149
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1252
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1150
1253
|
# For classifier, the type of predict is the same as the type of label
|
1151
|
-
if self._sklearn_object._estimator_type ==
|
1152
|
-
|
1254
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1255
|
+
# label columns is the desired type for output
|
1153
1256
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1154
1257
|
# rename the output columns
|
1155
1258
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1156
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1157
|
-
|
1158
|
-
|
1259
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1260
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1261
|
+
)
|
1159
1262
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1160
1263
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1161
|
-
# Clusterer returns int64 cluster labels.
|
1264
|
+
# Clusterer returns int64 cluster labels.
|
1162
1265
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1163
1266
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1164
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1267
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1268
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1269
|
+
)
|
1270
|
+
|
1168
1271
|
# For regressor, the type of predict is float64
|
1169
|
-
elif self._sklearn_object._estimator_type ==
|
1272
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1170
1273
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1171
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1274
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1275
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1276
|
+
)
|
1277
|
+
|
1175
1278
|
for prob_func in PROB_FUNCTIONS:
|
1176
1279
|
if hasattr(self, prob_func):
|
1177
1280
|
output_cols_prefix: str = f"{prob_func}_"
|
1178
1281
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1179
1282
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1180
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1181
|
-
|
1182
|
-
|
1283
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1284
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1285
|
+
)
|
1183
1286
|
|
1184
1287
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1185
1288
|
items = list(self._model_signature_dict.items())
|
@@ -1192,10 +1295,10 @@ class XGBRFClassifier(BaseTransformer):
|
|
1192
1295
|
"""Returns model signature of current class.
|
1193
1296
|
|
1194
1297
|
Raises:
|
1195
|
-
|
1298
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1196
1299
|
|
1197
1300
|
Returns:
|
1198
|
-
Dict
|
1301
|
+
Dict with each method and its input output signature
|
1199
1302
|
"""
|
1200
1303
|
if self._model_signature_dict is None:
|
1201
1304
|
raise exceptions.SnowflakeMLException(
|
@@ -1203,35 +1306,3 @@ class XGBRFClassifier(BaseTransformer):
|
|
1203
1306
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1204
1307
|
)
|
1205
1308
|
return self._model_signature_dict
|
1206
|
-
|
1207
|
-
def to_xgboost(self) -> Any:
|
1208
|
-
"""Get xgboost.XGBRFClassifier object.
|
1209
|
-
"""
|
1210
|
-
if self._sklearn_object is None:
|
1211
|
-
self._sklearn_object = self._create_sklearn_object()
|
1212
|
-
return self._sklearn_object
|
1213
|
-
|
1214
|
-
def to_sklearn(self) -> Any:
|
1215
|
-
raise exceptions.SnowflakeMLException(
|
1216
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1217
|
-
original_exception=AttributeError(
|
1218
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1219
|
-
"to_sklearn()",
|
1220
|
-
"to_xgboost()"
|
1221
|
-
)
|
1222
|
-
),
|
1223
|
-
)
|
1224
|
-
|
1225
|
-
def to_lightgbm(self) -> Any:
|
1226
|
-
raise exceptions.SnowflakeMLException(
|
1227
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1228
|
-
original_exception=AttributeError(
|
1229
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1230
|
-
"to_lightgbm()",
|
1231
|
-
"to_xgboost()"
|
1232
|
-
)
|
1233
|
-
),
|
1234
|
-
)
|
1235
|
-
|
1236
|
-
def _get_dependencies(self) -> List[str]:
|
1237
|
-
return self._deps
|