snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.calibration".replace("sk
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class CalibratedClassifierCV(BaseTransformer):
|
71
64
|
r"""Probability calibration with isotonic regression or logistic regression
|
72
65
|
For more details on this class, see [sklearn.calibration.CalibratedClassifierCV]
|
@@ -267,12 +260,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
267
260
|
)
|
268
261
|
return selected_cols
|
269
262
|
|
270
|
-
|
271
|
-
project=_PROJECT,
|
272
|
-
subproject=_SUBPROJECT,
|
273
|
-
custom_tags=dict([("autogen", True)]),
|
274
|
-
)
|
275
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "CalibratedClassifierCV":
|
263
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "CalibratedClassifierCV":
|
276
264
|
"""Fit the calibrated model
|
277
265
|
For more details on this function, see [sklearn.calibration.CalibratedClassifierCV.fit]
|
278
266
|
(https://scikit-learn.org/stable/modules/generated/sklearn.calibration.CalibratedClassifierCV.html#sklearn.calibration.CalibratedClassifierCV.fit)
|
@@ -299,12 +287,14 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
299
287
|
|
300
288
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
301
289
|
|
302
|
-
|
290
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
303
291
|
if SNOWML_SPROC_ENV in os.environ:
|
304
292
|
statement_params = telemetry.get_function_usage_statement_params(
|
305
293
|
project=_PROJECT,
|
306
294
|
subproject=_SUBPROJECT,
|
307
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
295
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
296
|
+
inspect.currentframe(), CalibratedClassifierCV.__class__.__name__
|
297
|
+
),
|
308
298
|
api_calls=[Session.call],
|
309
299
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
310
300
|
)
|
@@ -325,27 +315,24 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
325
315
|
)
|
326
316
|
self._sklearn_object = model_trainer.train()
|
327
317
|
self._is_fitted = True
|
328
|
-
self.
|
318
|
+
self._generate_model_signatures(dataset)
|
329
319
|
return self
|
330
320
|
|
331
321
|
def _batch_inference_validate_snowpark(
|
332
322
|
self,
|
333
323
|
dataset: DataFrame,
|
334
324
|
inference_method: str,
|
335
|
-
) ->
|
336
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
337
|
-
return the available package that exists in the snowflake anaconda channel
|
325
|
+
) -> None:
|
326
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
338
327
|
|
339
328
|
Args:
|
340
329
|
dataset: snowpark dataframe
|
341
330
|
inference_method: the inference method such as predict, score...
|
342
|
-
|
331
|
+
|
343
332
|
Raises:
|
344
333
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
345
334
|
SnowflakeMLException: If the session is None, raise error
|
346
335
|
|
347
|
-
Returns:
|
348
|
-
A list of available package that exists in the snowflake anaconda channel
|
349
336
|
"""
|
350
337
|
if not self._is_fitted:
|
351
338
|
raise exceptions.SnowflakeMLException(
|
@@ -363,9 +350,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
363
350
|
"Session must not specified for snowpark dataset."
|
364
351
|
),
|
365
352
|
)
|
366
|
-
|
367
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
368
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
353
|
+
|
369
354
|
|
370
355
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
371
356
|
@telemetry.send_api_usage_telemetry(
|
@@ -401,7 +386,9 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
401
386
|
# when it is classifier, infer the datatype from label columns
|
402
387
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
403
388
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
404
|
-
label_cols_signatures = [
|
389
|
+
label_cols_signatures = [
|
390
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
391
|
+
]
|
405
392
|
if len(label_cols_signatures) == 0:
|
406
393
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
407
394
|
raise exceptions.SnowflakeMLException(
|
@@ -409,25 +396,23 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
409
396
|
original_exception=ValueError(error_str),
|
410
397
|
)
|
411
398
|
|
412
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
413
|
-
label_cols_signatures[0].as_snowpark_type()
|
414
|
-
)
|
399
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
415
400
|
|
416
|
-
self.
|
417
|
-
|
401
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
402
|
+
self._deps = self._get_dependencies()
|
403
|
+
assert isinstance(
|
404
|
+
dataset._session, Session
|
405
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
418
406
|
|
419
407
|
transform_kwargs = dict(
|
420
|
-
session
|
421
|
-
dependencies
|
422
|
-
drop_input_cols
|
423
|
-
expected_output_cols_type
|
408
|
+
session=dataset._session,
|
409
|
+
dependencies=self._deps,
|
410
|
+
drop_input_cols=self._drop_input_cols,
|
411
|
+
expected_output_cols_type=expected_type_inferred,
|
424
412
|
)
|
425
413
|
|
426
414
|
elif isinstance(dataset, pd.DataFrame):
|
427
|
-
transform_kwargs = dict(
|
428
|
-
snowpark_input_cols = self._snowpark_cols,
|
429
|
-
drop_input_cols = self._drop_input_cols
|
430
|
-
)
|
415
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
431
416
|
|
432
417
|
transform_handlers = ModelTransformerBuilder.build(
|
433
418
|
dataset=dataset,
|
@@ -467,7 +452,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
467
452
|
Transformed dataset.
|
468
453
|
"""
|
469
454
|
super()._check_dataset_type(dataset)
|
470
|
-
inference_method="transform"
|
455
|
+
inference_method = "transform"
|
471
456
|
|
472
457
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
473
458
|
# are specific to the type of dataset used.
|
@@ -497,24 +482,19 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
497
482
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
498
483
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
499
484
|
|
500
|
-
self.
|
501
|
-
|
502
|
-
inference_method=inference_method,
|
503
|
-
)
|
485
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
486
|
+
self._deps = self._get_dependencies()
|
504
487
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
505
488
|
|
506
489
|
transform_kwargs = dict(
|
507
|
-
session
|
508
|
-
dependencies
|
509
|
-
drop_input_cols
|
510
|
-
expected_output_cols_type
|
490
|
+
session=dataset._session,
|
491
|
+
dependencies=self._deps,
|
492
|
+
drop_input_cols=self._drop_input_cols,
|
493
|
+
expected_output_cols_type=expected_dtype,
|
511
494
|
)
|
512
495
|
|
513
496
|
elif isinstance(dataset, pd.DataFrame):
|
514
|
-
transform_kwargs = dict(
|
515
|
-
snowpark_input_cols = self._snowpark_cols,
|
516
|
-
drop_input_cols = self._drop_input_cols
|
517
|
-
)
|
497
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
518
498
|
|
519
499
|
transform_handlers = ModelTransformerBuilder.build(
|
520
500
|
dataset=dataset,
|
@@ -533,7 +513,11 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
533
513
|
return output_df
|
534
514
|
|
535
515
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
536
|
-
def fit_predict(
|
516
|
+
def fit_predict(
|
517
|
+
self,
|
518
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
519
|
+
output_cols_prefix: str = "fit_predict_",
|
520
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
537
521
|
""" Method not supported for this class.
|
538
522
|
|
539
523
|
|
@@ -558,22 +542,104 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
558
542
|
)
|
559
543
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
560
544
|
drop_input_cols=self._drop_input_cols,
|
561
|
-
expected_output_cols_list=
|
545
|
+
expected_output_cols_list=(
|
546
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
547
|
+
),
|
562
548
|
)
|
563
549
|
self._sklearn_object = fitted_estimator
|
564
550
|
self._is_fitted = True
|
565
551
|
return output_result
|
566
552
|
|
553
|
+
|
554
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
555
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
556
|
+
""" Method not supported for this class.
|
557
|
+
|
567
558
|
|
568
|
-
|
569
|
-
|
570
|
-
|
559
|
+
Raises:
|
560
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
561
|
+
|
562
|
+
Args:
|
563
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
564
|
+
Snowpark or Pandas DataFrame.
|
565
|
+
output_cols_prefix: Prefix for the response columns
|
571
566
|
Returns:
|
572
567
|
Transformed dataset.
|
573
568
|
"""
|
574
|
-
self.
|
575
|
-
|
576
|
-
|
569
|
+
self._infer_input_output_cols(dataset)
|
570
|
+
super()._check_dataset_type(dataset)
|
571
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
572
|
+
estimator=self._sklearn_object,
|
573
|
+
dataset=dataset,
|
574
|
+
input_cols=self.input_cols,
|
575
|
+
label_cols=self.label_cols,
|
576
|
+
sample_weight_col=self.sample_weight_col,
|
577
|
+
autogenerated=self._autogenerated,
|
578
|
+
subproject=_SUBPROJECT,
|
579
|
+
)
|
580
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
581
|
+
drop_input_cols=self._drop_input_cols,
|
582
|
+
expected_output_cols_list=self.output_cols,
|
583
|
+
)
|
584
|
+
self._sklearn_object = fitted_estimator
|
585
|
+
self._is_fitted = True
|
586
|
+
return output_result
|
587
|
+
|
588
|
+
|
589
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
590
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
591
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
592
|
+
"""
|
593
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
594
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
595
|
+
if output_cols:
|
596
|
+
output_cols = [
|
597
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
598
|
+
for c in output_cols
|
599
|
+
]
|
600
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
601
|
+
output_cols = [output_cols_prefix]
|
602
|
+
elif self._sklearn_object is not None:
|
603
|
+
classes = self._sklearn_object.classes_
|
604
|
+
if isinstance(classes, numpy.ndarray):
|
605
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
606
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
607
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
608
|
+
output_cols = []
|
609
|
+
for i, cl in enumerate(classes):
|
610
|
+
# For binary classification, there is only one output column for each class
|
611
|
+
# ndarray as the two classes are complementary.
|
612
|
+
if len(cl) == 2:
|
613
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
614
|
+
else:
|
615
|
+
output_cols.extend([
|
616
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
617
|
+
])
|
618
|
+
else:
|
619
|
+
output_cols = []
|
620
|
+
|
621
|
+
# Make sure column names are valid snowflake identifiers.
|
622
|
+
assert output_cols is not None # Make MyPy happy
|
623
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
624
|
+
|
625
|
+
return rv
|
626
|
+
|
627
|
+
def _align_expected_output_names(
|
628
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
629
|
+
) -> List[str]:
|
630
|
+
# in case the inferred output column names dimension is different
|
631
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
632
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
633
|
+
output_df_columns = list(output_df_pd.columns)
|
634
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
635
|
+
if self.sample_weight_col:
|
636
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
637
|
+
# if the dimension of inferred output column names is correct; use it
|
638
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
639
|
+
return expected_output_cols_list
|
640
|
+
# otherwise, use the sklearn estimator's output
|
641
|
+
else:
|
642
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
577
643
|
|
578
644
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
579
645
|
@telemetry.send_api_usage_telemetry(
|
@@ -607,24 +673,26 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
607
673
|
# are specific to the type of dataset used.
|
608
674
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
609
675
|
|
676
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
677
|
+
|
610
678
|
if isinstance(dataset, DataFrame):
|
611
|
-
self.
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
679
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
680
|
+
self._deps = self._get_dependencies()
|
681
|
+
assert isinstance(
|
682
|
+
dataset._session, Session
|
683
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
616
684
|
transform_kwargs = dict(
|
617
685
|
session=dataset._session,
|
618
686
|
dependencies=self._deps,
|
619
|
-
drop_input_cols
|
687
|
+
drop_input_cols=self._drop_input_cols,
|
620
688
|
expected_output_cols_type="float",
|
621
689
|
)
|
690
|
+
expected_output_cols = self._align_expected_output_names(
|
691
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
692
|
+
)
|
622
693
|
|
623
694
|
elif isinstance(dataset, pd.DataFrame):
|
624
|
-
transform_kwargs = dict(
|
625
|
-
snowpark_input_cols = self._snowpark_cols,
|
626
|
-
drop_input_cols = self._drop_input_cols
|
627
|
-
)
|
695
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
628
696
|
|
629
697
|
transform_handlers = ModelTransformerBuilder.build(
|
630
698
|
dataset=dataset,
|
@@ -636,7 +704,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
636
704
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
637
705
|
inference_method=inference_method,
|
638
706
|
input_cols=self.input_cols,
|
639
|
-
expected_output_cols=
|
707
|
+
expected_output_cols=expected_output_cols,
|
640
708
|
**transform_kwargs
|
641
709
|
)
|
642
710
|
return output_df
|
@@ -668,29 +736,30 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
668
736
|
Output dataset with log probability of the sample for each class in the model.
|
669
737
|
"""
|
670
738
|
super()._check_dataset_type(dataset)
|
671
|
-
inference_method="predict_log_proba"
|
739
|
+
inference_method = "predict_log_proba"
|
740
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
672
741
|
|
673
742
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
674
743
|
# are specific to the type of dataset used.
|
675
744
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
676
745
|
|
677
746
|
if isinstance(dataset, DataFrame):
|
678
|
-
self.
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
747
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
748
|
+
self._deps = self._get_dependencies()
|
749
|
+
assert isinstance(
|
750
|
+
dataset._session, Session
|
751
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
683
752
|
transform_kwargs = dict(
|
684
753
|
session=dataset._session,
|
685
754
|
dependencies=self._deps,
|
686
|
-
drop_input_cols
|
755
|
+
drop_input_cols=self._drop_input_cols,
|
687
756
|
expected_output_cols_type="float",
|
688
757
|
)
|
758
|
+
expected_output_cols = self._align_expected_output_names(
|
759
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
760
|
+
)
|
689
761
|
elif isinstance(dataset, pd.DataFrame):
|
690
|
-
transform_kwargs = dict(
|
691
|
-
snowpark_input_cols = self._snowpark_cols,
|
692
|
-
drop_input_cols = self._drop_input_cols
|
693
|
-
)
|
762
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
694
763
|
|
695
764
|
transform_handlers = ModelTransformerBuilder.build(
|
696
765
|
dataset=dataset,
|
@@ -703,7 +772,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
703
772
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
704
773
|
inference_method=inference_method,
|
705
774
|
input_cols=self.input_cols,
|
706
|
-
expected_output_cols=
|
775
|
+
expected_output_cols=expected_output_cols,
|
707
776
|
**transform_kwargs
|
708
777
|
)
|
709
778
|
return output_df
|
@@ -729,30 +798,32 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
729
798
|
Output dataset with results of the decision function for the samples in input dataset.
|
730
799
|
"""
|
731
800
|
super()._check_dataset_type(dataset)
|
732
|
-
inference_method="decision_function"
|
801
|
+
inference_method = "decision_function"
|
733
802
|
|
734
803
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
735
804
|
# are specific to the type of dataset used.
|
736
805
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
737
806
|
|
807
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
808
|
+
|
738
809
|
if isinstance(dataset, DataFrame):
|
739
|
-
self.
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
810
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
811
|
+
self._deps = self._get_dependencies()
|
812
|
+
assert isinstance(
|
813
|
+
dataset._session, Session
|
814
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
744
815
|
transform_kwargs = dict(
|
745
816
|
session=dataset._session,
|
746
817
|
dependencies=self._deps,
|
747
|
-
drop_input_cols
|
818
|
+
drop_input_cols=self._drop_input_cols,
|
748
819
|
expected_output_cols_type="float",
|
749
820
|
)
|
821
|
+
expected_output_cols = self._align_expected_output_names(
|
822
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
823
|
+
)
|
750
824
|
|
751
825
|
elif isinstance(dataset, pd.DataFrame):
|
752
|
-
transform_kwargs = dict(
|
753
|
-
snowpark_input_cols = self._snowpark_cols,
|
754
|
-
drop_input_cols = self._drop_input_cols
|
755
|
-
)
|
826
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
756
827
|
|
757
828
|
transform_handlers = ModelTransformerBuilder.build(
|
758
829
|
dataset=dataset,
|
@@ -765,7 +836,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
765
836
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
766
837
|
inference_method=inference_method,
|
767
838
|
input_cols=self.input_cols,
|
768
|
-
expected_output_cols=
|
839
|
+
expected_output_cols=expected_output_cols,
|
769
840
|
**transform_kwargs
|
770
841
|
)
|
771
842
|
return output_df
|
@@ -794,17 +865,17 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
794
865
|
Output dataset with probability of the sample for each class in the model.
|
795
866
|
"""
|
796
867
|
super()._check_dataset_type(dataset)
|
797
|
-
inference_method="score_samples"
|
868
|
+
inference_method = "score_samples"
|
798
869
|
|
799
870
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
800
871
|
# are specific to the type of dataset used.
|
801
872
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
802
873
|
|
874
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
875
|
+
|
803
876
|
if isinstance(dataset, DataFrame):
|
804
|
-
self.
|
805
|
-
|
806
|
-
inference_method=inference_method,
|
807
|
-
)
|
877
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
878
|
+
self._deps = self._get_dependencies()
|
808
879
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
809
880
|
transform_kwargs = dict(
|
810
881
|
session=dataset._session,
|
@@ -812,6 +883,9 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
812
883
|
drop_input_cols = self._drop_input_cols,
|
813
884
|
expected_output_cols_type="float",
|
814
885
|
)
|
886
|
+
expected_output_cols = self._align_expected_output_names(
|
887
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
888
|
+
)
|
815
889
|
|
816
890
|
elif isinstance(dataset, pd.DataFrame):
|
817
891
|
transform_kwargs = dict(
|
@@ -830,7 +904,7 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
830
904
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
831
905
|
inference_method=inference_method,
|
832
906
|
input_cols=self.input_cols,
|
833
|
-
expected_output_cols=
|
907
|
+
expected_output_cols=expected_output_cols,
|
834
908
|
**transform_kwargs
|
835
909
|
)
|
836
910
|
return output_df
|
@@ -865,17 +939,15 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
865
939
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
866
940
|
|
867
941
|
if isinstance(dataset, DataFrame):
|
868
|
-
self.
|
869
|
-
|
870
|
-
inference_method="score",
|
871
|
-
)
|
942
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
943
|
+
self._deps = self._get_dependencies()
|
872
944
|
selected_cols = self._get_active_columns()
|
873
945
|
if len(selected_cols) > 0:
|
874
946
|
dataset = dataset.select(selected_cols)
|
875
947
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
876
948
|
transform_kwargs = dict(
|
877
949
|
session=dataset._session,
|
878
|
-
dependencies=
|
950
|
+
dependencies=self._deps,
|
879
951
|
score_sproc_imports=['sklearn'],
|
880
952
|
)
|
881
953
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -940,11 +1012,8 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
940
1012
|
|
941
1013
|
if isinstance(dataset, DataFrame):
|
942
1014
|
|
943
|
-
self.
|
944
|
-
|
945
|
-
inference_method=inference_method,
|
946
|
-
|
947
|
-
)
|
1015
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1016
|
+
self._deps = self._get_dependencies()
|
948
1017
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
949
1018
|
transform_kwargs = dict(
|
950
1019
|
session = dataset._session,
|
@@ -977,50 +1046,84 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
977
1046
|
)
|
978
1047
|
return output_df
|
979
1048
|
|
1049
|
+
|
1050
|
+
|
1051
|
+
def to_sklearn(self) -> Any:
|
1052
|
+
"""Get sklearn.calibration.CalibratedClassifierCV object.
|
1053
|
+
"""
|
1054
|
+
if self._sklearn_object is None:
|
1055
|
+
self._sklearn_object = self._create_sklearn_object()
|
1056
|
+
return self._sklearn_object
|
1057
|
+
|
1058
|
+
def to_xgboost(self) -> Any:
|
1059
|
+
raise exceptions.SnowflakeMLException(
|
1060
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1061
|
+
original_exception=AttributeError(
|
1062
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1063
|
+
"to_xgboost()",
|
1064
|
+
"to_sklearn()"
|
1065
|
+
)
|
1066
|
+
),
|
1067
|
+
)
|
1068
|
+
|
1069
|
+
def to_lightgbm(self) -> Any:
|
1070
|
+
raise exceptions.SnowflakeMLException(
|
1071
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1072
|
+
original_exception=AttributeError(
|
1073
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1074
|
+
"to_lightgbm()",
|
1075
|
+
"to_sklearn()"
|
1076
|
+
)
|
1077
|
+
),
|
1078
|
+
)
|
1079
|
+
|
1080
|
+
def _get_dependencies(self) -> List[str]:
|
1081
|
+
return self._deps
|
1082
|
+
|
980
1083
|
|
981
|
-
def
|
1084
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
982
1085
|
self._model_signature_dict = dict()
|
983
1086
|
|
984
1087
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
985
1088
|
|
986
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1089
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
987
1090
|
outputs: List[BaseFeatureSpec] = []
|
988
1091
|
if hasattr(self, "predict"):
|
989
1092
|
# keep mypy happy
|
990
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1093
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
991
1094
|
# For classifier, the type of predict is the same as the type of label
|
992
|
-
if self._sklearn_object._estimator_type ==
|
993
|
-
|
1095
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1096
|
+
# label columns is the desired type for output
|
994
1097
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
995
1098
|
# rename the output columns
|
996
1099
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
997
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
998
|
-
|
999
|
-
|
1100
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1101
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1102
|
+
)
|
1000
1103
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1001
1104
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1002
|
-
# Clusterer returns int64 cluster labels.
|
1105
|
+
# Clusterer returns int64 cluster labels.
|
1003
1106
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1004
1107
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1005
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1108
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1109
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1110
|
+
)
|
1111
|
+
|
1009
1112
|
# For regressor, the type of predict is float64
|
1010
|
-
elif self._sklearn_object._estimator_type ==
|
1113
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1011
1114
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1012
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1115
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1116
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1117
|
+
)
|
1118
|
+
|
1016
1119
|
for prob_func in PROB_FUNCTIONS:
|
1017
1120
|
if hasattr(self, prob_func):
|
1018
1121
|
output_cols_prefix: str = f"{prob_func}_"
|
1019
1122
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1020
1123
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1021
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1022
|
-
|
1023
|
-
|
1124
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1125
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1126
|
+
)
|
1024
1127
|
|
1025
1128
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1026
1129
|
items = list(self._model_signature_dict.items())
|
@@ -1033,10 +1136,10 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1033
1136
|
"""Returns model signature of current class.
|
1034
1137
|
|
1035
1138
|
Raises:
|
1036
|
-
|
1139
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1037
1140
|
|
1038
1141
|
Returns:
|
1039
|
-
Dict
|
1142
|
+
Dict with each method and its input output signature
|
1040
1143
|
"""
|
1041
1144
|
if self._model_signature_dict is None:
|
1042
1145
|
raise exceptions.SnowflakeMLException(
|
@@ -1044,35 +1147,3 @@ class CalibratedClassifierCV(BaseTransformer):
|
|
1044
1147
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1045
1148
|
)
|
1046
1149
|
return self._model_signature_dict
|
1047
|
-
|
1048
|
-
def to_sklearn(self) -> Any:
|
1049
|
-
"""Get sklearn.calibration.CalibratedClassifierCV object.
|
1050
|
-
"""
|
1051
|
-
if self._sklearn_object is None:
|
1052
|
-
self._sklearn_object = self._create_sklearn_object()
|
1053
|
-
return self._sklearn_object
|
1054
|
-
|
1055
|
-
def to_xgboost(self) -> Any:
|
1056
|
-
raise exceptions.SnowflakeMLException(
|
1057
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1058
|
-
original_exception=AttributeError(
|
1059
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1060
|
-
"to_xgboost()",
|
1061
|
-
"to_sklearn()"
|
1062
|
-
)
|
1063
|
-
),
|
1064
|
-
)
|
1065
|
-
|
1066
|
-
def to_lightgbm(self) -> Any:
|
1067
|
-
raise exceptions.SnowflakeMLException(
|
1068
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1069
|
-
original_exception=AttributeError(
|
1070
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1071
|
-
"to_lightgbm()",
|
1072
|
-
"to_sklearn()"
|
1073
|
-
)
|
1074
|
-
),
|
1075
|
-
)
|
1076
|
-
|
1077
|
-
def _get_dependencies(self) -> List[str]:
|
1078
|
-
return self._deps
|