snowflake-ml-python 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/env_utils.py +77 -32
- snowflake/ml/_internal/exceptions/dataset_error_messages.py +5 -0
- snowflake/ml/_internal/exceptions/dataset_errors.py +24 -0
- snowflake/ml/_internal/exceptions/error_codes.py +3 -0
- snowflake/ml/_internal/lineage/data_source.py +10 -0
- snowflake/ml/_internal/lineage/dataset_dataframe.py +44 -0
- snowflake/ml/_internal/utils/identifier.py +3 -1
- snowflake/ml/_internal/utils/sql_identifier.py +2 -6
- snowflake/ml/dataset/__init__.py +10 -0
- snowflake/ml/dataset/dataset.py +454 -129
- snowflake/ml/dataset/dataset_factory.py +53 -0
- snowflake/ml/dataset/dataset_metadata.py +103 -0
- snowflake/ml/dataset/dataset_reader.py +202 -0
- snowflake/ml/feature_store/feature_store.py +531 -332
- snowflake/ml/feature_store/feature_view.py +40 -23
- snowflake/ml/fileset/embedded_stage_fs.py +146 -0
- snowflake/ml/fileset/sfcfs.py +56 -54
- snowflake/ml/fileset/snowfs.py +159 -0
- snowflake/ml/fileset/stage_fs.py +49 -17
- snowflake/ml/model/__init__.py +2 -2
- snowflake/ml/model/_api.py +16 -1
- snowflake/ml/model/_client/model/model_impl.py +27 -0
- snowflake/ml/model/_client/model/model_version_impl.py +137 -50
- snowflake/ml/model/_client/ops/model_ops.py +159 -40
- snowflake/ml/model/_client/sql/model.py +25 -2
- snowflake/ml/model/_client/sql/model_version.py +131 -2
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +5 -1
- snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +1 -0
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +2 -0
- snowflake/ml/model/_deploy_client/utils/constants.py +0 -5
- snowflake/ml/model/_deploy_client/utils/snowservice_client.py +21 -50
- snowflake/ml/model/_model_composer/model_composer.py +22 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +38 -51
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +19 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
- snowflake/ml/model/_packager/model_env/model_env.py +41 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +37 -11
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
- snowflake/ml/model/_packager/model_packager.py +2 -5
- snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
- snowflake/ml/model/type_hints.py +21 -2
- snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +55 -3
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +34 -18
- snowflake/ml/modeling/_internal/model_trainer.py +7 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +42 -9
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +29 -7
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +261 -16
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +246 -175
- snowflake/ml/modeling/cluster/affinity_propagation.py +246 -175
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +246 -175
- snowflake/ml/modeling/cluster/birch.py +248 -175
- snowflake/ml/modeling/cluster/bisecting_k_means.py +248 -175
- snowflake/ml/modeling/cluster/dbscan.py +246 -175
- snowflake/ml/modeling/cluster/feature_agglomeration.py +248 -175
- snowflake/ml/modeling/cluster/k_means.py +248 -175
- snowflake/ml/modeling/cluster/mean_shift.py +246 -175
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +248 -175
- snowflake/ml/modeling/cluster/optics.py +246 -175
- snowflake/ml/modeling/cluster/spectral_biclustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_clustering.py +246 -175
- snowflake/ml/modeling/cluster/spectral_coclustering.py +246 -175
- snowflake/ml/modeling/compose/column_transformer.py +248 -175
- snowflake/ml/modeling/compose/transformed_target_regressor.py +246 -175
- snowflake/ml/modeling/covariance/elliptic_envelope.py +246 -175
- snowflake/ml/modeling/covariance/empirical_covariance.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso.py +246 -175
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +246 -175
- snowflake/ml/modeling/covariance/ledoit_wolf.py +246 -175
- snowflake/ml/modeling/covariance/min_cov_det.py +246 -175
- snowflake/ml/modeling/covariance/oas.py +246 -175
- snowflake/ml/modeling/covariance/shrunk_covariance.py +246 -175
- snowflake/ml/modeling/decomposition/dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/factor_analysis.py +248 -175
- snowflake/ml/modeling/decomposition/fast_ica.py +248 -175
- snowflake/ml/modeling/decomposition/incremental_pca.py +248 -175
- snowflake/ml/modeling/decomposition/kernel_pca.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +248 -175
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/pca.py +248 -175
- snowflake/ml/modeling/decomposition/sparse_pca.py +248 -175
- snowflake/ml/modeling/decomposition/truncated_svd.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +248 -175
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/bagging_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/isolation_forest.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +246 -175
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +246 -175
- snowflake/ml/modeling/ensemble/stacking_regressor.py +248 -175
- snowflake/ml/modeling/ensemble/voting_classifier.py +248 -175
- snowflake/ml/modeling/ensemble/voting_regressor.py +248 -175
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fdr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fpr.py +248 -175
- snowflake/ml/modeling/feature_selection/select_fwe.py +248 -175
- snowflake/ml/modeling/feature_selection/select_k_best.py +248 -175
- snowflake/ml/modeling/feature_selection/select_percentile.py +248 -175
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +248 -175
- snowflake/ml/modeling/feature_selection/variance_threshold.py +248 -175
- snowflake/ml/modeling/framework/_utils.py +8 -1
- snowflake/ml/modeling/framework/base.py +72 -37
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +246 -175
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +246 -175
- snowflake/ml/modeling/impute/iterative_imputer.py +248 -175
- snowflake/ml/modeling/impute/knn_imputer.py +248 -175
- snowflake/ml/modeling/impute/missing_indicator.py +248 -175
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/nystroem.py +248 -175
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +248 -175
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +248 -175
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +248 -175
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +246 -175
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ard_regression.py +246 -175
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/gamma_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/huber_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/lars.py +246 -175
- snowflake/ml/modeling/linear_model/lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +246 -175
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +246 -175
- snowflake/ml/modeling/linear_model/linear_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression.py +246 -175
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +246 -175
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +246 -175
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/perceptron.py +246 -175
- snowflake/ml/modeling/linear_model/poisson_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ransac_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/ridge.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +246 -175
- snowflake/ml/modeling/linear_model/ridge_cv.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_classifier.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +246 -175
- snowflake/ml/modeling/linear_model/sgd_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +246 -175
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +246 -175
- snowflake/ml/modeling/manifold/isomap.py +248 -175
- snowflake/ml/modeling/manifold/mds.py +248 -175
- snowflake/ml/modeling/manifold/spectral_embedding.py +248 -175
- snowflake/ml/modeling/manifold/tsne.py +248 -175
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +246 -175
- snowflake/ml/modeling/mixture/gaussian_mixture.py +246 -175
- snowflake/ml/modeling/model_selection/grid_search_cv.py +63 -41
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +80 -38
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +246 -175
- snowflake/ml/modeling/multiclass/output_code_classifier.py +246 -175
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/complement_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +246 -175
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neighbors/kernel_density.py +246 -175
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_centroid.py +246 -175
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +246 -175
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +248 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +246 -175
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +246 -175
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +248 -175
- snowflake/ml/modeling/neural_network/mlp_classifier.py +246 -175
- snowflake/ml/modeling/neural_network/mlp_regressor.py +246 -175
- snowflake/ml/modeling/pipeline/pipeline.py +517 -35
- snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
- snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
- snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +13 -5
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
- snowflake/ml/modeling/preprocessing/polynomial_features.py +248 -175
- snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
- snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
- snowflake/ml/modeling/semi_supervised/label_propagation.py +246 -175
- snowflake/ml/modeling/semi_supervised/label_spreading.py +246 -175
- snowflake/ml/modeling/svm/linear_svc.py +246 -175
- snowflake/ml/modeling/svm/linear_svr.py +246 -175
- snowflake/ml/modeling/svm/nu_svc.py +246 -175
- snowflake/ml/modeling/svm/nu_svr.py +246 -175
- snowflake/ml/modeling/svm/svc.py +246 -175
- snowflake/ml/modeling/svm/svr.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/decision_tree_regressor.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_classifier.py +246 -175
- snowflake/ml/modeling/tree/extra_tree_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgb_regressor.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +246 -175
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +246 -175
- snowflake/ml/registry/model_registry.py +3 -149
- snowflake/ml/registry/registry.py +1 -1
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/METADATA +129 -57
- snowflake_ml_python-1.5.0.dist-info/RECORD +380 -0
- snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
- snowflake/ml/registry/_artifact_manager.py +0 -156
- snowflake/ml/registry/artifact.py +0 -46
- snowflake_ml_python-1.4.0.dist-info/RECORD +0 -370
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.5.0.dist-info}/top_level.txt +0 -0
@@ -33,6 +33,15 @@ from snowflake.ml.modeling._internal.transformer_protocols import (
|
|
33
33
|
BatchInferenceKwargsTypedDict,
|
34
34
|
ScoreKwargsTypedDict
|
35
35
|
)
|
36
|
+
from snowflake.ml.model._signatures import utils as model_signature_utils
|
37
|
+
from snowflake.ml.model.model_signature import (
|
38
|
+
BaseFeatureSpec,
|
39
|
+
DataType,
|
40
|
+
FeatureSpec,
|
41
|
+
ModelSignature,
|
42
|
+
_infer_signature,
|
43
|
+
_rename_signature_with_snowflake_identifiers,
|
44
|
+
)
|
36
45
|
|
37
46
|
from snowflake.ml.modeling._internal.model_transformer_builder import ModelTransformerBuilder
|
38
47
|
|
@@ -43,16 +52,6 @@ from snowflake.ml.modeling._internal.estimator_utils import (
|
|
43
52
|
validate_sklearn_args,
|
44
53
|
)
|
45
54
|
|
46
|
-
from snowflake.ml.model.model_signature import (
|
47
|
-
DataType,
|
48
|
-
FeatureSpec,
|
49
|
-
ModelSignature,
|
50
|
-
_infer_signature,
|
51
|
-
_rename_signature_with_snowflake_identifiers,
|
52
|
-
BaseFeatureSpec,
|
53
|
-
)
|
54
|
-
from snowflake.ml.model._signatures import utils as model_signature_utils
|
55
|
-
|
56
55
|
_PROJECT = "ModelDevelopment"
|
57
56
|
# Derive subproject from module name by removing "sklearn"
|
58
57
|
# and converting module name from underscore to CamelCase
|
@@ -61,12 +60,6 @@ _SUBPROJECT = "".join([s.capitalize() for s in "sklearn.neighbors".replace("skle
|
|
61
60
|
|
62
61
|
DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
|
63
62
|
|
64
|
-
def _is_fit_transform_method_enabled() -> Callable[[Any], bool]:
|
65
|
-
def check(self: BaseTransformer) -> TypeGuard[Callable[..., object]]:
|
66
|
-
return False and callable(getattr(self._sklearn_object, "fit_transform", None))
|
67
|
-
return check
|
68
|
-
|
69
|
-
|
70
63
|
class RadiusNeighborsClassifier(BaseTransformer):
|
71
64
|
r"""Classifier implementing a vote among neighbors within a given radius
|
72
65
|
For more details on this class, see [sklearn.neighbors.RadiusNeighborsClassifier]
|
@@ -285,12 +278,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
285
278
|
)
|
286
279
|
return selected_cols
|
287
280
|
|
288
|
-
|
289
|
-
project=_PROJECT,
|
290
|
-
subproject=_SUBPROJECT,
|
291
|
-
custom_tags=dict([("autogen", True)]),
|
292
|
-
)
|
293
|
-
def fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RadiusNeighborsClassifier":
|
281
|
+
def _fit(self, dataset: Union[DataFrame, pd.DataFrame]) -> "RadiusNeighborsClassifier":
|
294
282
|
"""Fit the radius neighbors classifier from the training dataset
|
295
283
|
For more details on this function, see [sklearn.neighbors.RadiusNeighborsClassifier.fit]
|
296
284
|
(https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.RadiusNeighborsClassifier.html#sklearn.neighbors.RadiusNeighborsClassifier.fit)
|
@@ -317,12 +305,14 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
317
305
|
|
318
306
|
self._snowpark_cols = dataset.select(self.input_cols).columns
|
319
307
|
|
320
|
-
|
308
|
+
# If we are already in a stored procedure, no need to kick off another one.
|
321
309
|
if SNOWML_SPROC_ENV in os.environ:
|
322
310
|
statement_params = telemetry.get_function_usage_statement_params(
|
323
311
|
project=_PROJECT,
|
324
312
|
subproject=_SUBPROJECT,
|
325
|
-
function_name=telemetry.get_statement_params_full_func_name(
|
313
|
+
function_name=telemetry.get_statement_params_full_func_name(
|
314
|
+
inspect.currentframe(), RadiusNeighborsClassifier.__class__.__name__
|
315
|
+
),
|
326
316
|
api_calls=[Session.call],
|
327
317
|
custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
|
328
318
|
)
|
@@ -343,27 +333,24 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
343
333
|
)
|
344
334
|
self._sklearn_object = model_trainer.train()
|
345
335
|
self._is_fitted = True
|
346
|
-
self.
|
336
|
+
self._generate_model_signatures(dataset)
|
347
337
|
return self
|
348
338
|
|
349
339
|
def _batch_inference_validate_snowpark(
|
350
340
|
self,
|
351
341
|
dataset: DataFrame,
|
352
342
|
inference_method: str,
|
353
|
-
) ->
|
354
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
355
|
-
return the available package that exists in the snowflake anaconda channel
|
343
|
+
) -> None:
|
344
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe.
|
356
345
|
|
357
346
|
Args:
|
358
347
|
dataset: snowpark dataframe
|
359
348
|
inference_method: the inference method such as predict, score...
|
360
|
-
|
349
|
+
|
361
350
|
Raises:
|
362
351
|
SnowflakeMLException: If the estimator is not fitted, raise error
|
363
352
|
SnowflakeMLException: If the session is None, raise error
|
364
353
|
|
365
|
-
Returns:
|
366
|
-
A list of available package that exists in the snowflake anaconda channel
|
367
354
|
"""
|
368
355
|
if not self._is_fitted:
|
369
356
|
raise exceptions.SnowflakeMLException(
|
@@ -381,9 +368,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
381
368
|
"Session must not specified for snowpark dataset."
|
382
369
|
),
|
383
370
|
)
|
384
|
-
|
385
|
-
return pkg_version_utils.get_valid_pkg_versions_supported_in_snowflake_conda_channel(
|
386
|
-
pkg_versions=self._get_dependencies(), session=session, subproject=_SUBPROJECT)
|
371
|
+
|
387
372
|
|
388
373
|
@available_if(original_estimator_has_callable("predict")) # type: ignore[misc]
|
389
374
|
@telemetry.send_api_usage_telemetry(
|
@@ -419,7 +404,9 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
419
404
|
# when it is classifier, infer the datatype from label columns
|
420
405
|
if expected_type_inferred == "" and 'predict' in self.model_signatures:
|
421
406
|
# Batch inference takes a single expected output column type. Use the first columns type for now.
|
422
|
-
label_cols_signatures = [
|
407
|
+
label_cols_signatures = [
|
408
|
+
row for row in self.model_signatures['predict'].outputs if row.name in self.output_cols
|
409
|
+
]
|
423
410
|
if len(label_cols_signatures) == 0:
|
424
411
|
error_str = f"Output columns {self.output_cols} do not match model signatures {self.model_signatures['predict'].outputs}."
|
425
412
|
raise exceptions.SnowflakeMLException(
|
@@ -427,25 +414,23 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
427
414
|
original_exception=ValueError(error_str),
|
428
415
|
)
|
429
416
|
|
430
|
-
expected_type_inferred = convert_sp_to_sf_type(
|
431
|
-
label_cols_signatures[0].as_snowpark_type()
|
432
|
-
)
|
417
|
+
expected_type_inferred = convert_sp_to_sf_type(label_cols_signatures[0].as_snowpark_type())
|
433
418
|
|
434
|
-
self.
|
435
|
-
|
419
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
420
|
+
self._deps = self._get_dependencies()
|
421
|
+
assert isinstance(
|
422
|
+
dataset._session, Session
|
423
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
436
424
|
|
437
425
|
transform_kwargs = dict(
|
438
|
-
session
|
439
|
-
dependencies
|
440
|
-
drop_input_cols
|
441
|
-
expected_output_cols_type
|
426
|
+
session=dataset._session,
|
427
|
+
dependencies=self._deps,
|
428
|
+
drop_input_cols=self._drop_input_cols,
|
429
|
+
expected_output_cols_type=expected_type_inferred,
|
442
430
|
)
|
443
431
|
|
444
432
|
elif isinstance(dataset, pd.DataFrame):
|
445
|
-
transform_kwargs = dict(
|
446
|
-
snowpark_input_cols = self._snowpark_cols,
|
447
|
-
drop_input_cols = self._drop_input_cols
|
448
|
-
)
|
433
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
449
434
|
|
450
435
|
transform_handlers = ModelTransformerBuilder.build(
|
451
436
|
dataset=dataset,
|
@@ -485,7 +470,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
485
470
|
Transformed dataset.
|
486
471
|
"""
|
487
472
|
super()._check_dataset_type(dataset)
|
488
|
-
inference_method="transform"
|
473
|
+
inference_method = "transform"
|
489
474
|
|
490
475
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
491
476
|
# are specific to the type of dataset used.
|
@@ -515,24 +500,19 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
515
500
|
if all(x == output_types[0] for x in output_types) and len(output_types) == len(self.output_cols):
|
516
501
|
expected_dtype = convert_sp_to_sf_type(output_types[0])
|
517
502
|
|
518
|
-
self.
|
519
|
-
|
520
|
-
inference_method=inference_method,
|
521
|
-
)
|
503
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
504
|
+
self._deps = self._get_dependencies()
|
522
505
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
523
506
|
|
524
507
|
transform_kwargs = dict(
|
525
|
-
session
|
526
|
-
dependencies
|
527
|
-
drop_input_cols
|
528
|
-
expected_output_cols_type
|
508
|
+
session=dataset._session,
|
509
|
+
dependencies=self._deps,
|
510
|
+
drop_input_cols=self._drop_input_cols,
|
511
|
+
expected_output_cols_type=expected_dtype,
|
529
512
|
)
|
530
513
|
|
531
514
|
elif isinstance(dataset, pd.DataFrame):
|
532
|
-
transform_kwargs = dict(
|
533
|
-
snowpark_input_cols = self._snowpark_cols,
|
534
|
-
drop_input_cols = self._drop_input_cols
|
535
|
-
)
|
515
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
536
516
|
|
537
517
|
transform_handlers = ModelTransformerBuilder.build(
|
538
518
|
dataset=dataset,
|
@@ -551,7 +531,11 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
551
531
|
return output_df
|
552
532
|
|
553
533
|
@available_if(original_estimator_has_callable("fit_predict")) # type: ignore[misc]
|
554
|
-
def fit_predict(
|
534
|
+
def fit_predict(
|
535
|
+
self,
|
536
|
+
dataset: Union[DataFrame, pd.DataFrame],
|
537
|
+
output_cols_prefix: str = "fit_predict_",
|
538
|
+
) -> Union[DataFrame, pd.DataFrame]:
|
555
539
|
""" Method not supported for this class.
|
556
540
|
|
557
541
|
|
@@ -576,22 +560,104 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
576
560
|
)
|
577
561
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
578
562
|
drop_input_cols=self._drop_input_cols,
|
579
|
-
expected_output_cols_list=
|
563
|
+
expected_output_cols_list=(
|
564
|
+
self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix)
|
565
|
+
),
|
580
566
|
)
|
581
567
|
self._sklearn_object = fitted_estimator
|
582
568
|
self._is_fitted = True
|
583
569
|
return output_result
|
584
570
|
|
571
|
+
|
572
|
+
@available_if(original_estimator_has_callable("fit_transform")) # type: ignore[misc]
|
573
|
+
def fit_transform(self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "fit_transform_",) -> Union[DataFrame, pd.DataFrame]:
|
574
|
+
""" Method not supported for this class.
|
575
|
+
|
585
576
|
|
586
|
-
|
587
|
-
|
588
|
-
|
577
|
+
Raises:
|
578
|
+
TypeError: Supported dataset types: snowpark.DataFrame, pandas.DataFrame.
|
579
|
+
|
580
|
+
Args:
|
581
|
+
dataset: Union[snowflake.snowpark.DataFrame, pandas.DataFrame]
|
582
|
+
Snowpark or Pandas DataFrame.
|
583
|
+
output_cols_prefix: Prefix for the response columns
|
589
584
|
Returns:
|
590
585
|
Transformed dataset.
|
591
586
|
"""
|
592
|
-
self.
|
593
|
-
|
594
|
-
|
587
|
+
self._infer_input_output_cols(dataset)
|
588
|
+
super()._check_dataset_type(dataset)
|
589
|
+
model_trainer = ModelTrainerBuilder.build_fit_transform(
|
590
|
+
estimator=self._sklearn_object,
|
591
|
+
dataset=dataset,
|
592
|
+
input_cols=self.input_cols,
|
593
|
+
label_cols=self.label_cols,
|
594
|
+
sample_weight_col=self.sample_weight_col,
|
595
|
+
autogenerated=self._autogenerated,
|
596
|
+
subproject=_SUBPROJECT,
|
597
|
+
)
|
598
|
+
output_result, fitted_estimator = model_trainer.train_fit_transform(
|
599
|
+
drop_input_cols=self._drop_input_cols,
|
600
|
+
expected_output_cols_list=self.output_cols,
|
601
|
+
)
|
602
|
+
self._sklearn_object = fitted_estimator
|
603
|
+
self._is_fitted = True
|
604
|
+
return output_result
|
605
|
+
|
606
|
+
|
607
|
+
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
608
|
+
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
609
|
+
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
610
|
+
"""
|
611
|
+
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
612
|
+
# The following condition is introduced for kneighbors methods, and not used in other methods
|
613
|
+
if output_cols:
|
614
|
+
output_cols = [
|
615
|
+
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
616
|
+
for c in output_cols
|
617
|
+
]
|
618
|
+
elif getattr(self._sklearn_object, "classes_", None) is None:
|
619
|
+
output_cols = [output_cols_prefix]
|
620
|
+
elif self._sklearn_object is not None:
|
621
|
+
classes = self._sklearn_object.classes_
|
622
|
+
if isinstance(classes, numpy.ndarray):
|
623
|
+
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
624
|
+
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
625
|
+
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
626
|
+
output_cols = []
|
627
|
+
for i, cl in enumerate(classes):
|
628
|
+
# For binary classification, there is only one output column for each class
|
629
|
+
# ndarray as the two classes are complementary.
|
630
|
+
if len(cl) == 2:
|
631
|
+
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
632
|
+
else:
|
633
|
+
output_cols.extend([
|
634
|
+
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
635
|
+
])
|
636
|
+
else:
|
637
|
+
output_cols = []
|
638
|
+
|
639
|
+
# Make sure column names are valid snowflake identifiers.
|
640
|
+
assert output_cols is not None # Make MyPy happy
|
641
|
+
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
642
|
+
|
643
|
+
return rv
|
644
|
+
|
645
|
+
def _align_expected_output_names(
|
646
|
+
self, method: str, dataset: DataFrame, expected_output_cols_list: List[str], output_cols_prefix: str
|
647
|
+
) -> List[str]:
|
648
|
+
# in case the inferred output column names dimension is different
|
649
|
+
# we use one line of snowpark dataframe and put it into sklearn estimator using pandas
|
650
|
+
output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
|
651
|
+
output_df_columns = list(output_df_pd.columns)
|
652
|
+
output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
|
653
|
+
if self.sample_weight_col:
|
654
|
+
output_df_columns_set -= set(self.sample_weight_col)
|
655
|
+
# if the dimension of inferred output column names is correct; use it
|
656
|
+
if len(expected_output_cols_list) == len(output_df_columns_set):
|
657
|
+
return expected_output_cols_list
|
658
|
+
# otherwise, use the sklearn estimator's output
|
659
|
+
else:
|
660
|
+
return sorted(list(output_df_columns_set), key=lambda x: output_df_columns.index(x))
|
595
661
|
|
596
662
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
597
663
|
@telemetry.send_api_usage_telemetry(
|
@@ -625,24 +691,26 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
625
691
|
# are specific to the type of dataset used.
|
626
692
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
627
693
|
|
694
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
695
|
+
|
628
696
|
if isinstance(dataset, DataFrame):
|
629
|
-
self.
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
697
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
698
|
+
self._deps = self._get_dependencies()
|
699
|
+
assert isinstance(
|
700
|
+
dataset._session, Session
|
701
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
634
702
|
transform_kwargs = dict(
|
635
703
|
session=dataset._session,
|
636
704
|
dependencies=self._deps,
|
637
|
-
drop_input_cols
|
705
|
+
drop_input_cols=self._drop_input_cols,
|
638
706
|
expected_output_cols_type="float",
|
639
707
|
)
|
708
|
+
expected_output_cols = self._align_expected_output_names(
|
709
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
710
|
+
)
|
640
711
|
|
641
712
|
elif isinstance(dataset, pd.DataFrame):
|
642
|
-
transform_kwargs = dict(
|
643
|
-
snowpark_input_cols = self._snowpark_cols,
|
644
|
-
drop_input_cols = self._drop_input_cols
|
645
|
-
)
|
713
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
646
714
|
|
647
715
|
transform_handlers = ModelTransformerBuilder.build(
|
648
716
|
dataset=dataset,
|
@@ -654,7 +722,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
654
722
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
655
723
|
inference_method=inference_method,
|
656
724
|
input_cols=self.input_cols,
|
657
|
-
expected_output_cols=
|
725
|
+
expected_output_cols=expected_output_cols,
|
658
726
|
**transform_kwargs
|
659
727
|
)
|
660
728
|
return output_df
|
@@ -686,29 +754,30 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
686
754
|
Output dataset with log probability of the sample for each class in the model.
|
687
755
|
"""
|
688
756
|
super()._check_dataset_type(dataset)
|
689
|
-
inference_method="predict_log_proba"
|
757
|
+
inference_method = "predict_log_proba"
|
758
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
690
759
|
|
691
760
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
692
761
|
# are specific to the type of dataset used.
|
693
762
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
694
763
|
|
695
764
|
if isinstance(dataset, DataFrame):
|
696
|
-
self.
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
765
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
766
|
+
self._deps = self._get_dependencies()
|
767
|
+
assert isinstance(
|
768
|
+
dataset._session, Session
|
769
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
701
770
|
transform_kwargs = dict(
|
702
771
|
session=dataset._session,
|
703
772
|
dependencies=self._deps,
|
704
|
-
drop_input_cols
|
773
|
+
drop_input_cols=self._drop_input_cols,
|
705
774
|
expected_output_cols_type="float",
|
706
775
|
)
|
776
|
+
expected_output_cols = self._align_expected_output_names(
|
777
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
778
|
+
)
|
707
779
|
elif isinstance(dataset, pd.DataFrame):
|
708
|
-
transform_kwargs = dict(
|
709
|
-
snowpark_input_cols = self._snowpark_cols,
|
710
|
-
drop_input_cols = self._drop_input_cols
|
711
|
-
)
|
780
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
712
781
|
|
713
782
|
transform_handlers = ModelTransformerBuilder.build(
|
714
783
|
dataset=dataset,
|
@@ -721,7 +790,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
721
790
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
722
791
|
inference_method=inference_method,
|
723
792
|
input_cols=self.input_cols,
|
724
|
-
expected_output_cols=
|
793
|
+
expected_output_cols=expected_output_cols,
|
725
794
|
**transform_kwargs
|
726
795
|
)
|
727
796
|
return output_df
|
@@ -747,30 +816,32 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
747
816
|
Output dataset with results of the decision function for the samples in input dataset.
|
748
817
|
"""
|
749
818
|
super()._check_dataset_type(dataset)
|
750
|
-
inference_method="decision_function"
|
819
|
+
inference_method = "decision_function"
|
751
820
|
|
752
821
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
753
822
|
# are specific to the type of dataset used.
|
754
823
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
755
824
|
|
825
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
826
|
+
|
756
827
|
if isinstance(dataset, DataFrame):
|
757
|
-
self.
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
828
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
829
|
+
self._deps = self._get_dependencies()
|
830
|
+
assert isinstance(
|
831
|
+
dataset._session, Session
|
832
|
+
) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
762
833
|
transform_kwargs = dict(
|
763
834
|
session=dataset._session,
|
764
835
|
dependencies=self._deps,
|
765
|
-
drop_input_cols
|
836
|
+
drop_input_cols=self._drop_input_cols,
|
766
837
|
expected_output_cols_type="float",
|
767
838
|
)
|
839
|
+
expected_output_cols = self._align_expected_output_names(
|
840
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
841
|
+
)
|
768
842
|
|
769
843
|
elif isinstance(dataset, pd.DataFrame):
|
770
|
-
transform_kwargs = dict(
|
771
|
-
snowpark_input_cols = self._snowpark_cols,
|
772
|
-
drop_input_cols = self._drop_input_cols
|
773
|
-
)
|
844
|
+
transform_kwargs = dict(snowpark_input_cols=self._snowpark_cols, drop_input_cols=self._drop_input_cols)
|
774
845
|
|
775
846
|
transform_handlers = ModelTransformerBuilder.build(
|
776
847
|
dataset=dataset,
|
@@ -783,7 +854,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
783
854
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
784
855
|
inference_method=inference_method,
|
785
856
|
input_cols=self.input_cols,
|
786
|
-
expected_output_cols=
|
857
|
+
expected_output_cols=expected_output_cols,
|
787
858
|
**transform_kwargs
|
788
859
|
)
|
789
860
|
return output_df
|
@@ -812,17 +883,17 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
812
883
|
Output dataset with probability of the sample for each class in the model.
|
813
884
|
"""
|
814
885
|
super()._check_dataset_type(dataset)
|
815
|
-
inference_method="score_samples"
|
886
|
+
inference_method = "score_samples"
|
816
887
|
|
817
888
|
# This dictionary contains optional kwargs for batch inference. These kwargs
|
818
889
|
# are specific to the type of dataset used.
|
819
890
|
transform_kwargs: BatchInferenceKwargsTypedDict = dict()
|
820
891
|
|
892
|
+
expected_output_cols = self._get_output_column_names(output_cols_prefix)
|
893
|
+
|
821
894
|
if isinstance(dataset, DataFrame):
|
822
|
-
self.
|
823
|
-
|
824
|
-
inference_method=inference_method,
|
825
|
-
)
|
895
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
896
|
+
self._deps = self._get_dependencies()
|
826
897
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
827
898
|
transform_kwargs = dict(
|
828
899
|
session=dataset._session,
|
@@ -830,6 +901,9 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
830
901
|
drop_input_cols = self._drop_input_cols,
|
831
902
|
expected_output_cols_type="float",
|
832
903
|
)
|
904
|
+
expected_output_cols = self._align_expected_output_names(
|
905
|
+
inference_method, dataset, expected_output_cols, output_cols_prefix
|
906
|
+
)
|
833
907
|
|
834
908
|
elif isinstance(dataset, pd.DataFrame):
|
835
909
|
transform_kwargs = dict(
|
@@ -848,7 +922,7 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
848
922
|
output_df: DATAFRAME_TYPE = transform_handlers.batch_inference(
|
849
923
|
inference_method=inference_method,
|
850
924
|
input_cols=self.input_cols,
|
851
|
-
expected_output_cols=
|
925
|
+
expected_output_cols=expected_output_cols,
|
852
926
|
**transform_kwargs
|
853
927
|
)
|
854
928
|
return output_df
|
@@ -883,17 +957,15 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
883
957
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
884
958
|
|
885
959
|
if isinstance(dataset, DataFrame):
|
886
|
-
self.
|
887
|
-
|
888
|
-
inference_method="score",
|
889
|
-
)
|
960
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
|
961
|
+
self._deps = self._get_dependencies()
|
890
962
|
selected_cols = self._get_active_columns()
|
891
963
|
if len(selected_cols) > 0:
|
892
964
|
dataset = dataset.select(selected_cols)
|
893
965
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
894
966
|
transform_kwargs = dict(
|
895
967
|
session=dataset._session,
|
896
|
-
dependencies=
|
968
|
+
dependencies=self._deps,
|
897
969
|
score_sproc_imports=['sklearn'],
|
898
970
|
)
|
899
971
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -958,11 +1030,8 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
958
1030
|
|
959
1031
|
if isinstance(dataset, DataFrame):
|
960
1032
|
|
961
|
-
self.
|
962
|
-
|
963
|
-
inference_method=inference_method,
|
964
|
-
|
965
|
-
)
|
1033
|
+
self._batch_inference_validate_snowpark(dataset=dataset, inference_method=inference_method)
|
1034
|
+
self._deps = self._get_dependencies()
|
966
1035
|
assert isinstance(dataset._session, Session) # mypy does not recognize the check in _batch_inference_validate_snowpark()
|
967
1036
|
transform_kwargs = dict(
|
968
1037
|
session = dataset._session,
|
@@ -995,50 +1064,84 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
995
1064
|
)
|
996
1065
|
return output_df
|
997
1066
|
|
1067
|
+
|
1068
|
+
|
1069
|
+
def to_sklearn(self) -> Any:
|
1070
|
+
"""Get sklearn.neighbors.RadiusNeighborsClassifier object.
|
1071
|
+
"""
|
1072
|
+
if self._sklearn_object is None:
|
1073
|
+
self._sklearn_object = self._create_sklearn_object()
|
1074
|
+
return self._sklearn_object
|
1075
|
+
|
1076
|
+
def to_xgboost(self) -> Any:
|
1077
|
+
raise exceptions.SnowflakeMLException(
|
1078
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1079
|
+
original_exception=AttributeError(
|
1080
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1081
|
+
"to_xgboost()",
|
1082
|
+
"to_sklearn()"
|
1083
|
+
)
|
1084
|
+
),
|
1085
|
+
)
|
1086
|
+
|
1087
|
+
def to_lightgbm(self) -> Any:
|
1088
|
+
raise exceptions.SnowflakeMLException(
|
1089
|
+
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1090
|
+
original_exception=AttributeError(
|
1091
|
+
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1092
|
+
"to_lightgbm()",
|
1093
|
+
"to_sklearn()"
|
1094
|
+
)
|
1095
|
+
),
|
1096
|
+
)
|
1097
|
+
|
1098
|
+
def _get_dependencies(self) -> List[str]:
|
1099
|
+
return self._deps
|
1100
|
+
|
998
1101
|
|
999
|
-
def
|
1102
|
+
def _generate_model_signatures(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
|
1000
1103
|
self._model_signature_dict = dict()
|
1001
1104
|
|
1002
1105
|
PROB_FUNCTIONS = ["predict_log_proba", "predict_proba", "decision_function"]
|
1003
1106
|
|
1004
|
-
inputs = list(_infer_signature(dataset[self.input_cols], "input"))
|
1107
|
+
inputs = list(_infer_signature(dataset[self.input_cols], "input", use_snowflake_identifiers=True))
|
1005
1108
|
outputs: List[BaseFeatureSpec] = []
|
1006
1109
|
if hasattr(self, "predict"):
|
1007
1110
|
# keep mypy happy
|
1008
|
-
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1111
|
+
assert self._sklearn_object is not None and hasattr(self._sklearn_object, "_estimator_type")
|
1009
1112
|
# For classifier, the type of predict is the same as the type of label
|
1010
|
-
if self._sklearn_object._estimator_type ==
|
1011
|
-
|
1113
|
+
if self._sklearn_object._estimator_type == "classifier":
|
1114
|
+
# label columns is the desired type for output
|
1012
1115
|
outputs = list(_infer_signature(dataset[self.label_cols], "output", use_snowflake_identifiers=True))
|
1013
1116
|
# rename the output columns
|
1014
1117
|
outputs = list(model_signature_utils.rename_features(outputs, self.output_cols))
|
1015
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1016
|
-
|
1017
|
-
|
1118
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1119
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1120
|
+
)
|
1018
1121
|
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
|
1019
1122
|
# For outlier models, returns -1 for outliers and 1 for inliers.
|
1020
|
-
# Clusterer returns int64 cluster labels.
|
1123
|
+
# Clusterer returns int64 cluster labels.
|
1021
1124
|
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
|
1022
1125
|
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
|
1023
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1126
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1127
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1128
|
+
)
|
1129
|
+
|
1027
1130
|
# For regressor, the type of predict is float64
|
1028
|
-
elif self._sklearn_object._estimator_type ==
|
1131
|
+
elif self._sklearn_object._estimator_type == "regressor":
|
1029
1132
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
|
1030
|
-
self._model_signature_dict["predict"] = ModelSignature(
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1133
|
+
self._model_signature_dict["predict"] = ModelSignature(
|
1134
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1135
|
+
)
|
1136
|
+
|
1034
1137
|
for prob_func in PROB_FUNCTIONS:
|
1035
1138
|
if hasattr(self, prob_func):
|
1036
1139
|
output_cols_prefix: str = f"{prob_func}_"
|
1037
1140
|
output_column_names = self._get_output_column_names(output_cols_prefix)
|
1038
1141
|
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
|
1039
|
-
self._model_signature_dict[prob_func] = ModelSignature(
|
1040
|
-
|
1041
|
-
|
1142
|
+
self._model_signature_dict[prob_func] = ModelSignature(
|
1143
|
+
inputs, ([] if self._drop_input_cols else inputs) + outputs
|
1144
|
+
)
|
1042
1145
|
|
1043
1146
|
# Output signature names may still need to be renamed, since they were not created with `_infer_signature`.
|
1044
1147
|
items = list(self._model_signature_dict.items())
|
@@ -1051,10 +1154,10 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
1051
1154
|
"""Returns model signature of current class.
|
1052
1155
|
|
1053
1156
|
Raises:
|
1054
|
-
|
1157
|
+
SnowflakeMLException: If estimator is not fitted, then model signature cannot be inferred
|
1055
1158
|
|
1056
1159
|
Returns:
|
1057
|
-
Dict
|
1160
|
+
Dict with each method and its input output signature
|
1058
1161
|
"""
|
1059
1162
|
if self._model_signature_dict is None:
|
1060
1163
|
raise exceptions.SnowflakeMLException(
|
@@ -1062,35 +1165,3 @@ class RadiusNeighborsClassifier(BaseTransformer):
|
|
1062
1165
|
original_exception=RuntimeError("Estimator not fitted before accessing property model_signatures!"),
|
1063
1166
|
)
|
1064
1167
|
return self._model_signature_dict
|
1065
|
-
|
1066
|
-
def to_sklearn(self) -> Any:
|
1067
|
-
"""Get sklearn.neighbors.RadiusNeighborsClassifier object.
|
1068
|
-
"""
|
1069
|
-
if self._sklearn_object is None:
|
1070
|
-
self._sklearn_object = self._create_sklearn_object()
|
1071
|
-
return self._sklearn_object
|
1072
|
-
|
1073
|
-
def to_xgboost(self) -> Any:
|
1074
|
-
raise exceptions.SnowflakeMLException(
|
1075
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1076
|
-
original_exception=AttributeError(
|
1077
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1078
|
-
"to_xgboost()",
|
1079
|
-
"to_sklearn()"
|
1080
|
-
)
|
1081
|
-
),
|
1082
|
-
)
|
1083
|
-
|
1084
|
-
def to_lightgbm(self) -> Any:
|
1085
|
-
raise exceptions.SnowflakeMLException(
|
1086
|
-
error_code=error_codes.METHOD_NOT_ALLOWED,
|
1087
|
-
original_exception=AttributeError(
|
1088
|
-
modeling_error_messages.UNSUPPORTED_MODEL_CONVERSION.format(
|
1089
|
-
"to_lightgbm()",
|
1090
|
-
"to_sklearn()"
|
1091
|
-
)
|
1092
|
-
),
|
1093
|
-
)
|
1094
|
-
|
1095
|
-
def _get_dependencies(self) -> List[str]:
|
1096
|
-
return self._deps
|