snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
```diff
--- a/snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
+++ b/snowflake/ml/modeling/feature_selection/sequential_feature_selector.py
@@ -324,18 +324,24 @@ class SequentialFeatureSelector(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -407,7 +413,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -469,16 +475,16 @@ class SequentialFeatureSelector(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -496,7 +502,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -547,7 +553,7 @@ class SequentialFeatureSelector(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -565,44 +571,6 @@ class SequentialFeatureSelector(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -642,7 +610,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -707,7 +675,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -768,7 +736,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -833,7 +801,7 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -887,13 +855,17 @@ class SequentialFeatureSelector(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -967,9 +939,9 @@ class SequentialFeatureSelector(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
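The hunks above repeat across nearly every generated estimator in this release: the per-class `_get_pass_through_columns` helper is deleted and the handler layer instead receives the `drop_input_cols` flag directly. A minimal sketch (not library code) contrasting the two behaviors, with hypothetical column names:

```python
# Minimal sketch contrasting 1.3.0 and 1.4.0 pass-through handling.
# All names below are illustrative, not part of the snowflake-ml-python API.
from typing import List


def pass_through_columns_v130(dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> List[str]:
    """Mirror of the removed 1.3.0 helper: the estimator computed the list itself."""
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))


# 1.4.0: the estimator no longer computes the list; it forwards the boolean to
# the batch-inference kwargs and the handler layer decides which columns to keep.
transform_kwargs = dict(
    drop_input_cols=False,  # comes from self._drop_input_cols in the real code
    expected_output_cols_type="float",
)

print(sorted(pass_through_columns_v130(["A", "B", "OUTPUT"], ["OUTPUT"], drop_input_cols=False)))
# ['A', 'B'] -- with drop_input_cols=True this would be []
```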
```diff
--- a/snowflake/ml/modeling/feature_selection/variance_threshold.py
+++ b/snowflake/ml/modeling/feature_selection/variance_threshold.py
@@ -257,18 +257,24 @@ class VarianceThreshold(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -340,7 +346,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -402,16 +408,16 @@ class VarianceThreshold(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -429,7 +435,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -480,7 +486,7 @@ class VarianceThreshold(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -498,44 +504,6 @@ class VarianceThreshold(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -575,7 +543,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -640,7 +608,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -701,7 +669,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -766,7 +734,7 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -820,13 +788,17 @@ class VarianceThreshold(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -900,9 +872,9 @@ class VarianceThreshold(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
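`VarianceThreshold` gets the identical treatment, including the new `score` path: the method now calls `_batch_inference_validate_snowpark` first and stores the validated package list in `self._deps` before building the stored-procedure dependency list. A rough sketch of that flow, with a hypothetical validator standing in for the real method:

```python
# Rough sketch of the 1.4.0 score() dependency flow; validate_deps is a
# hypothetical stand-in for _batch_inference_validate_snowpark.
from typing import List


def validate_deps(requested: List[str]) -> List[str]:
    """Pretend validator: the real method checks that the estimator is fitted
    and the session exists, then returns the packages that are actually
    available in the Snowflake Anaconda channel."""
    return requested  # assume everything resolves in this sketch


deps = validate_deps(["scikit-learn", "numpy"])  # hypothetical package list
# Mirrors the new diff line: the snowpark client package is always prepended.
dependencies = ["snowflake-snowpark-python"] + deps
print(dependencies)  # ['snowflake-snowpark-python', 'scikit-learn', 'numpy']
```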
```diff
--- a/snowflake/ml/modeling/framework/base.py
+++ b/snowflake/ml/modeling/framework/base.py
@@ -51,8 +51,8 @@ class Base:
             input_cols: Input columns.
             output_cols: Output columns.
             label_cols: Label column(s).
-            passthrough_cols: List columns not to be used or modified by the estimator/
-                These columns will be passed through all the estimator/
+            passthrough_cols: List columns not to be used or modified by the estimator/transformers.
+                These columns will be passed through all the estimator/transformer operations without any modifications.
         """
         self.input_cols: List[str] = []
         self.output_cols: List[str] = []
@@ -185,7 +185,10 @@ class Base:
                 error_code=error_codes.INVALID_ATTRIBUTE,
                 original_exception=RuntimeError(
                     modeling_error_messages.SIZE_MISMATCH.format(
-                        "input_cols",
+                        "input_cols",
+                        len(self.input_cols),
+                        "output_cols",
+                        len(self.output_cols),
                     )
                 ),
             )
@@ -498,7 +501,11 @@ class BaseTransformer(BaseEstimator):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         """Base class for all transformers."""
-        super().__init__(
+        super().__init__(
+            file_names=file_names,
+            custom_states=custom_states,
+            sample_weight_col=sample_weight_col,
+        )
         self._sklearn_object = None
         self._is_fitted = False
         self._drop_input_cols = drop_input_cols
@@ -577,6 +584,46 @@ class BaseTransformer(BaseEstimator):
             ),
         )
 
+    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
+        """Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
+
+        Args:
+            output_cols_prefix: the prefix for output cols, such as its inference method.
+            output_cols: The output cols. Defaults to None. This is introduced by kneighbors methods
+
+        Returns:
+            inferred output column names
+        """
+        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
+        if output_cols:
+            output_cols = [
+                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)]) for c in output_cols
+            ]
+        elif getattr(self._sklearn_object, "classes_", None) is None:
+            output_cols = [output_cols_prefix]
+        elif self._sklearn_object is not None:
+            classes = self._sklearn_object.classes_
+            if isinstance(classes, np.ndarray):
+                output_cols = [f"{output_cols_prefix}{str(c)}" for c in classes.tolist()]
+            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], np.ndarray):
+                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
+                output_cols = []
+                for i, cl in enumerate(classes):
+                    # For binary classification, there is only one output column for each class
+                    # ndarray as the two classes are complementary.
+                    if len(cl) == 2:
+                        output_cols.append(f"{output_cols_prefix}{i}_{cl[0]}")
+                    else:
+                        output_cols.extend([f"{output_cols_prefix}{i}_{c}" for c in cl.tolist()])
+            else:
+                output_cols = []
+
+        # Make sure column names are valid snowflake identifiers.
+        assert output_cols is not None  # Make MyPy happy
+        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
+        return rv
+
     def set_drop_input_cols(self, drop_input_cols: Optional[bool] = False) -> None:
         self._drop_input_cols = drop_input_cols
 
@@ -665,7 +712,10 @@ class BaseTransformer(BaseEstimator):
                 error_code=error_codes.INVALID_ATTRIBUTE,
                 original_exception=RuntimeError(
                     modeling_error_messages.SIZE_MISMATCH.format(
-                        "output_cols",
+                        "output_cols",
+                        len(self.output_cols),
+                        "transformed array shape",
+                        shape,
                     )
                 ),
             )
```
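The base-class change above is what allows the per-estimator deletions earlier in this diff: `_get_output_column_names` now lives once on `BaseTransformer` instead of being duplicated into every generated subclass. A simplified, self-contained sketch of its naming rules; the real method additionally resolves and re-validates Snowflake identifiers through the `identifier` module:

```python
# Simplified sketch of _get_output_column_names; identifier resolution omitted.
from typing import List, Optional

import numpy as np


def output_column_names(prefix: str, classes: Optional[object]) -> List[str]:
    if classes is None:
        # Not a classifier: the prefix itself is the single output column.
        return [prefix]
    if isinstance(classes, np.ndarray):
        # Ordinary classifier: one column per class label.
        return [f"{prefix}{c}" for c in classes.tolist()]
    if isinstance(classes, list) and classes and isinstance(classes[0], np.ndarray):
        # Multioutput classifier: classes_ is a list of ndarrays.
        cols: List[str] = []
        for i, cl in enumerate(classes):
            if len(cl) == 2:
                # Binary output: one column suffices, the two classes are complementary.
                cols.append(f"{prefix}{i}_{cl[0]}")
            else:
                cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
        return cols
    return []


print(output_column_names("predict_proba_", np.array([0, 1, 2])))
# ['predict_proba_0', 'predict_proba_1', 'predict_proba_2']
```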
```diff
--- a/snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py
+++ b/snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py
@@ -352,18 +352,24 @@ class GaussianProcessClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -437,7 +443,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -497,16 +503,16 @@ class GaussianProcessClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -524,7 +530,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -575,7 +581,7 @@ class GaussianProcessClassifier(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -593,44 +599,6 @@ class GaussianProcessClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -672,7 +640,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -739,7 +707,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -800,7 +768,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -865,7 +833,7 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -921,13 +889,17 @@ class GaussianProcessClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1001,9 +973,9 @@ class GaussianProcessClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
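The `expected_dtype` hunks repeated in every estimator above all encode the same fallback: when the transform output cannot be mapped one-to-one onto the declared output columns, each result row holds an `"array"` of values rather than a scalar. A condensed sketch of that decision, with a duck-typed object standing in for `self._sklearn_object`:

```python
# Condensed sketch of the output-dtype fallback; FakeKMeans is illustrative only.


class FakeKMeans:
    n_clusters = 8  # a clustering estimator attribute, as probed via hasattr()


def infer_expected_dtype(estimator: object, output_cols: list) -> str:
    expected_dtype = ""
    # Clustering: a cluster count differing from the output column count means
    # each output row must carry a list of values rather than a scalar.
    if hasattr(estimator, "n_clusters") and getattr(estimator, "n_clusters") != len(output_cols):
        expected_dtype = "array"
    # Decomposition: the same rule, applied to n_components.
    elif hasattr(estimator, "n_components") and getattr(estimator, "n_components") != len(output_cols):
        expected_dtype = "array"
    return expected_dtype


print(infer_expected_dtype(FakeKMeans(), ["CLUSTER"]))  # -> "array"
```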