snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. The information is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/cluster/bisecting_k_means.py (excerpt; the same refactor appears in each modeling wrapper listed above). Where the source page dropped the highlighted portion of a changed line, the removed line is reproduced as extracted, i.e. truncated.

```diff
@@ -343,18 +343,24 @@ class BisectingKMeans(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -428,7 +434,7 @@ class BisectingKMeans(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -490,16 +496,16 @@ class BisectingKMeans(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -517,7 +523,7 @@ class BisectingKMeans(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -570,7 +576,7 @@ class BisectingKMeans(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -588,44 +594,6 @@ class BisectingKMeans(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -665,7 +633,7 @@ class BisectingKMeans(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -730,7 +698,7 @@ class BisectingKMeans(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -791,7 +759,7 @@ class BisectingKMeans(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -856,7 +824,7 @@ class BisectingKMeans(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -912,13 +880,17 @@ class BisectingKMeans(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -992,9 +964,9 @@ class BisectingKMeans(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
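Every modeling wrapper listed above with a `+33 -61` delta receives the same refactor shown in this excerpt: the per-estimator `_get_pass_through_columns` helper is deleted, and each `transform_kwargs` instead forwards the boolean `drop_input_cols = self._drop_input_cols` so the shared batch-inference layer decides which columns pass through. A minimal standalone sketch of the semantics the deleted helper encoded, restated here as a hypothetical free function (not code from either release):

```python
from typing import List

def pass_through_columns(dataset_columns: List[str], output_cols: List[str],
                         drop_input_cols: bool) -> List[str]:
    """Hypothetical restatement of the removed 1.3.0 helper's logic."""
    if drop_input_cols:
        # Input columns are dropped, so nothing passes through to the result.
        return []
    # Otherwise, every column that is not an output column passes through.
    return list(set(dataset_columns) - set(output_cols))

# With drop_input_cols=False, the feature columns survive alongside the output.
print(sorted(pass_through_columns(["A", "B", "OUTPUT"], ["OUTPUT"], False)))  # ['A', 'B']
print(pass_through_columns(["A", "B", "OUTPUT"], ["OUTPUT"], True))           # []
```

Passing the flag instead of a precomputed column list is what makes the change uniform: the same one-line `drop_input_cols` addition appears in each of the inference paths above.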
snowflake/ml/modeling/cluster/dbscan.py (same refactor, shifted hunk offsets):

```diff
@@ -311,18 +311,24 @@ class DBSCAN(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -394,7 +400,7 @@ class DBSCAN(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -454,16 +460,16 @@ class DBSCAN(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -481,7 +487,7 @@ class DBSCAN(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -534,7 +540,7 @@ class DBSCAN(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -552,44 +558,6 @@ class DBSCAN(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -629,7 +597,7 @@ class DBSCAN(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -694,7 +662,7 @@ class DBSCAN(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -755,7 +723,7 @@ class DBSCAN(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -820,7 +788,7 @@ class DBSCAN(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -874,13 +842,17 @@ class DBSCAN(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -954,9 +926,9 @@ class DBSCAN(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
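The other large deletion repeated in every wrapper is `_get_output_column_names`, which derived output column names for `predict_proba()`-style methods from the fitted estimator's `classes_` attribute. A standalone paraphrase of the deleted logic, with the explicit `output_cols` override and the Snowflake-identifier normalization (`identifier.resolve_identifier`, `identifier.rename_to_valid_snowflake_identifier`) omitted for brevity:

```python
from typing import List
import numpy as np

def output_column_names(prefix: str, classes) -> List[str]:
    """Paraphrase of the removed _get_output_column_names logic."""
    if classes is None:
        # Not a classifier: the prefix itself is the single output column.
        return [prefix]
    if isinstance(classes, np.ndarray):
        return [f"{prefix}{c}" for c in classes.tolist()]
    if isinstance(classes, list) and classes and isinstance(classes[0], np.ndarray):
        # Multioutput estimator: classes_ is a list of ndarrays.
        cols: List[str] = []
        for i, cl in enumerate(classes):
            if len(cl) == 2:
                # Binary output: one column per ndarray, the two classes being complementary.
                cols.append(f"{prefix}{i}_{cl[0]}")
            else:
                cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
        return cols
    return []

print(output_column_names("PREDICT_PROBA_", np.array([0, 1, 2])))
# ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']
```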
snowflake/ml/modeling/cluster/feature_agglomeration.py (same refactor again):

```diff
@@ -343,18 +343,24 @@ class FeatureAgglomeration(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -426,7 +432,7 @@ class FeatureAgglomeration(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -488,16 +494,16 @@ class FeatureAgglomeration(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -515,7 +521,7 @@ class FeatureAgglomeration(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -568,7 +574,7 @@ class FeatureAgglomeration(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -586,44 +592,6 @@ class FeatureAgglomeration(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -663,7 +631,7 @@ class FeatureAgglomeration(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -728,7 +696,7 @@ class FeatureAgglomeration(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -789,7 +757,7 @@ class FeatureAgglomeration(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -854,7 +822,7 @@ class FeatureAgglomeration(BaseTransformer):
            transform_kwargs = dict(
                session=dataset._session,
                dependencies=self._deps,
-
+               drop_input_cols = self._drop_input_cols,
                expected_output_cols_type="float",
            )
 
@@ -908,13 +876,17 @@ class FeatureAgglomeration(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -988,9 +960,9 @@ class FeatureAgglomeration(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
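Finally, `score()` on a Snowpark `DataFrame` now resolves dependencies up front: `_batch_inference_validate_snowpark` (whose new docstring appears in the first hunk of each file) returns the packages available in the Snowflake Anaconda channel, the result is stored in `self._deps`, and the score sproc's dependency list becomes `["snowflake-snowpark-python"] + self._deps`. A small sketch of that flow with the validation step stubbed out; the stub body and package names are illustrative, not from the diff:

```python
from typing import List

def batch_inference_validate_stub(inference_method: str) -> List[str]:
    """Stand-in for _batch_inference_validate_snowpark. Per its docstring, the
    real method also raises SnowflakeMLException when the estimator is unfitted
    or the session is None."""
    assert inference_method in ("predict", "score")
    return ["scikit-learn", "numpy"]  # illustrative channel-available packages

# score()-time flow per the hunks above:
deps = batch_inference_validate_stub("score")
dependencies = ["snowflake-snowpark-python"] + deps
print(dependencies)  # ['snowflake-snowpark-python', 'scikit-learn', 'numpy']
```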