snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -287,18 +287,24 @@ class EllipticEnvelope(BaseTransformer):
|
|
287
287
|
self._get_model_signatures(dataset)
|
288
288
|
return self
|
289
289
|
|
290
|
-
def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
|
291
|
-
if self._drop_input_cols:
|
292
|
-
return []
|
293
|
-
else:
|
294
|
-
return list(set(dataset.columns) - set(self.output_cols))
|
295
|
-
|
296
290
|
def _batch_inference_validate_snowpark(
|
297
291
|
self,
|
298
292
|
dataset: DataFrame,
|
299
293
|
inference_method: str,
|
300
294
|
) -> List[str]:
|
301
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
295
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe and
|
296
|
+
return the available package that exists in the snowflake anaconda channel
|
297
|
+
|
298
|
+
Args:
|
299
|
+
dataset: snowpark dataframe
|
300
|
+
inference_method: the inference method such as predict, score...
|
301
|
+
|
302
|
+
Raises:
|
303
|
+
SnowflakeMLException: If the estimator is not fitted, raise error
|
304
|
+
SnowflakeMLException: If the session is None, raise error
|
305
|
+
|
306
|
+
Returns:
|
307
|
+
A list of available package that exists in the snowflake anaconda channel
|
302
308
|
"""
|
303
309
|
if not self._is_fitted:
|
304
310
|
raise exceptions.SnowflakeMLException(
|
@@ -372,7 +378,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
372
378
|
transform_kwargs = dict(
|
373
379
|
session = dataset._session,
|
374
380
|
dependencies = self._deps,
|
375
|
-
|
381
|
+
drop_input_cols = self._drop_input_cols,
|
376
382
|
expected_output_cols_type = expected_type_inferred,
|
377
383
|
)
|
378
384
|
|
@@ -432,16 +438,16 @@ class EllipticEnvelope(BaseTransformer):
|
|
432
438
|
# from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
|
433
439
|
# based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
|
434
440
|
# each row containing a list of values.
|
435
|
-
expected_dtype = "
|
441
|
+
expected_dtype = "array"
|
436
442
|
|
437
443
|
# If we were unable to assign a type to this transform in the factory, infer the type here.
|
438
444
|
if expected_dtype == "":
|
439
|
-
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
|
445
|
+
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
|
440
446
|
if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
|
441
|
-
expected_dtype = "
|
442
|
-
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
|
447
|
+
expected_dtype = "array"
|
448
|
+
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
|
443
449
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
444
|
-
expected_dtype = "
|
450
|
+
expected_dtype = "array"
|
445
451
|
else:
|
446
452
|
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
447
453
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
@@ -459,7 +465,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
459
465
|
transform_kwargs = dict(
|
460
466
|
session = dataset._session,
|
461
467
|
dependencies = self._deps,
|
462
|
-
|
468
|
+
drop_input_cols = self._drop_input_cols,
|
463
469
|
expected_output_cols_type = expected_dtype,
|
464
470
|
)
|
465
471
|
|
@@ -512,7 +518,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
512
518
|
subproject=_SUBPROJECT,
|
513
519
|
)
|
514
520
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
515
|
-
|
521
|
+
drop_input_cols=self._drop_input_cols,
|
516
522
|
expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
|
517
523
|
)
|
518
524
|
self._sklearn_object = fitted_estimator
|
@@ -530,44 +536,6 @@ class EllipticEnvelope(BaseTransformer):
|
|
530
536
|
assert self._sklearn_object is not None
|
531
537
|
return self._sklearn_object.embedding_
|
532
538
|
|
533
|
-
|
534
|
-
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
535
|
-
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
536
|
-
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
537
|
-
"""
|
538
|
-
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
539
|
-
if output_cols:
|
540
|
-
output_cols = [
|
541
|
-
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
542
|
-
for c in output_cols
|
543
|
-
]
|
544
|
-
elif getattr(self._sklearn_object, "classes_", None) is None:
|
545
|
-
output_cols = [output_cols_prefix]
|
546
|
-
elif self._sklearn_object is not None:
|
547
|
-
classes = self._sklearn_object.classes_
|
548
|
-
if isinstance(classes, numpy.ndarray):
|
549
|
-
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
550
|
-
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
551
|
-
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
552
|
-
output_cols = []
|
553
|
-
for i, cl in enumerate(classes):
|
554
|
-
# For binary classification, there is only one output column for each class
|
555
|
-
# ndarray as the two classes are complementary.
|
556
|
-
if len(cl) == 2:
|
557
|
-
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
558
|
-
else:
|
559
|
-
output_cols.extend([
|
560
|
-
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
561
|
-
])
|
562
|
-
else:
|
563
|
-
output_cols = []
|
564
|
-
|
565
|
-
# Make sure column names are valid snowflake identifiers.
|
566
|
-
assert output_cols is not None # Make MyPy happy
|
567
|
-
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
568
|
-
|
569
|
-
return rv
|
570
|
-
|
571
539
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
572
540
|
@telemetry.send_api_usage_telemetry(
|
573
541
|
project=_PROJECT,
|
@@ -607,7 +575,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
607
575
|
transform_kwargs = dict(
|
608
576
|
session=dataset._session,
|
609
577
|
dependencies=self._deps,
|
610
|
-
|
578
|
+
drop_input_cols = self._drop_input_cols,
|
611
579
|
expected_output_cols_type="float",
|
612
580
|
)
|
613
581
|
|
@@ -672,7 +640,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
672
640
|
transform_kwargs = dict(
|
673
641
|
session=dataset._session,
|
674
642
|
dependencies=self._deps,
|
675
|
-
|
643
|
+
drop_input_cols = self._drop_input_cols,
|
676
644
|
expected_output_cols_type="float",
|
677
645
|
)
|
678
646
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -735,7 +703,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
735
703
|
transform_kwargs = dict(
|
736
704
|
session=dataset._session,
|
737
705
|
dependencies=self._deps,
|
738
|
-
|
706
|
+
drop_input_cols = self._drop_input_cols,
|
739
707
|
expected_output_cols_type="float",
|
740
708
|
)
|
741
709
|
|
@@ -802,7 +770,7 @@ class EllipticEnvelope(BaseTransformer):
|
|
802
770
|
transform_kwargs = dict(
|
803
771
|
session=dataset._session,
|
804
772
|
dependencies=self._deps,
|
805
|
-
|
773
|
+
drop_input_cols = self._drop_input_cols,
|
806
774
|
expected_output_cols_type="float",
|
807
775
|
)
|
808
776
|
|
@@ -858,13 +826,17 @@ class EllipticEnvelope(BaseTransformer):
|
|
858
826
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
859
827
|
|
860
828
|
if isinstance(dataset, DataFrame):
|
829
|
+
self._deps = self._batch_inference_validate_snowpark(
|
830
|
+
dataset=dataset,
|
831
|
+
inference_method="score",
|
832
|
+
)
|
861
833
|
selected_cols = self._get_active_columns()
|
862
834
|
if len(selected_cols) > 0:
|
863
835
|
dataset = dataset.select(selected_cols)
|
864
836
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
865
837
|
transform_kwargs = dict(
|
866
838
|
session=dataset._session,
|
867
|
-
dependencies=["snowflake-snowpark-python"] + self.
|
839
|
+
dependencies=["snowflake-snowpark-python"] + self._deps,
|
868
840
|
score_sproc_imports=['sklearn'],
|
869
841
|
)
|
870
842
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -938,9 +910,9 @@ class EllipticEnvelope(BaseTransformer):
|
|
938
910
|
transform_kwargs = dict(
|
939
911
|
session = dataset._session,
|
940
912
|
dependencies = self._deps,
|
941
|
-
|
942
|
-
expected_output_cols_type
|
943
|
-
n_neighbors =
|
913
|
+
drop_input_cols = self._drop_input_cols,
|
914
|
+
expected_output_cols_type="array",
|
915
|
+
n_neighbors = n_neighbors,
|
944
916
|
return_distance = return_distance
|
945
917
|
)
|
946
918
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -263,18 +263,24 @@ class EmpiricalCovariance(BaseTransformer):
|
|
263
263
|
self._get_model_signatures(dataset)
|
264
264
|
return self
|
265
265
|
|
266
|
-
def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
|
267
|
-
if self._drop_input_cols:
|
268
|
-
return []
|
269
|
-
else:
|
270
|
-
return list(set(dataset.columns) - set(self.output_cols))
|
271
|
-
|
272
266
|
def _batch_inference_validate_snowpark(
|
273
267
|
self,
|
274
268
|
dataset: DataFrame,
|
275
269
|
inference_method: str,
|
276
270
|
) -> List[str]:
|
277
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
271
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe and
|
272
|
+
return the available package that exists in the snowflake anaconda channel
|
273
|
+
|
274
|
+
Args:
|
275
|
+
dataset: snowpark dataframe
|
276
|
+
inference_method: the inference method such as predict, score...
|
277
|
+
|
278
|
+
Raises:
|
279
|
+
SnowflakeMLException: If the estimator is not fitted, raise error
|
280
|
+
SnowflakeMLException: If the session is None, raise error
|
281
|
+
|
282
|
+
Returns:
|
283
|
+
A list of available package that exists in the snowflake anaconda channel
|
278
284
|
"""
|
279
285
|
if not self._is_fitted:
|
280
286
|
raise exceptions.SnowflakeMLException(
|
@@ -346,7 +352,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
346
352
|
transform_kwargs = dict(
|
347
353
|
session = dataset._session,
|
348
354
|
dependencies = self._deps,
|
349
|
-
|
355
|
+
drop_input_cols = self._drop_input_cols,
|
350
356
|
expected_output_cols_type = expected_type_inferred,
|
351
357
|
)
|
352
358
|
|
@@ -406,16 +412,16 @@ class EmpiricalCovariance(BaseTransformer):
|
|
406
412
|
# from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
|
407
413
|
# based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
|
408
414
|
# each row containing a list of values.
|
409
|
-
expected_dtype = "
|
415
|
+
expected_dtype = "array"
|
410
416
|
|
411
417
|
# If we were unable to assign a type to this transform in the factory, infer the type here.
|
412
418
|
if expected_dtype == "":
|
413
|
-
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
|
419
|
+
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
|
414
420
|
if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
|
415
|
-
expected_dtype = "
|
416
|
-
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
|
421
|
+
expected_dtype = "array"
|
422
|
+
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
|
417
423
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
418
|
-
expected_dtype = "
|
424
|
+
expected_dtype = "array"
|
419
425
|
else:
|
420
426
|
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
421
427
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
@@ -433,7 +439,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
433
439
|
transform_kwargs = dict(
|
434
440
|
session = dataset._session,
|
435
441
|
dependencies = self._deps,
|
436
|
-
|
442
|
+
drop_input_cols = self._drop_input_cols,
|
437
443
|
expected_output_cols_type = expected_dtype,
|
438
444
|
)
|
439
445
|
|
@@ -484,7 +490,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
484
490
|
subproject=_SUBPROJECT,
|
485
491
|
)
|
486
492
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
487
|
-
|
493
|
+
drop_input_cols=self._drop_input_cols,
|
488
494
|
expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
|
489
495
|
)
|
490
496
|
self._sklearn_object = fitted_estimator
|
@@ -502,44 +508,6 @@ class EmpiricalCovariance(BaseTransformer):
|
|
502
508
|
assert self._sklearn_object is not None
|
503
509
|
return self._sklearn_object.embedding_
|
504
510
|
|
505
|
-
|
506
|
-
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
507
|
-
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
508
|
-
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
509
|
-
"""
|
510
|
-
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
511
|
-
if output_cols:
|
512
|
-
output_cols = [
|
513
|
-
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
514
|
-
for c in output_cols
|
515
|
-
]
|
516
|
-
elif getattr(self._sklearn_object, "classes_", None) is None:
|
517
|
-
output_cols = [output_cols_prefix]
|
518
|
-
elif self._sklearn_object is not None:
|
519
|
-
classes = self._sklearn_object.classes_
|
520
|
-
if isinstance(classes, numpy.ndarray):
|
521
|
-
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
522
|
-
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
523
|
-
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
524
|
-
output_cols = []
|
525
|
-
for i, cl in enumerate(classes):
|
526
|
-
# For binary classification, there is only one output column for each class
|
527
|
-
# ndarray as the two classes are complementary.
|
528
|
-
if len(cl) == 2:
|
529
|
-
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
530
|
-
else:
|
531
|
-
output_cols.extend([
|
532
|
-
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
533
|
-
])
|
534
|
-
else:
|
535
|
-
output_cols = []
|
536
|
-
|
537
|
-
# Make sure column names are valid snowflake identifiers.
|
538
|
-
assert output_cols is not None # Make MyPy happy
|
539
|
-
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
540
|
-
|
541
|
-
return rv
|
542
|
-
|
543
511
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
544
512
|
@telemetry.send_api_usage_telemetry(
|
545
513
|
project=_PROJECT,
|
@@ -579,7 +547,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
579
547
|
transform_kwargs = dict(
|
580
548
|
session=dataset._session,
|
581
549
|
dependencies=self._deps,
|
582
|
-
|
550
|
+
drop_input_cols = self._drop_input_cols,
|
583
551
|
expected_output_cols_type="float",
|
584
552
|
)
|
585
553
|
|
@@ -644,7 +612,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
644
612
|
transform_kwargs = dict(
|
645
613
|
session=dataset._session,
|
646
614
|
dependencies=self._deps,
|
647
|
-
|
615
|
+
drop_input_cols = self._drop_input_cols,
|
648
616
|
expected_output_cols_type="float",
|
649
617
|
)
|
650
618
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -705,7 +673,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
705
673
|
transform_kwargs = dict(
|
706
674
|
session=dataset._session,
|
707
675
|
dependencies=self._deps,
|
708
|
-
|
676
|
+
drop_input_cols = self._drop_input_cols,
|
709
677
|
expected_output_cols_type="float",
|
710
678
|
)
|
711
679
|
|
@@ -770,7 +738,7 @@ class EmpiricalCovariance(BaseTransformer):
|
|
770
738
|
transform_kwargs = dict(
|
771
739
|
session=dataset._session,
|
772
740
|
dependencies=self._deps,
|
773
|
-
|
741
|
+
drop_input_cols = self._drop_input_cols,
|
774
742
|
expected_output_cols_type="float",
|
775
743
|
)
|
776
744
|
|
@@ -826,13 +794,17 @@ class EmpiricalCovariance(BaseTransformer):
|
|
826
794
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
827
795
|
|
828
796
|
if isinstance(dataset, DataFrame):
|
797
|
+
self._deps = self._batch_inference_validate_snowpark(
|
798
|
+
dataset=dataset,
|
799
|
+
inference_method="score",
|
800
|
+
)
|
829
801
|
selected_cols = self._get_active_columns()
|
830
802
|
if len(selected_cols) > 0:
|
831
803
|
dataset = dataset.select(selected_cols)
|
832
804
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
833
805
|
transform_kwargs = dict(
|
834
806
|
session=dataset._session,
|
835
|
-
dependencies=["snowflake-snowpark-python"] + self.
|
807
|
+
dependencies=["snowflake-snowpark-python"] + self._deps,
|
836
808
|
score_sproc_imports=['sklearn'],
|
837
809
|
)
|
838
810
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -906,9 +878,9 @@ class EmpiricalCovariance(BaseTransformer):
|
|
906
878
|
transform_kwargs = dict(
|
907
879
|
session = dataset._session,
|
908
880
|
dependencies = self._deps,
|
909
|
-
|
910
|
-
expected_output_cols_type
|
911
|
-
n_neighbors =
|
881
|
+
drop_input_cols = self._drop_input_cols,
|
882
|
+
expected_output_cols_type="array",
|
883
|
+
n_neighbors = n_neighbors,
|
912
884
|
return_distance = return_distance
|
913
885
|
)
|
914
886
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -311,18 +311,24 @@ class GraphicalLasso(BaseTransformer):
|
|
311
311
|
self._get_model_signatures(dataset)
|
312
312
|
return self
|
313
313
|
|
314
|
-
def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
|
315
|
-
if self._drop_input_cols:
|
316
|
-
return []
|
317
|
-
else:
|
318
|
-
return list(set(dataset.columns) - set(self.output_cols))
|
319
|
-
|
320
314
|
def _batch_inference_validate_snowpark(
|
321
315
|
self,
|
322
316
|
dataset: DataFrame,
|
323
317
|
inference_method: str,
|
324
318
|
) -> List[str]:
|
325
|
-
"""Util method to run validate that batch inference can be run on a snowpark dataframe
|
319
|
+
"""Util method to run validate that batch inference can be run on a snowpark dataframe and
|
320
|
+
return the available package that exists in the snowflake anaconda channel
|
321
|
+
|
322
|
+
Args:
|
323
|
+
dataset: snowpark dataframe
|
324
|
+
inference_method: the inference method such as predict, score...
|
325
|
+
|
326
|
+
Raises:
|
327
|
+
SnowflakeMLException: If the estimator is not fitted, raise error
|
328
|
+
SnowflakeMLException: If the session is None, raise error
|
329
|
+
|
330
|
+
Returns:
|
331
|
+
A list of available package that exists in the snowflake anaconda channel
|
326
332
|
"""
|
327
333
|
if not self._is_fitted:
|
328
334
|
raise exceptions.SnowflakeMLException(
|
@@ -394,7 +400,7 @@ class GraphicalLasso(BaseTransformer):
|
|
394
400
|
transform_kwargs = dict(
|
395
401
|
session = dataset._session,
|
396
402
|
dependencies = self._deps,
|
397
|
-
|
403
|
+
drop_input_cols = self._drop_input_cols,
|
398
404
|
expected_output_cols_type = expected_type_inferred,
|
399
405
|
)
|
400
406
|
|
@@ -454,16 +460,16 @@ class GraphicalLasso(BaseTransformer):
|
|
454
460
|
# from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
|
455
461
|
# based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
|
456
462
|
# each row containing a list of values.
|
457
|
-
expected_dtype = "
|
463
|
+
expected_dtype = "array"
|
458
464
|
|
459
465
|
# If we were unable to assign a type to this transform in the factory, infer the type here.
|
460
466
|
if expected_dtype == "":
|
461
|
-
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
|
467
|
+
# If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
|
462
468
|
if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
|
463
|
-
expected_dtype = "
|
464
|
-
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
|
469
|
+
expected_dtype = "array"
|
470
|
+
# If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
|
465
471
|
elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
|
466
|
-
expected_dtype = "
|
472
|
+
expected_dtype = "array"
|
467
473
|
else:
|
468
474
|
output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
|
469
475
|
# We can only infer the output types from the input types if the following two statemetns are true:
|
@@ -481,7 +487,7 @@ class GraphicalLasso(BaseTransformer):
|
|
481
487
|
transform_kwargs = dict(
|
482
488
|
session = dataset._session,
|
483
489
|
dependencies = self._deps,
|
484
|
-
|
490
|
+
drop_input_cols = self._drop_input_cols,
|
485
491
|
expected_output_cols_type = expected_dtype,
|
486
492
|
)
|
487
493
|
|
@@ -532,7 +538,7 @@ class GraphicalLasso(BaseTransformer):
|
|
532
538
|
subproject=_SUBPROJECT,
|
533
539
|
)
|
534
540
|
output_result, fitted_estimator = model_trainer.train_fit_predict(
|
535
|
-
|
541
|
+
drop_input_cols=self._drop_input_cols,
|
536
542
|
expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
|
537
543
|
)
|
538
544
|
self._sklearn_object = fitted_estimator
|
@@ -550,44 +556,6 @@ class GraphicalLasso(BaseTransformer):
|
|
550
556
|
assert self._sklearn_object is not None
|
551
557
|
return self._sklearn_object.embedding_
|
552
558
|
|
553
|
-
|
554
|
-
def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
|
555
|
-
""" Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
|
556
|
-
Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
|
557
|
-
"""
|
558
|
-
output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
|
559
|
-
if output_cols:
|
560
|
-
output_cols = [
|
561
|
-
identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
|
562
|
-
for c in output_cols
|
563
|
-
]
|
564
|
-
elif getattr(self._sklearn_object, "classes_", None) is None:
|
565
|
-
output_cols = [output_cols_prefix]
|
566
|
-
elif self._sklearn_object is not None:
|
567
|
-
classes = self._sklearn_object.classes_
|
568
|
-
if isinstance(classes, numpy.ndarray):
|
569
|
-
output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
|
570
|
-
elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
|
571
|
-
# If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
|
572
|
-
output_cols = []
|
573
|
-
for i, cl in enumerate(classes):
|
574
|
-
# For binary classification, there is only one output column for each class
|
575
|
-
# ndarray as the two classes are complementary.
|
576
|
-
if len(cl) == 2:
|
577
|
-
output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
|
578
|
-
else:
|
579
|
-
output_cols.extend([
|
580
|
-
f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
|
581
|
-
])
|
582
|
-
else:
|
583
|
-
output_cols = []
|
584
|
-
|
585
|
-
# Make sure column names are valid snowflake identifiers.
|
586
|
-
assert output_cols is not None # Make MyPy happy
|
587
|
-
rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
|
588
|
-
|
589
|
-
return rv
|
590
|
-
|
591
559
|
@available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
|
592
560
|
@telemetry.send_api_usage_telemetry(
|
593
561
|
project=_PROJECT,
|
@@ -627,7 +595,7 @@ class GraphicalLasso(BaseTransformer):
|
|
627
595
|
transform_kwargs = dict(
|
628
596
|
session=dataset._session,
|
629
597
|
dependencies=self._deps,
|
630
|
-
|
598
|
+
drop_input_cols = self._drop_input_cols,
|
631
599
|
expected_output_cols_type="float",
|
632
600
|
)
|
633
601
|
|
@@ -692,7 +660,7 @@ class GraphicalLasso(BaseTransformer):
|
|
692
660
|
transform_kwargs = dict(
|
693
661
|
session=dataset._session,
|
694
662
|
dependencies=self._deps,
|
695
|
-
|
663
|
+
drop_input_cols = self._drop_input_cols,
|
696
664
|
expected_output_cols_type="float",
|
697
665
|
)
|
698
666
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -753,7 +721,7 @@ class GraphicalLasso(BaseTransformer):
|
|
753
721
|
transform_kwargs = dict(
|
754
722
|
session=dataset._session,
|
755
723
|
dependencies=self._deps,
|
756
|
-
|
724
|
+
drop_input_cols = self._drop_input_cols,
|
757
725
|
expected_output_cols_type="float",
|
758
726
|
)
|
759
727
|
|
@@ -818,7 +786,7 @@ class GraphicalLasso(BaseTransformer):
|
|
818
786
|
transform_kwargs = dict(
|
819
787
|
session=dataset._session,
|
820
788
|
dependencies=self._deps,
|
821
|
-
|
789
|
+
drop_input_cols = self._drop_input_cols,
|
822
790
|
expected_output_cols_type="float",
|
823
791
|
)
|
824
792
|
|
@@ -874,13 +842,17 @@ class GraphicalLasso(BaseTransformer):
|
|
874
842
|
transform_kwargs: ScoreKwargsTypedDict = dict()
|
875
843
|
|
876
844
|
if isinstance(dataset, DataFrame):
|
845
|
+
self._deps = self._batch_inference_validate_snowpark(
|
846
|
+
dataset=dataset,
|
847
|
+
inference_method="score",
|
848
|
+
)
|
877
849
|
selected_cols = self._get_active_columns()
|
878
850
|
if len(selected_cols) > 0:
|
879
851
|
dataset = dataset.select(selected_cols)
|
880
852
|
assert isinstance(dataset._session, Session) # keep mypy happy
|
881
853
|
transform_kwargs = dict(
|
882
854
|
session=dataset._session,
|
883
|
-
dependencies=["snowflake-snowpark-python"] + self.
|
855
|
+
dependencies=["snowflake-snowpark-python"] + self._deps,
|
884
856
|
score_sproc_imports=['sklearn'],
|
885
857
|
)
|
886
858
|
elif isinstance(dataset, pd.DataFrame):
|
@@ -954,9 +926,9 @@ class GraphicalLasso(BaseTransformer):
|
|
954
926
|
transform_kwargs = dict(
|
955
927
|
session = dataset._session,
|
956
928
|
dependencies = self._deps,
|
957
|
-
|
958
|
-
expected_output_cols_type
|
959
|
-
n_neighbors =
|
929
|
+
drop_input_cols = self._drop_input_cols,
|
930
|
+
expected_output_cols_type="array",
|
931
|
+
n_neighbors = n_neighbors,
|
960
932
|
return_distance = return_distance
|
961
933
|
)
|
962
934
|
elif isinstance(dataset, pd.DataFrame):
|