snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
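
Most of this diff is mechanical: every generated estimator under `snowflake/ml/modeling/` receives the same refactor (the recurring +33 -61 deltas above), reconstructed below for three representative classes. Among the genuinely new files, `snowflake/ml/model/_packager/model_handlers/sentence_transformers.py` (+214) adds a packaging handler for sentence-transformers models. A minimal, hedged sketch of how such a model would presumably be logged in 1.4.0; `Registry.log_model` is an existing API in this package, but acceptance of `SentenceTransformer` objects is inferred from the new handler file, not confirmed by this diff:

```python
# Hedged sketch, not taken from this diff: assumes snowflake-ml-python 1.4.0,
# an open Snowpark `session`, and that the new handler wires SentenceTransformer
# objects into the existing Registry.log_model path.
from sentence_transformers import SentenceTransformer
from snowflake.ml.registry import Registry

model = SentenceTransformer("all-MiniLM-L6-v2")  # any pretrained encoder

registry = Registry(session=session)
mv = registry.log_model(
    model,
    model_name="minilm_encoder",  # hypothetical name
    version_name="v1",
    sample_input_data=["a sample sentence used to infer the model signature"],
)
```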
snowflake/ml/modeling/ensemble/isolation_forest.py

@@ -324,18 +324,24 @@ class IsolationForest(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -409,7 +415,7 @@ class IsolationForest(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -469,16 +475,16 @@ class IsolationForest(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -496,7 +502,7 @@ class IsolationForest(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -549,7 +555,7 @@ class IsolationForest(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -567,44 +573,6 @@ class IsolationForest(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -644,7 +612,7 @@ class IsolationForest(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -709,7 +677,7 @@ class IsolationForest(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -772,7 +740,7 @@ class IsolationForest(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -839,7 +807,7 @@ class IsolationForest(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -893,13 +861,17 @@ class IsolationForest(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -973,9 +945,9 @@ class IsolationForest(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
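
The hunks above repeat with identical shape in every other generated estimator: the per-class helpers `_get_pass_through_columns` and `_get_output_column_names` are deleted (presumably centralized, given the +55 lines in `snowflake/ml/modeling/framework/base.py`), `_batch_inference_validate_snowpark` now also returns the package versions resolved against the Snowflake Anaconda channel, and `score` reuses that list as its dependency pins. For reference, the deleted pass-through logic reduces to this standalone function, reproduced from the removed lines with the `self` state turned into parameters:

```python
from typing import List


def get_pass_through_columns(dataset_columns: List[str], output_cols: List[str], drop_input_cols: bool) -> List[str]:
    """Columns forwarded untouched alongside predictions: none when input
    columns are dropped, otherwise every column that is not an output column."""
    if drop_input_cols:
        return []
    return list(set(dataset_columns) - set(output_cols))
```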
snowflake/ml/modeling/ensemble/random_forest_classifier.py

@@ -436,18 +436,24 @@ class RandomForestClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -521,7 +527,7 @@ class RandomForestClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -581,16 +587,16 @@ class RandomForestClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -608,7 +614,7 @@ class RandomForestClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -659,7 +665,7 @@ class RandomForestClassifier(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -677,44 +683,6 @@ class RandomForestClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -756,7 +724,7 @@ class RandomForestClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -823,7 +791,7 @@ class RandomForestClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -884,7 +852,7 @@ class RandomForestClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -949,7 +917,7 @@ class RandomForestClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -1005,13 +973,17 @@ class RandomForestClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1085,9 +1057,9 @@ class RandomForestClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/ensemble/random_forest_regressor.py

@@ -415,18 +415,24 @@ class RandomForestRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -500,7 +506,7 @@ class RandomForestRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -560,16 +566,16 @@ class RandomForestRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -587,7 +593,7 @@ class RandomForestRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -638,7 +644,7 @@ class RandomForestRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -656,44 +662,6 @@ class RandomForestRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -733,7 +701,7 @@ class RandomForestRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -798,7 +766,7 @@ class RandomForestRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -859,7 +827,7 @@ class RandomForestRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -924,7 +892,7 @@ class RandomForestRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -980,13 +948,17 @@ class RandomForestRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -1060,9 +1032,9 @@ class RandomForestRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
            )
         elif isinstance(dataset, pd.DataFrame):
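
The `drop_input_cols` flag threaded through every `transform_kwargs` above is the constructor option end users set on these estimators. A hedged usage sketch, assuming snowflake-ml-python 1.4.0, an open Snowpark `session`, and made-up table and column names:

```python
# Hedged sketch: assumes snowflake-ml-python 1.4.0 and an open Snowpark `session`.
# Table and column names (FEATURES, F1..F3, LABEL, PREDICTION) are hypothetical.
from snowflake.ml.modeling.ensemble import RandomForestClassifier

df = session.table("FEATURES")

clf = RandomForestClassifier(
    input_cols=["F1", "F2", "F3"],
    label_cols=["LABEL"],
    output_cols=["PREDICTION"],
    drop_input_cols=True,  # in 1.4.0 this is forwarded straight into batch inference
)
clf.fit(df)
predictions = clf.predict(df)  # input columns are dropped from the result
```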