snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/cluster/spectral_coclustering.py

```diff
@@ -301,18 +301,24 @@ class SpectralCoclustering(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -384,7 +390,7 @@ class SpectralCoclustering(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -444,16 +450,16 @@ class SpectralCoclustering(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "
+                expected_dtype = "array"
            else:
                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                # We can only infer the output types from the input types if the following two statemetns are true:
@@ -471,7 +477,7 @@ class SpectralCoclustering(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -522,7 +528,7 @@ class SpectralCoclustering(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -540,44 +546,6 @@ class SpectralCoclustering(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -617,7 +585,7 @@ class SpectralCoclustering(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -682,7 +650,7 @@ class SpectralCoclustering(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -743,7 +711,7 @@ class SpectralCoclustering(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -808,7 +776,7 @@ class SpectralCoclustering(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -862,13 +830,17 @@ class SpectralCoclustering(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -942,9 +914,9 @@ class SpectralCoclustering(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
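Across the generated estimators, these hunks apply the same template change: the `_get_pass_through_columns` and `_get_output_column_names` helpers are dropped from each generated class, `drop_input_cols = self._drop_input_cols` is threaded through every `transform_kwargs`, and `score()` now resolves its dependency list up front via `_batch_inference_validate_snowpark`, which per its new docstring returns the packages available in the Snowflake Anaconda channel. A minimal sketch of that new score-time flow follows; the class, placeholder return values, and `print` are hypothetical, and only the method names and call shape come from the diff:

```python
from typing import Any, List


class EstimatorSketch:
    """Hypothetical stand-in for a generated BaseTransformer subclass."""

    def __init__(self) -> None:
        self._is_fitted = True
        self._deps: List[str] = []

    def _batch_inference_validate_snowpark(self, dataset: Any, inference_method: str) -> List[str]:
        # Per the 1.4.0 docstring: validate that batch inference can run,
        # then return the packages available in the Snowflake Anaconda channel.
        if not self._is_fitted:
            raise RuntimeError("estimator is not fitted")  # stands in for SnowflakeMLException
        return ["scikit-learn"]  # placeholder result

    def score(self, dataset: Any) -> None:
        # New in 1.4.0: dependencies are resolved and cached at score() time,
        # then prepended with the Snowpark client package.
        self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
        transform_kwargs = dict(
            dependencies=["snowflake-snowpark-python"] + self._deps,
            score_sproc_imports=["sklearn"],
        )
        print(transform_kwargs)  # the real template forwards these to the scoring sproc


EstimatorSketch().score(dataset=None)
```

The same set of hunks repeats for each generated estimator, including the two classes below.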
snowflake/ml/modeling/compose/column_transformer.py

```diff
@@ -331,18 +331,24 @@ class ColumnTransformer(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -414,7 +420,7 @@ class ColumnTransformer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -476,16 +482,16 @@ class ColumnTransformer(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "
+                expected_dtype = "array"
            else:
                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                # We can only infer the output types from the input types if the following two statemetns are true:
@@ -503,7 +509,7 @@ class ColumnTransformer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -554,7 +560,7 @@ class ColumnTransformer(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -572,44 +578,6 @@ class ColumnTransformer(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -649,7 +617,7 @@ class ColumnTransformer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -714,7 +682,7 @@ class ColumnTransformer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -775,7 +743,7 @@ class ColumnTransformer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -840,7 +808,7 @@ class ColumnTransformer(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -894,13 +862,17 @@ class ColumnTransformer(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -974,9 +946,9 @@ class ColumnTransformer(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
snowflake/ml/modeling/compose/transformed_target_regressor.py

```diff
@@ -292,18 +292,24 @@ class TransformedTargetRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -377,7 +383,7 @@ class TransformedTargetRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -437,16 +443,16 @@ class TransformedTargetRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
            if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
            elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "
+                expected_dtype = "array"
            else:
                output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                # We can only infer the output types from the input types if the following two statemetns are true:
@@ -464,7 +470,7 @@ class TransformedTargetRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )
 
@@ -515,7 +521,7 @@ class TransformedTargetRegressor(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -533,44 +539,6 @@ class TransformedTargetRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-            Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-        else:
-            output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -610,7 +578,7 @@ class TransformedTargetRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -675,7 +643,7 @@ class TransformedTargetRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -736,7 +704,7 @@ class TransformedTargetRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -801,7 +769,7 @@ class TransformedTargetRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -857,13 +825,17 @@ class TransformedTargetRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -937,9 +909,9 @@ class TransformedTargetRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
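The `expected_dtype` fallback that these hunks complete can be read as the following standalone sketch. The attribute probes and the `"array"` result mirror the added lines of the diff; the wrapper function, the fake estimator, and the `print` call are illustrative and not part of snowflake-ml-python:

```python
from typing import Any, List


def infer_expected_dtype(sklearn_object: Any, output_cols: List[str]) -> str:
    """Illustrative rewrite of the output-dtype fallback shown in the hunks above."""
    expected_dtype = ""
    # Clustering transformer: when the output column count differs from
    # n_clusters, each row carries a list of values, so the type is "array".
    if hasattr(sklearn_object, "n_clusters") and getattr(sklearn_object, "n_clusters") != len(output_cols):
        expected_dtype = "array"
    # Decomposition transformer: the same rule, keyed on n_components.
    elif hasattr(sklearn_object, "n_components") and getattr(sklearn_object, "n_components") != len(output_cols):
        expected_dtype = "array"
    return expected_dtype


class FakeKMeans:
    n_clusters = 8


print(infer_expected_dtype(FakeKMeans(), ["OUTPUT_1"]))  # -> "array"
```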