snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0 (see the sketch after this list)
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
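Among the files added above is a new `snowflake/ml/_internal/human_readable_id/` package: two 128-entry word lists plus a generator (`hrid_generator.py`). The generator's source is not part of this diff, so the following is only a hypothetical illustration of the usual technique behind such IDs, where each draw from a 128-word list contributes 7 bits of entropy. Every name in the sketch is illustrative, not the package's API.

```python
import secrets

# Hypothetical sketch only: the real hrid_generator.py is not shown in this
# diff. Stand-ins for adjectives.txt / animals.txt, which ship with 128
# entries each (7 bits of entropy per word).
ADJECTIVES = ["amused", "brave", "calm", "daring"]
ANIMALS = ["otter", "lynx", "heron", "marmot"]

def generate_hrid() -> str:
    """Return a human-readable ID such as 'brave_otter_42'."""
    return "_".join(
        [
            secrets.choice(ADJECTIVES),
            secrets.choice(ANIMALS),
            str(secrets.randbelow(100)),  # numeric suffix to reduce collisions
        ]
    )

print(generate_hrid())
```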
snowflake/ml/modeling/impute/missing_indicator.py

@@ -285,18 +285,24 @@ class MissingIndicator(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
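The expanded docstring spells out this release's recurring refactor: `_batch_inference_validate_snowpark` no longer only validates, it also resolves the dependency list that later feeds the inference stored procedure, while a `drop_input_cols` flag replaces the deleted `_get_pass_through_columns` helper. A minimal, self-contained sketch of the call pattern as it appears in the `score()` hunks further down; the class and return value are toy stand-ins, not the package's code:

```python
from typing import Any, List

class _WrapperSketch:
    """Toy stand-in for the generated BaseTransformer wrappers."""

    _is_fitted = True
    _deps: List[str] = []

    def _batch_inference_validate_snowpark(self, dataset: Any, inference_method: str) -> List[str]:
        # As of 1.4.0 this validates (fitted estimator, live session) and also
        # returns the packages resolvable from the Snowflake Anaconda channel.
        if not self._is_fitted:
            raise RuntimeError("estimator not fitted")  # SnowflakeMLException in the real code
        return ["scikit-learn"]  # placeholder for the resolved package list

    def score(self, dataset: Any) -> dict:
        # New in 1.4.0: cache the validated deps on the estimator, then splice
        # them into the stored-procedure dependency list.
        self._deps = self._batch_inference_validate_snowpark(dataset=dataset, inference_method="score")
        return dict(
            dependencies=["snowflake-snowpark-python"] + self._deps,
            score_sproc_imports=["sklearn"],
        )

print(_WrapperSketch().score(dataset=None))
```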
@@ -368,7 +374,7 @@ class MissingIndicator(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -430,16 +436,16 @@ class MissingIndicator(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
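The `expected_dtype` hunk encodes a single inference rule: when the factory assigned no type, a clustering or decomposition transformer whose output-column count differs from its `n_clusters`/`n_components` emits one array-valued column per row. A toy restatement of just that rule (the real logic lives in the generated wrappers and otherwise falls through to signature inference):

```python
from typing import List


def infer_expected_dtype(sklearn_object: object, output_cols: List[str], expected_dtype: str = "") -> str:
    # Toy restatement of the rule in the hunk above, not the package's code.
    if expected_dtype == "":
        n_clusters = getattr(sklearn_object, "n_clusters", None)
        n_components = getattr(sklearn_object, "n_components", None)
        if n_clusters is not None and n_clusters != len(output_cols):
            expected_dtype = "array"  # e.g. KMeans.transform: one distance per cluster
        elif n_components is not None and n_components != len(output_cols):
            expected_dtype = "array"  # e.g. PCA.transform: one value per component
    return expected_dtype


class _FakeKMeans:
    n_clusters = 8


print(infer_expected_dtype(_FakeKMeans(), output_cols=["OUTPUT"]))  # -> "array"
```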
@@ -457,7 +463,7 @@ class MissingIndicator(BaseTransformer):
                 transform_kwargs = dict(
                     session = dataset._session,
                     dependencies = self._deps,
-
+                    drop_input_cols = self._drop_input_cols,
                     expected_output_cols_type = expected_dtype,
                 )
 
@@ -508,7 +514,7 @@ class MissingIndicator(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -526,44 +532,6 @@ class MissingIndicator(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
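The hunk above deletes `_get_output_column_names`, which defined how per-class output columns are named for `predict_proba()`, `decision_function()` and similar methods; the surrounding context still calls `self._get_output_column_names(...)`, so the helper has presumably moved into shared base code (`snowflake/ml/modeling/framework/base.py` gains 55 lines in this release). A compressed sketch of the naming scheme the deleted copy implemented, with the Snowflake identifier resolution and multioutput handling omitted:

```python
import numpy


def output_column_names(prefix: str, classes=None) -> list:
    # Compressed sketch of the deleted helper: identifier resolution/renaming
    # and multioutput (list-of-ndarray) handling are omitted.
    if classes is None:
        return [prefix]  # non-classifier: the prefix is the only output column
    return [f"{prefix}{c}" for c in classes.tolist()]  # one column per class label


print(output_column_names("PREDICT_PROBA_"))                       # ['PREDICT_PROBA_']
print(output_column_names("PREDICT_PROBA_", numpy.array([0, 1])))  # ['PREDICT_PROBA_0', 'PREDICT_PROBA_1']
```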
@@ -603,7 +571,7 @@ class MissingIndicator(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -668,7 +636,7 @@ class MissingIndicator(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -729,7 +697,7 @@ class MissingIndicator(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -794,7 +762,7 @@ class MissingIndicator(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -848,13 +816,17 @@ class MissingIndicator(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -928,9 +900,9 @@ class MissingIndicator(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/impute/simple_imputer.py

@@ -74,8 +74,6 @@ _NUMERIC_TYPES = [
 ]
 
 
-# TODO(thoyt): Implement logic for `add_indicator` parameter and `indicator_` attribute. Requires
-# `snowflake.ml.impute.MissingIndicator` to be implemented.
 class SimpleImputer(base.BaseTransformer):
     """
     Univariate imputer for completing missing values with simple strategies.
@@ -96,7 +94,8 @@ class SimpleImputer(base.BaseTransformer):
             * If "most_frequent", replace missing using the most frequent value along each column.
               Can be used with strings or numeric data.
               If there is more than one such value, only the smallest is returned.
-            * If "constant", replace the missing values with `fill_value
+            * If "constant", replace the missing values with `fill_value`, including columns that are entirely
+              null. Can be used with strings or numeric data.
 
         fill_value: Optional[str]
             When `strategy == "constant"`, `fill_value` is used to replace all occurrences of `missing_values`.
@@ -262,18 +261,8 @@ class SimpleImputer(base.BaseTransformer):
                         break
 
             for input_col in self.input_cols:
-
-
-                # TODO(hayu): [SNOW-752265] Support SimpleImputer keep_empty_features.
-                # Add back when `keep_empty_features` is supported.
-                # not self.keep_empty_features
-                # and dataset.filter(F.col(input_col).is_not_null()).count(statement_params=statement_params) == 0
-                    dataset.filter(F.col(input_col).is_not_null()).count(statement_params=statement_params)
-                    == 0
-                ):
-                    self.statistics_[input_col] = np.nan
-                else:
-                    self.statistics_[input_col] = self.fill_value
+                self.statistics_[input_col] = self.fill_value
+
         else:
             state = STRATEGY_TO_STATE_DICT[self.strategy]
             assert state is not None
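Read together, the two SimpleImputer hunks change the "constant" strategy: `fit()` now records `fill_value` for every input column, so a column that is entirely NULL is filled instead of receiving NaN statistics. A hedged usage sketch; the estimator's pandas code path keeps it self-contained, and the column names and fill value are illustrative:

```python
import pandas as pd
from snowflake.ml.modeling.impute import SimpleImputer

# Illustrative sketch: snowflake-ml-python modeling estimators also accept
# pandas DataFrames, which keeps this runnable without a Snowpark session.
df = pd.DataFrame({"COL_A": [None, None, None]})  # entirely-null column

imputer = SimpleImputer(
    input_cols=["COL_A"],
    output_cols=["COL_A_IMPUTED"],
    strategy="constant",
    fill_value="missing",
)

# As of 1.4.0 the constant strategy assigns fill_value even when every row of
# the input column is null, so the output reads "missing" rather than NaN.
print(imputer.fit(df).transform(df))
```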
snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py

@@ -260,18 +260,24 @@ class AdditiveChi2Sampler(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -343,7 +349,7 @@ class AdditiveChi2Sampler(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -405,16 +411,16 @@ class AdditiveChi2Sampler(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -432,7 +438,7 @@ class AdditiveChi2Sampler(BaseTransformer):
                 transform_kwargs = dict(
                     session = dataset._session,
                     dependencies = self._deps,
-
+                    drop_input_cols = self._drop_input_cols,
                    expected_output_cols_type = expected_dtype,
                 )
 
@@ -483,7 +489,7 @@ class AdditiveChi2Sampler(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -501,44 +507,6 @@ class AdditiveChi2Sampler(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -578,7 +546,7 @@ class AdditiveChi2Sampler(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -643,7 +611,7 @@ class AdditiveChi2Sampler(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -704,7 +672,7 @@ class AdditiveChi2Sampler(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -769,7 +737,7 @@ class AdditiveChi2Sampler(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -823,13 +791,17 @@ class AdditiveChi2Sampler(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -903,9 +875,9 @@ class AdditiveChi2Sampler(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
snowflake/ml/modeling/kernel_approximation/nystroem.py

@@ -308,18 +308,24 @@ class Nystroem(BaseTransformer):
         self._get_model_signatures(dataset)
         return self
 
-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -391,7 +397,7 @@ class Nystroem(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )
 
@@ -453,16 +459,16 @@ class Nystroem(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"
 
             # If we were unable to assign a type to this transform in the factory, infer the type here.
             if expected_dtype == "":
-                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+                # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
                 if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                    expected_dtype = "
-                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                    expected_dtype = "array"
+                # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
                 elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                    expected_dtype = "
+                    expected_dtype = "array"
                 else:
                     output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                     # We can only infer the output types from the input types if the following two statemetns are true:
@@ -480,7 +486,7 @@ class Nystroem(BaseTransformer):
                 transform_kwargs = dict(
                     session = dataset._session,
                     dependencies = self._deps,
-
+                    drop_input_cols = self._drop_input_cols,
                     expected_output_cols_type = expected_dtype,
                 )
 
@@ -531,7 +537,7 @@ class Nystroem(BaseTransformer):
                 subproject=_SUBPROJECT,
             )
             output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+                drop_input_cols=self._drop_input_cols,
                 expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
             )
             self._sklearn_object = fitted_estimator
@@ -549,44 +555,6 @@ class Nystroem(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_
 
-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None  # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba"))  # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -626,7 +594,7 @@ class Nystroem(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -691,7 +659,7 @@ class Nystroem(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -752,7 +720,7 @@ class Nystroem(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -817,7 +785,7 @@ class Nystroem(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
 
@@ -871,13 +839,17 @@ class Nystroem(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()
 
        if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session)  # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -951,9 +923,9 @@ class Nystroem(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):